streams-manifests/kafka-resources/base/kafka-connect/s3-kafka-connector.yaml

apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaConnector
metadata:
  name: kafka-connect-s3-connector
  namespace: kafka
  labels:
    strimzi.io/cluster: kafka-connect-cluster
spec:
  class: io.confluent.connect.s3.S3SinkConnector
  tasksMax: 2
  config:
    # Core Connector Configuration
    topics: test-topic
    # S3 Configuration
    s3.region: us-east-1
    s3.bucket.name: delta-lake-test
    s3.part.size: '134217728' # 128 MiB multipart upload part size
    # Performance tuning
    flush.size: 500 # Commit an S3 object after every 500 records
    # MinIO (or other S3-compatible) store endpoint
    store.url: https://s3.dco1.safedc.net
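    # S3 credentials are intentionally not hardcoded here. A common pattern is
    # to have the Connect worker resolve them from its environment; a minimal
    # sketch, assuming the Kafka EnvVarConfigProvider is enabled on the worker
    # (the variable names below are assumptions, not taken from this repo):
    # aws.access.key.id: ${env:AWS_ACCESS_KEY_ID}
    # aws.secret.access.key: ${env:AWS_SECRET_ACCESS_KEY}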
    # Storage and Format Configuration
    storage.class: io.confluent.connect.s3.storage.S3Storage
    format.class: io.confluent.connect.s3.format.parquet.ParquetFormat
    partitioner.class: io.confluent.connect.storage.partitioner.TimeBasedPartitioner
    path.format: "'year'=YYYY/'month'=MM/'day'=dd/'hour'=HH" # Hive-style year/month/day/hour layout
    partition.duration.ms: 3600000 # One partition directory per hour
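    # timestamp.extractor defaults to Wallclock; to partition by each record's
    # Kafka timestamp instead, uncomment:
    # timestamp.extractor: Record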
    # Behavior settings
    behavior.on.null.values: ignore # Skip tombstone records instead of failing
    # Swedish locale and timezone for the time-based partitioner
    locale: sv_SE
    timezone: Europe/Stockholm
    # # Serialization
    # # * Key
    # key.converter: org.apache.kafka.connect.storage.StringConverter
    # key.converter.schemas.enable: false # Avro schemas usually not needed for keys
    # key.converter.schema.registry.url: http://89.47.191.210:30081
    # # * Value
    # value.converter: io.confluent.connect.avro.AvroConverter
    # value.converter.schema.registry.url: http://89.47.191.210:30081
    # value.converter.schemas.enable: true
    # schema.compatibility: BACKWARD # Allow schema evolution
    # # Rotation and Batch Handling
    # rotate.interval.ms: 600000 # Rotate output files every 10 minutes regardless of flush.size
    # key.converter.use.latest.version: true
    # value.converter.use.latest.version: true
    # # Optional:
    # # s3.compression.type: gzip # Applies to JSON/ByteArray output; Parquet compression uses parquet.codec
    # # store.kafka.keys: true
    # # logging.level: debug
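# Note: the environment variables referenced above would typically be injected
# by the owning KafkaConnect resource rather than this connector. A minimal
# sketch of that injection (the Secret name and keys are assumptions, not
# taken from this repo):
#
#   spec:
#     externalConfiguration:
#       env:
#         - name: AWS_ACCESS_KEY_ID
#           valueFrom:
#             secretKeyRef:
#               name: aws-credentials
#               key: access-key-id
#         - name: AWS_SECRET_ACCESS_KEY
#           valueFrom:
#             secretKeyRef:
#               name: aws-credentials
#               key: secret-access-key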