# Listing metadata from the original file view: 57 lines, 2 KiB, YAML.
---
# Strimzi KafkaConnector that sinks records from `test-topic` into the
# S3-compatible bucket `openlake-tmp` as Parquet files, partitioned by hour.
apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaConnector
metadata:
  name: kafka-connect-s3-connector
  namespace: kafka
  labels:
    # Must match the name of the KafkaConnect resource that runs this connector.
    strimzi.io/cluster: kafka-connect-cluster
spec:
  class: io.confluent.connect.s3.S3SinkConnector
  tasksMax: 2
  config:
    # Core connector configuration.
    # `connector.class` is intentionally not repeated here: Strimzi takes it
    # from `spec.class` and does not allow it to be set inside `config`.
    topics: test-topic

    # S3 configuration
    s3.region: us-east-1
    s3.bucket.name: openlake-tmp
    # Multipart-upload part size in bytes (1 GiB).
    # NOTE(review): far above the connector's documented default; presumably
    # each open output file buffers up to one part in memory - confirm that the
    # Connect worker heap is sized for this.
    s3.part.size: '1073741824'

    # Performance tuning: number of records written per file before commit.
    flush.size: 1000

    # MinIO (or S3) endpoint URL. The endpoint is hard-coded here.
    # NOTE(review): consider supplying it through a Kafka Connect config
    # provider (env var / Secret) instead of committing it to the manifest.
    store.url: https://play.min.io:50000

    # Storage and format configuration
    storage.class: io.confluent.connect.s3.storage.S3Storage
    format.class: io.confluent.connect.s3.format.parquet.ParquetFormat
    partitioner.class: io.confluent.connect.storage.partitioner.TimeBasedPartitioner
    # Object key layout: year=/month=/day=/hour= directories.
    path.format: "'year'=YYYY/'month'=MM/'day'=dd/'hour'=HH"
    # One partition per hour (3 600 000 ms), matching the hourly path.format.
    partition.duration.ms: 3600000
    # TimeBasedPartitioner requires an explicit locale and timezone to format
    # the partition path; both were missing from the original configuration.
    locale: en-US
    timezone: UTC

    # Behavior settings: skip records whose value is null (tombstones).
    behavior.on.null.values: ignore

    # Serialization
    # * Key
    key.converter: org.apache.kafka.connect.storage.StringConverter
    # NOTE(review): the three key.converter.* options below are schema-related
    # and presumably have no effect on StringConverter - confirm and remove.
    key.converter.schemas.enable: false
    key.converter.schema.registry.url: http://schema-registry-release-cp-schema-registry:8081
    key.converter.use.latest.version: true
    # * Value
    value.converter: io.confluent.connect.avro.AvroConverter
    value.converter.schema.registry.url: http://schema-registry-release-cp-schema-registry:8081
    # NOTE(review): `schemas.enable` looks like a JsonConverter option;
    # presumably ignored by AvroConverter - confirm.
    value.converter.schemas.enable: true
    value.converter.use.latest.version: true
    # Allow schema evolution between records written by the connector.
    schema.compatibility: BACKWARD

    # Rotation and batch handling: close and upload files after 10 minutes
    # (600 000 ms) even if flush.size has not been reached.
    rotate.interval.ms: 600000

    # Optional:
    # NOTE(review): the connector docs describe `s3.compression.type` as
    # applying to JsonFormat/ByteArrayFormat; with ParquetFormat the codec is
    # presumably controlled by `parquet.codec` instead - confirm.
    s3.compression.type: gzip
    # Also write record keys to S3 alongside the values.
    store.kafka.keys: true
    # NOTE(review): `logging.level` does not look like an S3 sink connector
    # property; presumably ignored - confirm, and set log levels on the
    # KafkaConnect resource instead.
    logging.level: debug