apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaConnector
metadata:
  name: kafka-connect-s3-connector
  namespace: kafka
  labels:
    strimzi.io/cluster: kafka-connect-cluster  # must match the KafkaConnect cluster name
spec:
  class: io.confluent.connect.s3.S3SinkConnector
  tasksMax: 2
  config:
    # Core connector configuration (spec.class above already sets connector.class)
    topics: test-topic

    # S3 configuration
    s3.region: us-east-1
    s3.bucket.name: openlake-tmp
    s3.part.size: '1073741824'  # multipart upload part size (1 GiB)

    # MinIO (or other S3-compatible) endpoint; keep credentials out of this
    # resource and inject them from a Secret instead (see the sketch below)
    store.url: https://play.min.io:50000

    # Storage and format configuration
    storage.class: io.confluent.connect.s3.storage.S3Storage
    format.class: io.confluent.connect.s3.format.parquet.ParquetFormat
    partitioner.class: io.confluent.connect.storage.partitioner.TimeBasedPartitioner
    path.format: "'year'=YYYY/'month'=MM/'day'=dd/'hour'=HH"  # explicit path format
    partition.duration.ms: 3600000  # hourly partitions for manageability
    locale: en-US   # required by TimeBasedPartitioner
    timezone: UTC   # required by TimeBasedPartitioner

    # Rotation and batch handling
    flush.size: 1000            # commit a file after this many records
    rotate.interval.ms: 600000  # also commit at least every 10 minutes to bound file age

    # Behavior settings
    behavior.on.null.values: ignore  # skip tombstone records instead of failing

    # Serialization
    # * Key (StringConverter ignores the schema settings below; they only
    #   take effect if the key converter is switched to Avro)
    key.converter: org.apache.kafka.connect.storage.StringConverter
    key.converter.schemas.enable: false
    key.converter.schema.registry.url: http://schema-registry-release-cp-schema-registry:8081
    key.converter.use.latest.version: true
    # * Value
    value.converter: io.confluent.connect.avro.AvroConverter
    value.converter.schema.registry.url: http://schema-registry-release-cp-schema-registry:8081
    value.converter.schemas.enable: true
    value.converter.use.latest.version: true
    schema.compatibility: BACKWARD  # allow backward-compatible schema evolution

    # Optional
    s3.compression.type: gzip  # applies to JSON/byte-array output; Parquet compression is set via parquet.codec
    store.kafka.keys: true     # write record keys to S3 alongside values
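---
# A minimal companion sketch (an assumption, not part of the original manifest):
# the KafkaConnect cluster that the connector above targets through its
# strimzi.io/cluster label. The bootstrap address, output image, plugin archive
# URL, and the aws-credentials Secret are all hypothetical and must be replaced
# with values from your environment. It shows the standard Strimzi pattern of
# building the Confluent S3 plugin into the Connect image and injecting S3
# credentials from a Secret via FileConfigProvider instead of hard-coding them.
apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaConnect
metadata:
  name: kafka-connect-cluster
  namespace: kafka
  annotations:
    strimzi.io/use-connector-resources: "true"  # let KafkaConnector CRs drive this cluster
spec:
  replicas: 1
  bootstrapServers: my-cluster-kafka-bootstrap:9092  # assumed Kafka bootstrap service
  config:
    group.id: kafka-connect-cluster
    offset.storage.topic: connect-offsets
    config.storage.topic: connect-configs
    status.storage.topic: connect-status
    # Lets connector configs read mounted Secret files, e.g.
    # ${file:/opt/kafka/external-configuration/aws-credentials/credentials.properties:aws.access.key.id}
    config.providers: file
    config.providers.file.class: org.apache.kafka.common.config.provider.FileConfigProvider
  externalConfiguration:
    volumes:
      - name: aws-credentials
        secret:
          secretName: aws-credentials  # hypothetical Secret holding the S3 access keys
  build:
    output:
      type: docker
      image: registry.example.com/kafka-connect-s3:latest  # assumed target registry
    plugins:
      - name: kafka-connect-s3
        artifacts:
          - type: zip
            url: https://example.com/confluentinc-kafka-connect-s3.zip  # placeholder; use the real Confluent Hub archive URL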