apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaConnector
metadata:
  name: kafka-connect-s3-connector
  namespace: kafka
  labels:
    strimzi.io/cluster: kafka-connect-cluster  # must match the KafkaConnect cluster name
spec:
  class: io.confluent.connect.s3.S3SinkConnector
  tasksMax: 2
  config:
    # Core connector configuration (spec.class above already sets connector.class)
    topics: test-topic

    # S3 configuration
    s3.region: us-east-1
    s3.bucket.name: openlake-tmp
    s3.part.size: '1073741824'  # multipart upload part size (1 GiB)

    # MinIO (or other S3-compatible) endpoint; keep credentials out of this
    # resource and inject them from a Secret instead (see the sketch below)
    store.url: https://play.min.io:50000

    # Storage and format configuration
    storage.class: io.confluent.connect.s3.storage.S3Storage
    format.class: io.confluent.connect.s3.format.parquet.ParquetFormat
    partitioner.class: io.confluent.connect.storage.partitioner.TimeBasedPartitioner
    path.format: "'year'=YYYY/'month'=MM/'day'=dd/'hour'=HH"  # explicit path format
    partition.duration.ms: 3600000  # hourly partitions for manageability
    locale: en-US   # required by TimeBasedPartitioner
    timezone: UTC   # required by TimeBasedPartitioner

    # Rotation and batch handling
    flush.size: 1000            # commit a file after this many records
    rotate.interval.ms: 600000  # also commit at least every 10 minutes to bound file age

    # Behavior settings
    behavior.on.null.values: ignore  # skip tombstone records instead of failing

    # Serialization
    # * Key (StringConverter ignores the schema settings below; they only
    #   take effect if the key converter is switched to Avro)
    key.converter: org.apache.kafka.connect.storage.StringConverter
    key.converter.schemas.enable: false
    key.converter.schema.registry.url: http://schema-registry-release-cp-schema-registry:8081
    key.converter.use.latest.version: true
    # * Value
    value.converter: io.confluent.connect.avro.AvroConverter
    value.converter.schema.registry.url: http://schema-registry-release-cp-schema-registry:8081
    value.converter.schemas.enable: true
    value.converter.use.latest.version: true
    schema.compatibility: BACKWARD  # allow backward-compatible schema evolution

    # Optional
    s3.compression.type: gzip  # applies to JSON/byte-array output; Parquet compression is set via parquet.codec
    store.kafka.keys: true     # write record keys to S3 alongside values
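---
# A minimal companion sketch (an assumption, not part of the original manifest):
# the KafkaConnect cluster that the connector above targets through its
# strimzi.io/cluster label. The bootstrap address, output image, plugin archive
# URL, and the aws-credentials Secret are all hypothetical and must be replaced
# with values from your environment. It shows the standard Strimzi pattern of
# building the Confluent S3 plugin into the Connect image and injecting S3
# credentials from a Secret via FileConfigProvider instead of hard-coding them.
apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaConnect
metadata:
  name: kafka-connect-cluster
  namespace: kafka
  annotations:
    strimzi.io/use-connector-resources: "true"  # let KafkaConnector CRs drive this cluster
spec:
  replicas: 1
  bootstrapServers: my-cluster-kafka-bootstrap:9092  # assumed Kafka bootstrap service
  config:
    group.id: kafka-connect-cluster
    offset.storage.topic: connect-offsets
    config.storage.topic: connect-configs
    status.storage.topic: connect-status
    # Lets connector configs read mounted Secret files, e.g.
    # ${file:/opt/kafka/external-configuration/aws-credentials/credentials.properties:aws.access.key.id}
    config.providers: file
    config.providers.file.class: org.apache.kafka.common.config.provider.FileConfigProvider
  externalConfiguration:
    volumes:
      - name: aws-credentials
        secret:
          secretName: aws-credentials  # hypothetical Secret holding the S3 access keys
  build:
    output:
      type: docker
      image: registry.example.com/kafka-connect-s3:latest  # assumed target registry
    plugins:
      - name: kafka-connect-s3
        artifacts:
          - type: zip
            url: https://example.com/confluentinc-kafka-connect-s3.zip  # placeholder; use the real Confluent Hub archive URL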