apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaConnector
metadata:
  name: kafka-connect-s3-connector
  namespace: kafka
  labels:
    strimzi.io/cluster: kafka-connect-cluster
spec:
  class: io.confluent.connect.s3.S3SinkConnector
  tasksMax: 2
  config:
    # Core Connector Configuration
    topics: test-topic

    # S3 Configuration
    s3.region: us-east-1
    s3.bucket.name: delta-lake-test
    s3.part.size: '134217728'  # Multipart upload part size (128 MB)

    # Performance tuning
    flush.size: 500  # Number of records written per file before commit

    # MinIO (or S3) store URL (use environment variable for security)
    store.url: https://s3.dco1.safedc.net

    # Storage and Format Configuration
    storage.class: io.confluent.connect.s3.storage.S3Storage
    format.class: io.confluent.connect.s3.format.parquet.ParquetFormat
    partitioner.class: io.confluent.connect.storage.partitioner.TimeBasedPartitioner
    path.format: "'year'=YYYY/'month'=MM/'day'=dd/'hour'=HH"  # Explicit Hive-style path format
    partition.duration.ms: 3600000  # Hourly partitions for manageability

    # Behavior settings
    behavior.on.null.values: ignore

    # Swedish locale and timezone, used by the time-based partitioner
    locale: sv_SE
    timezone: Europe/Stockholm

    # # Serialization
    # # * Key
    # key.converter: org.apache.kafka.connect.storage.StringConverter
    # key.converter.schemas.enable: false  # Avro schemas usually not needed for keys
    # key.converter.schema.registry.url: http://89.47.191.210:30081
    # # * Value
    # value.converter: io.confluent.connect.avro.AvroConverter
    # value.converter.schema.registry.url: http://89.47.191.210:30081
    # value.converter.schemas.enable: true
    # schema.compatibility: BACKWARD  # Allow schema evolution

    # # Rotation and Batch Handling
    # rotate.interval.ms: 600000  # Reduce overhead in high-throughput scenarios
    # key.converter.use.latest.version: true
    # value.converter.use.latest.version: true

    # # Optional:
    # # s3.compression.type: gzip
    # # store.kafka.keys: true
    # # logging.level: debug
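
# Deployment sketch (comments only, so this file stays valid YAML). This
# assumes the Strimzi Cluster Operator is running, and that a KafkaConnect
# cluster named "kafka-connect-cluster" exists in the "kafka" namespace with
# the Confluent S3 sink plugin installed and the
# strimzi.io/use-connector-resources: "true" annotation set (required for
# Strimzi to reconcile KafkaConnector resources). The filename below is
# hypothetical:
#
#   kubectl apply -f kafka-connect-s3-connector.yaml
#
# Check the connector's reconciliation and runtime state via its status:
#
#   kubectl -n kafka get kafkaconnector kafka-connect-s3-connector -o yaml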