---
# Strimzi KafkaConnector: sinks the `kafka-das-topic` topic to an
# S3-compatible object store as Parquet files, partitioned by hour.
apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaConnector
metadata:
  name: kafka-connect-s3-connector
  namespace: kafka
  labels:
    # Name of the KafkaConnect cluster that should run this connector.
    strimzi.io/cluster: kafka-connect-cluster
spec:
  class: io.confluent.connect.s3.S3SinkConnector
  tasksMax: 2
  config:
    # Core connector configuration
    topics: kafka-das-topic

    # S3 configuration
    s3.region: us-east-1
    s3.bucket.name: delta-lake-test
    s3.part.size: '134217728'  # Multipart upload part size: 128 MiB, in bytes

    # Performance tuning: number of records written per file before commit.
    # NOTE(review): no rotate.interval.ms / rotate.schedule.interval.ms is
    # set, so on a low-volume topic records presumably stay unflushed until
    # 500 accumulate or the hourly partition rolls over - confirm intended.
    flush.size: 500

    # Custom S3-compatible endpoint (e.g. MinIO) used instead of the default
    # AWS endpoint. The value is hard-coded in this manifest.
    store.url: https://s3.dco1.safedc.net

    # Storage and format configuration
    storage.class: io.confluent.connect.s3.storage.S3Storage
    format.class: io.confluent.connect.s3.format.parquet.ParquetFormat
    partitioner.class: io.confluent.connect.storage.partitioner.TimeBasedPartitioner
    # Hive-style partition directories: year=.../month=.../day=.../hour=...
    path.format: "'year'=YYYY/'month'=MM/'day'=dd/'hour'=HH"
    partition.duration.ms: 3600000  # One hour, matching the hourly path.format

    # Behavior settings
    behavior.on.null.values: ignore

    # Locale and time zone used by the time-based partitioner (Swedish)
    locale: sv_SE
    timezone: Europe/Stockholm