Compare commits

...

8 Commits

15 changed files with 8389 additions and 0 deletions

View File

@ -0,0 +1,11 @@
# Runtime image for the consumer-ssl Spring Boot application with a JMX
# Prometheus exporter agent attached.
# NOTE(review): no USER directive — container runs as root; confirm this is acceptable.
FROM amazoncorretto:17-alpine-jdk
# JMX exporter agent jar + its scrape-rule config (0.3.1 is quite old — TODO consider upgrading).
COPY docker/jmx/jmx_prometheus_javaagent-0.3.1.jar /opt/jmx_exporter/jmx_prometheus_javaagent-0.3.1.jar
COPY docker/jmx/client-metrics.yml /opt/jmx_exporter/client-metrics.yml
# Application fat jar produced by `./gradlew bootjar` (see run.sh).
COPY build/libs/consumer-ssl-0.0.1-SNAPSHOT.jar /app/consumer-ssl-0.0.1-SNAPSHOT.jar
# Keystores/truststores generated by keygen.sh, used when CONSUMER_PROTOCOL=ssl.
COPY ssl /ssl
# 8080: Spring Boot HTTP/actuator, 9400: JMX exporter metrics (EXPOSE is documentation only).
EXPOSE 8080
EXPOSE 9400
# Exec form; the agent serves client metrics on :9400 using client-metrics.yml.
ENTRYPOINT ["java", "-javaagent:/opt/jmx_exporter/jmx_prometheus_javaagent-0.3.1.jar=9400:/opt/jmx_exporter/client-metrics.yml", "-jar", "/app/consumer-ssl-0.0.1-SNAPSHOT.jar"]

View File

@ -0,0 +1,10 @@
# Compose file for the client application built from docker/application/Dockerfile_Application.
# (Indentation restored — the extracted copy had lost all YAML nesting.)
services:
  application:
    container_name: client-app
    image: client-application:latest
    ports:
      - "8080:8080"   # Spring Boot HTTP / actuator
      - "9400:9400"   # JMX Prometheus exporter agent
    environment:
      # Both values come from docker/.env, which run.sh rewrites between test phases.
      RUN_ENVIRONMENT: ${RUN_ENVIRONMENT}
      CONSUMER_PROTOCOL: ${CONSUMER_PROTOCOL}

View File

@ -0,0 +1,111 @@
# Three-broker KRaft cluster with a single dedicated controller. Each broker exposes
# PLAINTEXT and SSL listeners (internal + host-mapped variants) and runs a JMX
# Prometheus exporter agent on :7071.
# (Indentation restored — the extracted copy had lost all YAML nesting.)
# NOTE(review): `apache/kafka:latest` is unpinned — pin a version tag for reproducible runs.
services:
  controller:
    image: apache/kafka:latest
    container_name: controller
    environment:
      KAFKA_NODE_ID: 0
      KAFKA_PROCESS_ROLES: controller
      KAFKA_LISTENERS: CONTROLLER://:9093
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
      KAFKA_CONTROLLER_QUORUM_VOTERS: 0@controller:9093
  broker-1:
    image: apache/kafka:latest
    container_name: broker-1
    # NOTE(review): privileged mode should not be required to run Kafka — confirm and drop if possible.
    privileged: true
    ports:
      - "29092:9092"   # PLAINTEXT_HOST
      - "29093:9093"   # SSL_HOST
      - "7071:7071"    # JMX exporter metrics
    environment:
      KAFKA_NODE_ID: 1
      KAFKA_PROCESS_ROLES: broker
      KAFKA_LISTENERS: PLAINTEXT://:19092,PLAINTEXT_HOST://:9092,SSL://:19093,SSL_HOST://:9093
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker-1:19092,PLAINTEXT_HOST://localhost:29092,SSL://broker-1:19093,SSL_HOST://localhost:29093
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT,SSL:SSL,SSL_HOST:SSL
      # Keystore/truststore files live in /etc/kafka/secrets (mounted from ../ssl, see volumes);
      # the `password` credentials file there is written by keygen.sh.
      KAFKA_SSL_TRUSTSTORE_FILENAME: kafka.broker.truststore.jks
      KAFKA_SSL_TRUSTSTORE_CREDENTIALS: password
      KAFKA_SSL_KEYSTORE_FILENAME: kafka.broker.keystore.jks
      KAFKA_SSL_KEYSTORE_CREDENTIALS: password
      KAFKA_SSL_KEY_CREDENTIALS: password
      KAFKA_NUM_PARTITIONS: 3
      KAFKA_DEFAULT_REPLICATION_FACTOR: 3
      KAFKA_MIN_INSYNC_REPLICAS: 2
      KAFKA_OFFSETS_TOPIC_NUM_PARTITIONS: 5
      KAFKA_CONTROLLER_QUORUM_VOTERS: 0@controller:9093
      KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
      KAFKA_OPTS: -javaagent:/opt/jmx_exporter/jmx_prometheus_javaagent-0.3.1.jar=7071:/opt/jmx_exporter/server-metrics.yml
    depends_on:
      - controller
    volumes:
      - ./jmx/server-metrics.yml:/opt/jmx_exporter/server-metrics.yml
      - ./jmx/jmx_prometheus_javaagent-0.3.1.jar:/opt/jmx_exporter/jmx_prometheus_javaagent-0.3.1.jar
      - ../ssl:/etc/kafka/secrets
  broker-2:
    image: apache/kafka:latest
    container_name: broker-2
    privileged: true
    ports:
      - "39092:9092"   # PLAINTEXT_HOST
      - "39093:9093"   # SSL_HOST
      - "7072:7071"    # JMX exporter metrics
    environment:
      KAFKA_NODE_ID: 2
      KAFKA_PROCESS_ROLES: broker
      KAFKA_LISTENERS: PLAINTEXT://:19092,PLAINTEXT_HOST://:9092,SSL://:19093,SSL_HOST://:9093
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker-2:19092,PLAINTEXT_HOST://localhost:39092,SSL://broker-2:19093,SSL_HOST://localhost:39093
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT,SSL:SSL,SSL_HOST:SSL
      KAFKA_SSL_TRUSTSTORE_FILENAME: kafka.broker.truststore.jks
      KAFKA_SSL_TRUSTSTORE_CREDENTIALS: password
      KAFKA_SSL_KEYSTORE_FILENAME: kafka.broker.keystore.jks
      KAFKA_SSL_KEYSTORE_CREDENTIALS: password
      KAFKA_SSL_KEY_CREDENTIALS: password
      KAFKA_NUM_PARTITIONS: 3
      KAFKA_DEFAULT_REPLICATION_FACTOR: 3
      KAFKA_MIN_INSYNC_REPLICAS: 2
      KAFKA_OFFSETS_TOPIC_NUM_PARTITIONS: 5
      KAFKA_CONTROLLER_QUORUM_VOTERS: 0@controller:9093
      KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
      KAFKA_OPTS: -javaagent:/opt/jmx_exporter/jmx_prometheus_javaagent-0.3.1.jar=7071:/opt/jmx_exporter/server-metrics.yml
    depends_on:
      - controller
    volumes:
      - ./jmx/server-metrics.yml:/opt/jmx_exporter/server-metrics.yml
      - ./jmx/jmx_prometheus_javaagent-0.3.1.jar:/opt/jmx_exporter/jmx_prometheus_javaagent-0.3.1.jar
      - ../ssl:/etc/kafka/secrets
  broker-3:
    image: apache/kafka:latest
    container_name: broker-3
    privileged: true
    ports:
      - "49092:9092"   # PLAINTEXT_HOST
      - "49093:9093"   # SSL_HOST
      - "7073:7071"    # JMX exporter metrics
    environment:
      KAFKA_NODE_ID: 3
      KAFKA_PROCESS_ROLES: broker
      KAFKA_LISTENERS: PLAINTEXT://:19092,PLAINTEXT_HOST://:9092,SSL://:19093,SSL_HOST://:9093
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker-3:19092,PLAINTEXT_HOST://localhost:49092,SSL://broker-3:19093,SSL_HOST://localhost:49093
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT,SSL:SSL,SSL_HOST:SSL
      KAFKA_SSL_TRUSTSTORE_FILENAME: kafka.broker.truststore.jks
      KAFKA_SSL_TRUSTSTORE_CREDENTIALS: password
      KAFKA_SSL_KEYSTORE_FILENAME: kafka.broker.keystore.jks
      KAFKA_SSL_KEYSTORE_CREDENTIALS: password
      KAFKA_SSL_KEY_CREDENTIALS: password
      KAFKA_NUM_PARTITIONS: 3
      KAFKA_DEFAULT_REPLICATION_FACTOR: 3
      KAFKA_MIN_INSYNC_REPLICAS: 2
      KAFKA_OFFSETS_TOPIC_NUM_PARTITIONS: 5
      KAFKA_CONTROLLER_QUORUM_VOTERS: 0@controller:9093
      KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
      KAFKA_OPTS: -javaagent:/opt/jmx_exporter/jmx_prometheus_javaagent-0.3.1.jar=7071:/opt/jmx_exporter/server-metrics.yml
    depends_on:
      - controller
    volumes:
      - ./jmx/server-metrics.yml:/opt/jmx_exporter/server-metrics.yml
      - ./jmx/jmx_prometheus_javaagent-0.3.1.jar:/opt/jmx_exporter/jmx_prometheus_javaagent-0.3.1.jar
      - ../ssl:/etc/kafka/secrets

View File

@ -0,0 +1,38 @@
# Monitoring stack: Kafka UI + Prometheus + Grafana.
# (Indentation restored — the extracted copy had lost all YAML nesting.)
# NOTE(review): broker/application hostnames below resolve only if this stack shares a
# network with the other compose projects (e.g. same project directory) — confirm.
services:
  kafka-ui:
    container_name: kafka-ui
    image: provectuslabs/kafka-ui:latest
    ports:
      - "8000:8080"
    environment:
      KAFKA_CLUSTERS_0_NAME: local
      # Internal PLAINTEXT listener addresses of the three brokers.
      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: broker-1:19092,broker-2:19092,broker-3:19092
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
    depends_on:
      - kafka-ui
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    ports:
      - "3000:3000"
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning
      - ./grafana/provisioning/dashboards/dashboards:/var/lib/grafana/dashboards
      - grafana-storage:/var/lib/grafana
    depends_on:
      - prometheus
volumes:
  # Pre-created by run.sh (`docker volume create grafana-storage`), hence external.
  grafana-storage:
    external: true

View File

@ -0,0 +1,10 @@
# Grafana dashboard provisioning: load all dashboard JSON files from the mounted
# /var/lib/grafana/dashboards directory (see docker-compose-monitoring.yml).
# (Indentation restored — the extracted copy had lost all YAML nesting.)
apiVersion: 1
providers:
  - name: 'default'
    orgId: 1
    type: file
    allowUiUpdates: true
    options:
      path: "/var/lib/grafana/dashboards"
      # NOTE(review): `editable` followed `path` in the original flat dump — nested under
      # options here to match that order; verify against Grafana provisioning docs.
      editable: true

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,9 @@
# Grafana datasource provisioning: single Prometheus datasource reached by its
# compose service name (see docker-compose-monitoring.yml).
# (Indentation restored — the extracted copy had lost all YAML nesting.)
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true

View File

@ -0,0 +1,107 @@
# source: https://blog.voidmainvoid.net/476
# JMX Prometheus exporter rules for Kafka client (producer/consumer/connect) MBeans.
# (Indentation restored — the extracted copy had lost all YAML nesting.)
lowercaseOutputName: true
rules:
  #kafka.connect:type=app-info,client-id="{clientid}"
  #kafka.consumer:type=app-info,client-id="{clientid}"
  #kafka.producer:type=app-info,client-id="{clientid}"
  - pattern: 'kafka.(.+)<type=app-info, client-id=(.+)><>start-time-ms'
    name: kafka_$1_start_time_seconds
    labels:
      clientId: "$2"
    help: "Kafka $1 JMX metric start time seconds"
    type: GAUGE
    valueFactor: 0.001   # ms -> s
  - pattern: 'kafka.(.+)<type=app-info, client-id=(.+)><>(commit-id|version): (.+)'
    name: kafka_$1_$3_info
    value: 1
    labels:
      clientId: "$2"
      $3: "$4"   # label name itself is the captured attribute (commit-id or version)
    help: "Kafka $1 JMX metric info version and commit-id"
    type: GAUGE
  #kafka.producer:type=producer-topic-metrics,client-id="{clientid}",topic="{topic}"", partition="{partition}"
  #kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}",topic="{topic}"", partition="{partition}"
  - pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.+), topic=(.+), partition=(.+)><>(.+-total|compression-rate|.+-avg|.+-replica|.+-lag|.+-lead)
    name: kafka_$2_$6
    labels:
      clientId: "$3"
      topic: "$4"
      partition: "$5"
    help: "Kafka $1 JMX metric type $2"
    type: GAUGE
  #kafka.producer:type=producer-topic-metrics,client-id="{clientid}",topic="{topic}"
  #kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}",topic="{topic}"", partition="{partition}"
  - pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.+), topic=(.+)><>(.+-total|compression-rate|.+-avg)
    name: kafka_$2_$5
    labels:
      clientId: "$3"
      topic: "$4"
    help: "Kafka $1 JMX metric type $2"
    type: GAUGE
  #kafka.connect:type=connect-node-metrics,client-id="{clientid}",node-id="{nodeid}"
  #kafka.consumer:type=consumer-node-metrics,client-id=consumer-1,node-id="{nodeid}"
  - pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.+), node-id=(.+)><>(.+-total|.+-avg)
    name: kafka_$2_$5
    labels:
      clientId: "$3"
      nodeId: "$4"
    help: "Kafka $1 JMX metric type $2"
    type: UNTYPED
  #kafka.connect:type=kafka-metrics-count,client-id="{clientid}"
  #kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}"
  #kafka.consumer:type=consumer-coordinator-metrics,client-id="{clientid}"
  #kafka.consumer:type=consumer-metrics,client-id="{clientid}"
  - pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.*)><>(.+-total|.+-avg|.+-bytes|.+-count|.+-rate|.+-ratio|.+-age|.+-flight|.+-threads|.+-connectors|.+-tasks|.+-ago)
    name: kafka_$2_$4
    labels:
      clientId: "$3"
    help: "Kafka $1 JMX metric type $2"
    type: GAUGE
  #kafka.connect:type=connector-task-metrics,connector="{connector}",task="{task}<> status"
  - pattern: 'kafka.connect<type=connector-task-metrics, connector=(.+), task=(.+)><>status: ([a-z-]+)'
    name: kafka_connect_connector_status
    value: 1
    labels:
      connector: "$1"
      task: "$2"
      status: "$3"
    help: "Kafka Connect JMX Connector status"
    type: GAUGE
  #kafka.connect:type=task-error-metrics,connector="{connector}",task="{task}"
  #kafka.connect:type=source-task-metrics,connector="{connector}",task="{task}"
  #kafka.connect:type=sink-task-metrics,connector="{connector}",task="{task}"
  #kafka.connect:type=connector-task-metrics,connector="{connector}",task="{task}"
  - pattern: kafka.connect<type=(.+)-metrics, connector=(.+), task=(.+)><>(.+-total|.+-count|.+-ms|.+-ratio|.+-avg|.+-failures|.+-requests|.+-timestamp|.+-logged|.+-errors|.+-retries|.+-skipped)
    name: kafka_connect_$1_$4
    labels:
      connector: "$2"
      task: "$3"
    help: "Kafka Connect JMX metric type $1"
    type: GAUGE
  #kafka.connect:type=connector-metrics,connector="{connector}"
  #kafka.connect:type=connect-worker-metrics,connector="{connector}"
  - pattern: kafka.connect<type=connect-worker-metrics, connector=(.+)><>([a-z-]+)
    name: kafka_connect_worker_$2
    labels:
      connector: "$1"
    help: "Kafka Connect JMX metric $1"
    type: GAUGE
  #kafka.connect:type=connect-worker-metrics
  - pattern: kafka.connect<type=connect-worker-metrics><>([a-z-]+)
    name: kafka_connect_worker_$1
    help: "Kafka Connect JMX metric worker"
    type: GAUGE
  #kafka.connect:type=connect-worker-rebalance-metrics
  - pattern: kafka.connect<type=connect-worker-rebalance-metrics><>([a-z-]+)
    name: kafka_connect_worker_rebalance_$1
    help: "Kafka Connect JMX metric rebalance information"
    type: GAUGE

Binary file not shown.

View File

@ -0,0 +1,247 @@
# source: https://github.com/oded-dd/prometheus-jmx-kafka/tree/master
# JMX Prometheus exporter rules for Kafka broker (server-side) MBeans.
# (Indentation restored — the extracted copy had lost all YAML nesting.)
# NOTE(review): "precent" below is a typo inherited from the upstream config; it is part
# of the exported metric names, so renaming it would break existing dashboards — left as-is.
lowercaseOutputName: true
rules:
  - pattern: kafka.server<type=ReplicaManager, name=UnderReplicatedPartitions><>Value
    name: kafka_server_under_replicated_partitions
    help: Number of under-replicated partitions (| ISR | < | all replicas |). Alert if value is greater than 0
    type: GAUGE
  - pattern: kafka.controller<type=KafkaController, name=OfflinePartitionsCount><>Value
    name: kafka_controller_offline_partitions_count
    help: Number of partitions that do not have an active leader and are hence not writable or readable. Alert if value is greater than 0
    type: GAUGE
  - pattern: kafka.controller<type=KafkaController, name=ActiveControllerCount><>Value
    name: kafka_controller_active_controller_count
    help: Number of active controllers in the cluster. Alert if the aggregated sum across all brokers in the cluster is anything other than 1 (there should be exactly one controller per cluster)
    type: GAUGE
  - pattern: kafka.server<type=BrokerTopicMetrics, name=BytesInPerSec><>Count
    name: kafka_server_total_bytes_in_per_sec
    help: Aggregate incoming byte rate
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=BytesInPerSec, topic=(.+)><>Count
    name: kafka_server_total_bytes_in_per_sec_per_topic
    help: Aggregate incoming byte rate per topic
    labels:
      topic: "$1"
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=BytesOutPerSec><>Count
    name: kafka_server_total_bytes_out_per_sec
    help: Aggregate outgoing byte rate
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=BytesOutPerSec, topic=(.+)><>Count
    name: kafka_server_total_bytes_out_per_sec_per_topic
    help: Aggregate outgoing byte rate per topic
    labels:
      topic: "$1"
    type: UNTYPED
  - pattern: kafka.network<type=RequestMetrics, name=RequestsPerSec, request=(Produce|FetchConsumer|FetchFollower)><>Count
    name: kafka_network_requests_per_sec
    help: Request rate (Produce, FetchConsumer, FetchFollower)
    labels:
      request: "$1"
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=TotalProduceRequestsPerSec><>Count
    name: kafka_server_total_produce_requests_per_sec
    help: Produce request rate
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=TotalProduceRequestsPerSec, topic=(.+)><>Count
    name: kafka_server_total_produce_requests_per_sec_per_topic
    help: Produce request rate per topic
    labels:
      topic: "$1"
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=TotalFetchRequestsPerSec><>Count
    name: kafka_server_total_fetch_requests_per_sec
    help: Fetch request rate
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=TotalFetchRequestsPerSec, topic=(.+)><>Count
    name: kafka_server_total_fetch_requests_per_sec_per_topic
    help: Fetch request rate per topic
    labels:
      topic: "$1"
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=FailedProduceRequestsPerSec><>Count
    name: kafka_server_failed_produce_requests_per_sec
    help: Produce request rate for requests that failed
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=FailedProduceRequestsPerSec, topic=(.*)><>Count
    name: kafka_server_failed_produce_requests_per_sec_per_topic
    help: Produce request rate for requests that failed per topic
    labels:
      topic: "$1"
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=FailedFetchRequestsPerSec><>Count
    name: kafka_server_failed_fetch_requests_per_sec
    help: Fetch request rate for requests that failed
    type: UNTYPED
  - pattern: kafka.server<type=BrokerTopicMetrics, name=FailedFetchRequestsPerSec, topic=(.*)><>Count
    name: kafka_server_failed_fetch_requests_per_sec_per_topic
    help: Fetch request rate for requests that failed per topic
    labels:
      topic: "$1"
    type: UNTYPED
  - pattern: kafka.controller<type=ControllerStats, name=LeaderElectionRateAndTimeMs><>Count
    name: kafka_controller_leader_election_rate_time
    help: Leader election rate and latency (rate in seconds, latency|time in ms)
    type: UNTYPED
  - pattern: kafka.controller<type=ControllerStats, name=UncleanLeaderElectionsPerSec><>Count
    name: kafka_controller_unclean_leader_elections_per_sec
    help: Unclean leader election rate
    type: UNTYPED
  - pattern: kafka.server<type=ReplicaManager, name=PartitionCount><>Value
    name: kafka_server_partition_count
    help: Number of partitions on this broker (This should be mostly even across all brokers)
    type: GAUGE
  - pattern: kafka.server<type=ReplicaManager, name=LeaderCount><>Value
    name: kafka_server_leader_count
    help: Number of leaders on this broker (This should be mostly even across all brokers)
    type: GAUGE
  - pattern: kafka.server<type=ReplicaFetcherManager, name=MaxLag, clientId=Replica><>Value
    name: kafka_server_max_lag_in_replica
    help: Maximum lag in messages between the follower and leader replicas
    type: GAUGE
  - pattern: kafka.server<type=KafkaRequestHandlerPool, name=RequestHandlerAvgIdlePercent><>Count
    name: kafka_server_request_handler_avg_idle_precent
    help: Average fraction of time the request handler threads are idle. Values are between 0 (all resources are used) and 1 (all resources are available)
    type: GAUGE
  - pattern: kafka.network<type=SocketServer, name=NetworkProcessorAvgIdlePercent><>Value
    name: kafka_network_network_processor_avg_idle_precent
    help: Average fraction of time the network processor threads are idle. Values are between 0 (all resources are used) and 1 (all resources are available)
    type: GAUGE
  - pattern: kafka.network<type=RequestChannel, name=RequestQueueSize><>Value
    name: kafka_network_request_queue_size
    help: Size of the request queue. A congested request queue will not be able to process incoming or outgoing requests
    type: GAUGE
  - pattern: kafka.network<type=RequestMetrics, name=TotalTimeMs, request=(Produce|FetchConsumer|FetchFollower)><>Count
    name: kafka_network_total_time_ms
    help: Total time in ms to serve the specified request (Produce, FetchConsumer, FetchFollower)
    labels:
      request: "$1"
    type: COUNTER
  - pattern: kafka.network<type=RequestMetrics, name=RequestQueueTimeMs, request=(Produce|FetchConsumer|FetchFollower)><>Count
    name: kafka_network_request_queue_time_ms
    help: Time the request waits in the request queue (Produce, FetchConsumer, FetchFollower)
    labels:
      request: "$1"
    type: COUNTER
  - pattern: kafka.network<type=RequestMetrics, name=LocalTimeMs, request=(Produce|FetchConsumer|FetchFollower)><>Count
    name: kafka_network_local_time_ms
    help: Time the request is processed at the leader (Produce, FetchConsumer, FetchFollower)
    labels:
      request: "$1"
    type: COUNTER
  - pattern: kafka.network<type=RequestMetrics, name=RemoteTimeMs, request=(Produce|FetchConsumer|FetchFollower)><>Count
    name: kafka_network_remote_time_ms
    help: Time the request waits for the follower (Produce, FetchConsumer, FetchFollower)
    labels:
      request: "$1"
    type: COUNTER
  - pattern: kafka.network<type=RequestMetrics, name=ResponseQueueTimeMs, request=(Produce|FetchConsumer|FetchFollower)><>Count
    name: kafka_network_response_queue_time_ms
    help: Time the request waits in the response queue (Produce, FetchConsumer, FetchFollower)
    labels:
      request: "$1"
    type: COUNTER
  - pattern: kafka.network<type=RequestMetrics, name=ResponseSendTimeMs, request=(Produce|FetchConsumer|FetchFollower)><>Count
    name: kafka_network_response_send_time_ms
    help: Time to send the response (Produce, FetchConsumer, FetchFollower)
    labels:
      request: "$1"
    type: COUNTER
  - pattern: kafka.server<type=BrokerTopicMetrics, name=MessagesInPerSec><>Count
    name: kafka_server_messages_in_per_sec
    help: Aggregate incoming message rate
    type: COUNTER
  - pattern: kafka.server<type=BrokerTopicMetrics, name=MessagesInPerSec, topic=(.+)><>Count
    name: kafka_server_messages_in_per_sec_per_topic
    help: Aggregate incoming message rate per_topic
    labels:
      topic: "$1"
    type: COUNTER
  - pattern: kafka.log<type=LogFlushStats, name=LogFlushRateAndTimeMs><>Count
    name: kafka_log_log_flush_rate_time
    help: Log flush rate and time (rate in seconds, latency|time in ms)
    type: UNTYPED
  - pattern: kafka.server<type=ReplicaManager, name=IsrShrinksPerSec><>Count
    name: kafka_server_isr_shrinks_per_sec
    help: If a broker goes down, ISR for some of the partitions will shrink. When that broker is up again, ISR will be expanded once the replicas are fully caught up. Other than that, the expected value for both ISR shrink rate and expansion rate is 0
    type: GAUGE
  - pattern: kafka.server<type=ReplicaManager, name=IsrExpandsPerSec><>Count
    name: kafka_server_isr_expands_per_sec
    help: When a broker is brought up after a failure, it starts catching up by reading from the leader. Once it is caught up, it gets added back to the ISR.
    type: GAUGE
  - pattern: kafka.server<type=DelayedOperationPurgatory, name=PurgatorySize, delayedOperation=(Produce|Fetch)><>Value
    name: kafka_server_purgatory_size
    help: Number of requests waiting in the producer purgatory. This should be non-zero when acks=all is used on the producer | Number of requests waiting in the fetch purgatory. This is high if consumers use a large value for fetch.wait.max.ms
    labels:
      delayed_operation: "$1"
    type: GAUGE
  # NOTE(review): the SessionExpireListener/ZooKeeper rules below only match on
  # ZooKeeper-based clusters; this demo runs KRaft, so they are inert but harmless.
  - pattern: kafka.server<type=SessionExpireListener, name=ZooKeeperDisconnectsPerSec><>Count
    name: kafka_server_zookeeper_disconnects_per_sec
    help: Zookeeper client is currently disconnected from the ensemble. The client lost its previous connection to a server and it is currently trying to reconnect. The session is not necessarily expired
    type: UNTYPED
  - pattern: kafka.server<type=SessionExpireListener, name=ZooKeeperExpiresPerSec><>Count
    name: kafka_server_zookeeper_expires_per_sec
    help: The ZooKeeper session has expired. When a session expires, we can have leader changes and even a new controller. Alert if value of such events across a Kafka cluster and if the overall number is high
    type: UNTYPED
  - pattern: kafka.server<type=SessionExpireListener, name=ZooKeeperSyncConnectsPerSec><>Count
    name: kafka_server_zookeeper_sync_connects_per_sec
    help: ZooKeeper client is connected to the ensemble and ready to execute operations
    type: UNTYPED
  - pattern: kafka.server<type=SessionExpireListener, name=ZooKeeperAuthFailuresPerSec><>Count
    name: kafka_server_zookeeper_auth_failure_per_sec
    help: An attempt to connect to the ensemble failed because the client has not provided correct credentials
    type: UNTYPED
  - pattern: kafka.server<type=SessionExpireListener, name=ZooKeeperReadOnlyConnectsPerSec><>Count
    name: kafka_server_zookeeper_readonly_connects_per_sec
    help: The server the client is connected to is currently LOOKING, which means that it is neither FOLLOWING nor LEADING. Consequently, the client can only read the ZooKeeper state, but not make any changes (create, delete, or set the data of znodes)
    type: UNTYPED
  - pattern: kafka.server<type=SessionExpireListener, name=ZooKeeperSaslAuthenticationsPerSec><>Count
    name: kafka_server_zookeeper_sasl_auth_per_sec
    help: Client has successfully authenticated
    type: UNTYPED
  - pattern: kafka.server<type=SessionExpireListener, name=ZooKeeperExpiredPerSec><>Count
    name: kafka_server_zookeeper_expired_per_sec
    help: The ZooKeeper session has expired. When a session expires, we can have leader changes and even a new controller. Alert if value of such events across a Kafka cluster and if the overall number is high
    type: UNTYPED

View File

@ -0,0 +1,22 @@
# Prometheus scrape configuration for the Kafka demo stack.
# (Indentation restored — the extracted copy had lost all YAML nesting.)
global:
  scrape_interval: 15s
  scrape_timeout: 10s
scrape_configs:
  # Broker-side JMX exporter agents (port 7071, see docker-compose-kafka-cluster.yml).
  - job_name: 'kafka-server'
    static_configs:
      - targets:
          - 'broker-1:7071'
          - 'broker-2:7071'
          - 'broker-3:7071'
  # Client application's JMX exporter agent (port 9400, see Dockerfile_Application).
  - job_name: 'kafka-application-jmx'
    static_configs:
      - targets:
          - 'application:9400'
  # Spring Boot actuator Prometheus endpoint on the application's HTTP port.
  - job_name: 'kafka-application-actuator'
    metrics_path: '/actuator/prometheus'
    static_configs:
      - targets:
          - 'application:8080'

46
keygen.sh Executable file
View File

@ -0,0 +1,46 @@
#!/bin/bash
# Generates the SSL material used by the broker SSL listeners and the client app:
# a throwaway CA, a broker keystore signed by that CA (SANs for broker-1..3 and
# localhost), a truststore holding the CA cert, and a `password` credentials file
# (consumed via KAFKA_SSL_*_CREDENTIALS in docker-compose-kafka-cluster.yml).
# Fix vs. original: fail fast on any keytool error and quote all expansions.
set -euo pipefail

PASSWORD="abcd1234"   # demo-only credential; also written to ./ssl/password below
VALIDITY_DAYS=365
KEYS_DIR="ssl"
DNAME="CN=localhost, OU=Dev, O=MyCompany, L=Seoul, C=KR"
CA_DNAME="CN=My Kafka CA, OU=Dev, O=MyCompany, L=Seoul, C=KR"

# Start from a clean key directory on every run.
rm -rf "./${KEYS_DIR}"
mkdir "${KEYS_DIR}"
cd "${KEYS_DIR}"

# 1. Self-signed CA key pair.
keytool -genkeypair -alias ca -keyalg RSA -keysize 2048 -validity "${VALIDITY_DAYS}" \
  -keystore kafka.ca.keystore.jks \
  -storepass "${PASSWORD}" -keypass "${PASSWORD}" -dname "${CA_DNAME}" \
  -ext "BasicConstraints:critical=ca:true"

# 2. Export the CA certificate for import into the broker stores.
keytool -exportcert -alias ca -file ca.crt \
  -keystore kafka.ca.keystore.jks -storepass "${PASSWORD}"

# 3. Broker key pair, CSR, and CA-signed certificate with the broker/localhost SANs.
keytool -genkeypair -alias broker -keyalg RSA -keysize 2048 -validity "${VALIDITY_DAYS}" \
  -keystore kafka.broker.keystore.jks \
  -storepass "${PASSWORD}" -keypass "${PASSWORD}" -dname "${DNAME}"
keytool -certreq -alias broker -file broker.csr \
  -keystore kafka.broker.keystore.jks -storepass "${PASSWORD}"
keytool -gencert -alias ca -infile broker.csr -outfile broker.crt \
  -keystore kafka.ca.keystore.jks -storepass "${PASSWORD}" -validity "${VALIDITY_DAYS}" \
  -ext "SAN=dns:broker-1,dns:broker-2,dns:broker-3,dns:localhost" \
  -ext "ExtendedKeyUsage=serverAuth,clientAuth"

# 4. Build the broker keystore chain (CA first, then signed cert) and the truststore.
keytool -importcert -alias ca -file ca.crt \
  -keystore kafka.broker.keystore.jks -storepass "${PASSWORD}" -noprompt
keytool -importcert -alias broker -file broker.crt \
  -keystore kafka.broker.keystore.jks -storepass "${PASSWORD}" -noprompt
keytool -importcert -alias ca -file ca.crt \
  -keystore kafka.broker.truststore.jks -storepass "${PASSWORD}" -noprompt

# 5. Plaintext credentials file read by the broker containers.
#    NOTE(review): storing the password on disk is acceptable for this demo only.
echo "${PASSWORD}" > password

# 6. Intermediate artifacts are no longer needed.
rm ca.crt broker.csr broker.crt

echo ""
echo "🎉 SSL/TLS 키 파일 생성 완료"

89
run.sh Executable file
View File

@ -0,0 +1,89 @@
#!/bin/bash
# End-to-end demo driver: regenerates SSL material, starts the Kafka cluster and
# monitoring stack, builds the client image, then runs the consumer test once in
# PLAINTEXT mode and once in SSL mode.
# Fix vs. original: `rm docker/.env` printed an error on the first run (file does
# not exist yet) — replaced with `rm -f`, which is idempotent.

echo "0. 실행 중인 모든 컨테이너 종료"
find docker/ -name 'docker-compose*.yml' -exec docker compose -f {} down --remove-orphans \;

echo "0. SSL 인증서 생성"
./keygen.sh

echo " "
echo "======================"
echo " "
echo "1. 그라파나 볼륨 생성"
GRAFANA_VOLUME="grafana-storage"
# Create the external Grafana volume only if it does not exist yet
# (docker-compose-monitoring.yml declares it as `external: true`).
if [ -z "$(docker volume ls --filter name=${GRAFANA_VOLUME} --format '{{ .Name }}')" ]; then
  echo "그라파나에서 사용할 ${GRAFANA_VOLUME} 볼륨을 생성합니다."
  docker volume create ${GRAFANA_VOLUME}
fi

echo " "
echo "======================"
echo " "
echo "2. 카프카 & 모니터링 툴 실행"
docker compose -f docker/docker-compose-kafka-cluster.yml up -d
docker compose -f docker/docker-compose-monitoring.yml up -d

echo " "
echo "======================"
echo " "
echo "3. 애플리케이션 이미지 빌드"
./gradlew bootjar
docker buildx build -t client-application:latest -f docker/application/Dockerfile_Application .

echo " "
echo "======================"
echo " "
echo "4. 애플리케이션 실행 및 테스트"
echo "[4-1] PLAINTEXT 모드"
# Rewrite docker/.env for the PLAINTEXT phase; -f keeps the first run quiet.
rm -f docker/.env
echo "RUN_ENVIRONMENT=docker" > docker/.env
echo "CONSUMER_PROTOCOL=plaintext" >> docker/.env
docker compose -f docker/docker-compose-application.yml up -d
echo "[4-1] 실행 후 30초 대기..."
sleep 30
echo "[4-1] 테스트 시작"
./gradlew clean test --tests "org.study.consumerssl.data.TestUtils.produceRecords"
echo "[4-1] 테스트 완료. SSL 모드 실행 전 2분 대기.."
sleep 120
echo "[4-1] 토픽 초기화.."
./gradlew clean test --tests "org.study.consumerssl.data.TestUtils.deleteAllExistTopics"

echo " "
echo "---------------------"
echo " "
echo "[4-2] SSL 모드"
# Rewrite docker/.env for the SSL phase and restart the application container.
rm -f docker/.env
echo "RUN_ENVIRONMENT=docker" > docker/.env
echo "CONSUMER_PROTOCOL=ssl" >> docker/.env
echo "[4-2] 애플리케이션 재시작"
docker compose -f docker/docker-compose-application.yml down
docker compose -f docker/docker-compose-application.yml up -d
echo "[4-2] 실행 후 30초 대기..."
sleep 30
echo "[4-2] 테스트 시작"
./gradlew clean test --tests "org.study.consumerssl.data.TestUtils.produceRecords"
echo "[4-2] 이전과 동일하게 2분 대기 후 애플리케이션 종료.."
sleep 120
docker compose -f docker/docker-compose-application.yml down

echo " "
echo "======================"
echo " "
echo "테스트 완료. 대시보드 확인하기 👇"
echo "ID: admin / PW: admin"
echo "Consumer: http://localhost:3000/d/-C-IEldWk2/redpanda-kafka-java-consumer"
echo "Springboot: http://localhost:3000/d/spring_boot_21/spring-boot-2-1-system-monitor"
echo "Kafka UI: http://localhost:8000"
echo "확인이 끝났다면 ./close-all.sh 를 실행하여 초기화 가능합니다."