赞
踩
# Create the working directory and generate the launcher script for the
# kafka_exporter container. The script is only written here, not executed;
# run `bash /data/kafka_exporter/start.sh` afterwards to start the exporter.
mkdir -p /data/kafka_exporter

# The heredoc delimiter is quoted ('EOF') so that $0 and $(...) are written
# literally into start.sh instead of being expanded while generating the file.
# NOTE(review): the SASL credentials are passed on the command line and are
# therefore visible via `docker inspect`; replace the sample values.
cat > /data/kafka_exporter/start.sh << 'EOF'
#!/bin/bash
docker rm -f kafka_exporter
cd "$(dirname "$0")" || exit 1
docker run -d \
  --name kafka_exporter \
  --restart=always \
  --network host \
  -v /etc/localtime:/etc/localtime:ro \
  danielqsj/kafka-exporter:v1.7.0 \
  --web.listen-address :9308 \
  --sasl.enabled \
  --sasl.mechanism plain \
  --kafka.server 192.168.11.192:9092 \
  --sasl.username=alice \
  --sasl.password=alice \
  --log.level info \
  --web.telemetry-path /metrics
EOF
kafka_exporter 的 metrics 访问地址:
http://192.168.11.192:9308/metrics
# Append the 'kafka' scrape job (file-based service discovery) to the
# Prometheus configuration.
#
# Relabeling performed by the job:
#   1. instance    <- the discovered address as-is (broker host:port)
#   2. ip          <- the host part of the discovered address
#   3. __address__ <- host:9308, so Prometheus scrapes kafka_exporter rather
#                     than the broker port listed in the target file
#
# NOTE: `>>` appends, so running this twice adds a duplicate job.
# NOTE(review): the entry is indented by two spaces on the assumption that it
# lands under an existing `scrape_configs:` list using that indentation —
# adjust to match your prometheus.yml.
# The quoted delimiter keeps `$1` literal (it is a regex capture reference).
cat >> /data/prometheus/conf/prometheus.yml << 'EOF'
#kafka自动发现
  - job_name: 'kafka'
    file_sd_configs:
      - files:
          - /etc/prometheus/sd_config/kafka.yaml
        refresh_interval: 5s
    relabel_configs:
      - source_labels: [__address__]
        regex: (.*)
        target_label: instance
        replacement: $1
      - source_labels: [__address__]
        regex: (.*):(.*)
        target_label: ip
        replacement: $1
      - source_labels: [__address__]
        regex: (.*):(.*)
        target_label: __address__
        replacement: $1:9308
EOF
# Write the file_sd target list consumed by the 'kafka' scrape job.
# Each entry is one target group: a `labels` map attached to every target,
# plus the `targets` list itself. The nesting below is required; with all keys
# at column 0 the file is not a valid target-group list.
mkdir -p /data/prometheus/conf/sd_config
cat > /data/prometheus/conf/sd_config/kafka.yaml << 'EOF'
#kafka自动发现
- labels:
    type: kafka
  targets:
    - 192.168.11.192:9092
    - 192.168.11.193:9092
    - 192.168.11.194:9092
EOF
访问 http://192.168.11.221:9090
Grafana 仪表盘 ID:7589
自定义图表,请参考:https://blog.csdn.net/qq_34864753/article/details/103953385
# Write the alerting rules evaluated against kafka_exporter metrics.
# The quoted delimiter keeps the {{ $labels... }} / {{ $value }} templates
# literal so Prometheus, not the shell, expands them.
#
# NOTE(review): both expressions aggregate with `sum(...) by (topic)` /
# `by (consumergroup)`, which drops the `instance` label, so
# `{{ $labels.instance }}` in the summaries will presumably render empty —
# consider `{{ $labels.topic }}` / `{{ $labels.consumergroup }}` instead.
cat > /data/prometheus/conf/rules/kafka.rules << 'EOF'
groups:
- name: kafka-监控告警
  rules:
  - alert: 告警!Kafka Topics 副本数少于3
    expr: sum(kafka_topic_partition_in_sync_replica) by (topic) < 3
    for: 0m
    labels:
      severity: 严重告警
    annotations:
      summary: "{{ $labels.instance }} Kafka topics 副本数少于3"
      description: "Kafka topic 分区不同步\n 当前值 = {{ $value }}"
  - alert: 告警!KafkaConsumersGroup
    expr: sum(kafka_consumergroup_lag) by (consumergroup) > 50
    for: 1m
    labels:
      severity: 严重告警
    annotations:
      summary: "{{ $labels.instance }} Kafka consumers group"
      description: "Kafka consumers group\n 当前值 = {{ $value }}"
EOF
# Fetch the JMX exporter rules file and the Java agent jar that the Kafka
# container bind-mounts.
mkdir -p /data/kafka/etc

# Each download is chained to its `cd` with && so a failed directory change
# cannot drop the file into whatever directory the shell happens to be in.
# NOTE(review): this URL tracks the `main` branch, so the file may move or
# change over time; consider pinning to a tag matching the agent version.
cd /data/kafka/etc/ &&
  wget https://raw.githubusercontent.com/prometheus/jmx_exporter/main/example_configs/kafka-kraft-3_0_0.yml

cd /data/kafka/ &&
  wget https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.20.0/jmx_prometheus_javaagent-0.20.0.jar
# Generate and run the launcher for a single-node KRaft Kafka broker with the
# Prometheus JMX exporter agent loaded via KAFKA_OPTS (metrics on port 9999).
mkdir -p /data/kafka/etc

# The heredoc delimiter is quoted ('EOF') so $0 and $(pwd) are written
# literally into start.sh and evaluated only when start.sh itself runs.
# NOTE(review): etc/sasl_config.properties is bind-mounted but is not created
# anywhere in this guide; if it is missing Docker will presumably create a
# directory at that path — confirm the file exists before starting.
# NOTE(review): replace the placeholder passwords / cluster id (…XXXX).
cat > /data/kafka/start.sh << 'EOF'
#!/bin/bash
cd "$(dirname "$0")" || exit 1
docker rm -f kafka
docker run -d \
  --name kafka \
  --restart=always \
  --net host \
  -e KAFKA_NODE_ID=0 \
  -e KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://192.168.11.100:9092 \
  -e KAFKA_HEAP_OPTS="-Xmx512m -Xms512m" \
  -e KAFKA_OPTS="-javaagent:/opt/jmx_prometheus_javaagent-0.20.0.jar=9999:/opt/kafka-kraft-3_0_0.yml" \
  -e KAFKA_CFG_PROCESS_ROLES=broker,controller \
  -e KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER \
  -e KAFKA_CFG_SASL_MECHANISM_CONTROLLER_PROTOCOL=PLAIN \
  -e KAFKA_CONTROLLER_USER=contr0ller \
  -e KAFKA_CONTROLLER_PASSWORD=Contr0ller#XXXX \
  -e KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:SASL_PLAINTEXT,CONTROLLER:SASL_PLAINTEXT \
  -e KAFKA_CFG_LISTENERS=PLAINTEXT://0.0.0.0:9092,CONTROLLER://0.0.0.0:9093 \
  -e KAFKA_ENABLE_KRAFT=yes \
  -e KAFKA_KRAFT_CLUSTER_ID="Aqvf7RVETX-DInZbNUXXXX" \
  -e KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=0@192.168.11.100:9093 \
  -e ALLOW_PLAINTEXT_LISTENER=yes \
  -e KAFKA_TLS_CLIENT_AUTH=none \
  -e KAFKA_CFG_SASL_ENABLED_MECHANISMS=PLAIN \
  -e KAFKA_CLIENT_LISTENER_NAME=PLAINTEXT \
  -e KAFKA_CLIENT_USERS=gohangout \
  -e KAFKA_CLIENT_PASSWORDS=Gohangout#XXXX \
  -e KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE=true \
  -v "$(pwd)/etc/sasl_config.properties:/opt/bitnami/kafka/config/sasl_config.properties" \
  -v "$(pwd)/etc/kafka-kraft-3_0_0.yml:/opt/kafka-kraft-3_0_0.yml" \
  -v "$(pwd)/jmx_prometheus_javaagent-0.20.0.jar:/opt/jmx_prometheus_javaagent-0.20.0.jar" \
  -v "$(pwd)/data:/bitnami/kafka/" \
  -v /etc/localtime:/etc/localtime \
  bitnami/kafka:3.7.0
EOF

bash /data/kafka/start.sh

# Verify that the JMX exporter agent is serving metrics. The broker may need
# a few seconds after `docker run` before this endpoint responds.
curl http://192.168.11.100:9999/metrics
在 prometheus.yml 的 scrape_configs 下追加以下内容:
# Scrape job for the JMX exporter agent; add it under `scrape_configs:`.
# Targets come from the file_sd file below; relabeling keeps the discovered
# address as `instance`, copies its host part to `ip`, and rewrites the scrape
# address to host:9999 (the agent port).
# NOTE(review): the two-space indent assumes the existing list items in
# prometheus.yml are indented the same way — adjust to match.
  - job_name: 'kafka-jmx'
    file_sd_configs:
      - files:
          - /etc/prometheus/sd_config/kafka-jmx.yaml
        refresh_interval: 5s
    relabel_configs:
      - source_labels: [__address__]
        regex: (.*)
        target_label: instance
        replacement: $1
      - source_labels: [__address__]
        regex: (.*):(.*)
        target_label: ip
        replacement: $1
      - source_labels: [__address__]
        regex: (.*):(.*)
        target_label: __address__
        replacement: $1:9999
kafka-jmx.yaml(放在 sd_config 目录下的自动发现目标列表):
# file_sd target group for the 'kafka-jmx' job: `labels` is attached to every
# target in the group. The port listed here is rewritten to 9999 by the job's
# relabel_configs.
- labels:
    type: kafka-jmx
  targets:
    - 192.168.11.100:9092
    - 192.168.11.101:9092
    - 192.168.11.102:9092
Grafana 仪表盘 ID:11962
Grafana 仪表盘 ID:18276
# Write the alerting rules evaluated against the JMX exporter metrics.
# The quoted delimiter keeps the {{ $labels... }} / {{ $value }} templates
# literal so Prometheus, not the shell, expands them.
#
# NOTE(review): the file is written to the current directory, unlike
# kafka.rules which goes to /data/prometheus/conf/rules/ — run this from the
# rules directory or move the file there.
# NOTE(review): the "Controller 大于 1" rule sums per (instance, ip); if each
# broker reports its active-controller count as 0 or 1, that per-instance sum
# can presumably never exceed 1 — confirm, and consider aggregating across
# the whole cluster instead.
cat >kafka_jmx.rules <<'EOF'
groups:
- name: kafka-jmx-监控告警
  rules:
  - alert: 告警!Kafka 复制分区不足
    expr: kafka_server_replicamanager_underreplicatedpartitions > 0
    for: 1m
    labels:
      severity: 严重告警
    annotations:
      description: "{{ $labels.instance }} Kafka 复制分区不足,当前值 = {{ $value }}"
  - alert: 告警!Kafka Controller 大于 1
    expr: sum(kafka_controller_kafkacontroller_activecontrollercount) by (instance,ip) > 1
    for: 1m
    labels:
      severity: 严重告警
    annotations:
      description: "{{ $labels.instance }} Kafka Controller数量,当前值 = {{ $value }}"
  - alert: 告警!Kafka 离线分区数
    expr: kafka_controller_kafkacontroller_offlinepartitionscount > 0
    for: 1m
    labels:
      severity: 严重告警
    annotations:
      description: "{{ $labels.instance }} Kafka 离线分区数,当前值 = {{ $value }}"
EOF
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。