赞
踩
Prometheus:当前一套非常流行的开源监控和报警系统。
运行原理:通过HTTP协议周期性抓取被监控组件的状态。输出被监控组件信息的HTTP接口称为exporter。
常用组件大部分都有exporter可以直接使用,比如haproxy,nginx,Mysql,Linux系统信息(包括磁盘、内存、CPU、网络等)。
prometheus主要特点:
Grafana:是一个用于可视化大型测量数据的开源系统,可以对Prometheus 的指标数据进行可视化。
Prometheus的体系结构图:
Prometheus直接从已检测的作业中抓取指标,或者(针对短暂的作业)通过中间推送网关获取指标。它在本地存储所有抓取到的样本,并对这些数据运行规则,以汇总和记录现有数据中的新时间序列,或生成警报。Grafana或其他API使用者可以用来可视化收集的数据。
- [root@k8s-node01 k8s-prometheus]# cat prometheus-rbac.yaml
- apiVersion: v1
- kind: ServiceAccount
- metadata:
- name: prometheus
- namespace: kube-system
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- ---
- apiVersion: rbac.authorization.k8s.io/v1
- kind: ClusterRole
- metadata:
- name: prometheus
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- rules:
- - apiGroups:
- - ""
- resources:
- - nodes
- - nodes/metrics
- - services
- - endpoints
- - pods
- verbs:
- - get
- - list
- - watch
- - apiGroups:
- - ""
- resources:
- - configmaps
- verbs:
- - get
- - nonResourceURLs:
- - "/metrics"
- verbs:
- - get
- ---
- apiVersion: rbac.authorization.k8s.io/v1
- kind: ClusterRoleBinding
- metadata:
- name: prometheus
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: prometheus
- subjects:
- - kind: ServiceAccount
- name: prometheus
- namespace: kube-system
-
-
-
- [root@k8s-node01 k8s-prometheus]# kubectl apply -f prometheus-rbac.yaml
- serviceaccount/prometheus created
- clusterrole.rbac.authorization.k8s.io/prometheus created
- clusterrolebinding.rbac.authorization.k8s.io/prometheus created
-
使用Configmap保存不需要加密的配置信息,yaml中修改对应的NODE IP即可。
- [root@k8s-node01 k8s-prometheus]# cat prometheus-configmap.yaml
- # Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
- apiVersion: v1
- kind: ConfigMap
- metadata:
- name: prometheus-config
- namespace: kube-system
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: EnsureExists
- data:
- prometheus.yml: |
- rule_files:
- - /etc/config/rules/*.rules
-
- scrape_configs:
- - job_name: prometheus
- static_configs:
- - targets:
- - localhost:9090
-
- - job_name: kubernetes-nodes
- scrape_interval: 30s
- static_configs:
- - targets:
- - 11.0.1.13:9100
- - 11.0.1.14:9100
-
- - job_name: kubernetes-apiservers
- kubernetes_sd_configs:
- - role: endpoints
- relabel_configs:
- - action: keep
- regex: default;kubernetes;https
- source_labels:
- - __meta_kubernetes_namespace
- - __meta_kubernetes_service_name
- - __meta_kubernetes_endpoint_port_name
- scheme: https
- tls_config:
- ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- insecure_skip_verify: true
- bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
- - job_name: kubernetes-nodes-kubelet
- kubernetes_sd_configs:
- - role: node
- relabel_configs:
- - action: labelmap
- regex: __meta_kubernetes_node_label_(.+)
- scheme: https
- tls_config:
- ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- insecure_skip_verify: true
- bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
- - job_name: kubernetes-nodes-cadvisor
- kubernetes_sd_configs:
- - role: node
- relabel_configs:
- - action: labelmap
- regex: __meta_kubernetes_node_label_(.+)
- - target_label: __metrics_path__
- replacement: /metrics/cadvisor
- scheme: https
- tls_config:
- ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
- insecure_skip_verify: true
- bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
- - job_name: kubernetes-service-endpoints
- kubernetes_sd_configs:
- - role: endpoints
- relabel_configs:
- - action: keep
- regex: true
- source_labels:
- - __meta_kubernetes_service_annotation_prometheus_io_scrape
- - action: replace
- regex: (https?)
- source_labels:
- - __meta_kubernetes_service_annotation_prometheus_io_scheme
- target_label: __scheme__
- - action: replace
- regex: (.+)
- source_labels:
- - __meta_kubernetes_service_annotation_prometheus_io_path
- target_label: __metrics_path__
- - action: replace
- regex: ([^:]+)(?::\d+)?;(\d+)
- replacement: $1:$2
- source_labels:
- - __address__
- - __meta_kubernetes_service_annotation_prometheus_io_port
- target_label: __address__
- - action: labelmap
- regex: __meta_kubernetes_service_label_(.+)
- - action: replace
- source_labels:
- - __meta_kubernetes_namespace
- target_label: kubernetes_namespace
- - action: replace
- source_labels:
- - __meta_kubernetes_service_name
- target_label: kubernetes_name
-
- - job_name: kubernetes-services
- kubernetes_sd_configs:
- - role: service
- metrics_path: /probe
- params:
- module:
- - http_2xx
- relabel_configs:
- - action: keep
- regex: true
- source_labels:
- - __meta_kubernetes_service_annotation_prometheus_io_probe
- - source_labels:
- - __address__
- target_label: __param_target
- - replacement: blackbox
- target_label: __address__
- - source_labels:
- - __param_target
- target_label: instance
- - action: labelmap
- regex: __meta_kubernetes_service_label_(.+)
- - source_labels:
- - __meta_kubernetes_namespace
- target_label: kubernetes_namespace
- - source_labels:
- - __meta_kubernetes_service_name
- target_label: kubernetes_name
-
- - job_name: kubernetes-pods
- kubernetes_sd_configs:
- - role: pod
- relabel_configs:
- - action: keep
- regex: true
- source_labels:
- - __meta_kubernetes_pod_annotation_prometheus_io_scrape
- - action: replace
- regex: (.+)
- source_labels:
- - __meta_kubernetes_pod_annotation_prometheus_io_path
- target_label: __metrics_path__
- - action: replace
- regex: ([^:]+)(?::\d+)?;(\d+)
- replacement: $1:$2
- source_labels:
- - __address__
- - __meta_kubernetes_pod_annotation_prometheus_io_port
- target_label: __address__
- - action: labelmap
- regex: __meta_kubernetes_pod_label_(.+)
- - action: replace
- source_labels:
- - __meta_kubernetes_namespace
- target_label: kubernetes_namespace
- - action: replace
- source_labels:
- - __meta_kubernetes_pod_name
- target_label: kubernetes_pod_name
- alerting:
- alertmanagers:
- - static_configs:
- - targets: ["alertmanager:80"]
-
- [root@k8s-node01 k8s-prometheus]# kubectl apply -f prometheus-configmap.yaml
- configmap/prometheus-config created
-
这里使用storageclass进行动态供给,给prometheus的数据进行持久化
- [root@k8s-node01 k8s-prometheus]# cat prometheus-statefulset.yaml
- apiVersion: apps/v1
- kind: StatefulSet
- metadata:
- name: prometheus
- namespace: kube-system
- labels:
- k8s-app: prometheus
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- version: v2.2.1
- spec:
- serviceName: "prometheus"
- replicas: 1
- podManagementPolicy: "Parallel"
- updateStrategy:
- type: "RollingUpdate"
- selector:
- matchLabels:
- k8s-app: prometheus
- template:
- metadata:
- labels:
- k8s-app: prometheus
- annotations:
- scheduler.alpha.kubernetes.io/critical-pod: ''
- spec:
- priorityClassName: system-cluster-critical
- serviceAccountName: prometheus
- initContainers:
- - name: "init-chown-data"
- image: "busybox:latest"
- imagePullPolicy: "IfNotPresent"
- command: ["chown", "-R", "65534:65534", "/data"]
- volumeMounts:
- - name: prometheus-data
- mountPath: /data
- subPath: ""
- containers:
- - name: prometheus-server-configmap-reload
- image: "jimmidyson/configmap-reload:v0.1"
- imagePullPolicy: "IfNotPresent"
- args:
- - --volume-dir=/etc/config
- - --webhook-url=http://localhost:9090/-/reload
- volumeMounts:
- - name: config-volume
- mountPath: /etc/config
- readOnly: true
- resources:
- limits:
- cpu: 10m
- memory: 10Mi
- requests:
- cpu: 10m
- memory: 10Mi
-
- - name: prometheus-server
- image: "prom/prometheus:v2.2.1"
- imagePullPolicy: "IfNotPresent"
- args:
- - --config.file=/etc/config/prometheus.yml
- - --storage.tsdb.path=/data
- - --web.console.libraries=/etc/prometheus/console_libraries
- - --web.console.templates=/etc/prometheus/consoles
- - --web.enable-lifecycle
- ports:
- - containerPort: 9090
- readinessProbe:
- httpGet:
- path: /-/ready
- port: 9090
- initialDelaySeconds: 30
- timeoutSeconds: 30
- livenessProbe:
- httpGet:
- path: /-/healthy
- port: 9090
- initialDelaySeconds: 30
- timeoutSeconds: 30
- # based on 10 running nodes with 30 pods each
- resources:
- limits:
- cpu: 200m
- memory: 1000Mi
- requests:
- cpu: 200m
- memory: 1000Mi
-
- volumeMounts:
- - name: config-volume
- mountPath: /etc/config
- - name: prometheus-data
- mountPath: /data
- subPath: ""
- - name: prometheus-rules
- mountPath: /etc/config/rules
-
- terminationGracePeriodSeconds: 300
- volumes:
- - name: config-volume
- configMap:
- name: prometheus-config
- - name: prometheus-rules
- configMap:
- name: prometheus-rules
-
- volumeClaimTemplates:
- - metadata:
- name: prometheus-data
- spec:
- storageClassName: managed-nfs-storage
- accessModes:
- - ReadWriteOnce
- resources:
- requests:
- storage: "1Gi"
-
- [root@k8s-node01 k8s-prometheus]# kubectl apply -f prometheus-statefulset.yaml
- Warning: spec.template.metadata.annotations[scheduler.alpha.kubernetes.io/critical-pod]: non-functional in v1.16+; use the "priorityClassName" field instead
- statefulset.apps/prometheus created
-
- [root@k8s-node01 k8s-prometheus]#kubectl get pod -n kube-system |grep prometheus
- NAME READY STATUS RESTARTS AGE
- prometheus-0 2/2 Running 6 1d
-
- [root@k8s-node01 k8s-prometheus]# cat prometheus-service.yaml
- kind: Service
- apiVersion: v1
- metadata:
- name: prometheus
- namespace: kube-system
- labels:
- kubernetes.io/name: "Prometheus"
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- spec:
- type: NodePort
- ports:
- - name: http
- port: 9090
- protocol: TCP
- targetPort: 9090
- nodePort: 30090
- selector:
- k8s-app: prometheus
-
- [root@k8s-master prometheus-k8s]# kubectl apply -f prometheus-service.yaml
使用任意一个NodeIP加端口进行访问,访问地址:http://NodeIP:Port
- [root@k8s-master prometheus-k8s]# cat grafana.yaml
- apiVersion: apps/v1
- kind: StatefulSet
- metadata:
- name: grafana
- namespace: kube-system
- spec:
- serviceName: "grafana"
- replicas: 1
- selector:
- matchLabels:
- app: grafana
- template:
- metadata:
- labels:
- app: grafana
- spec:
- containers:
- - name: grafana
- image: grafana/grafana
- ports:
- - containerPort: 3000
- protocol: TCP
- resources:
- limits:
- cpu: 100m
- memory: 256Mi
- requests:
- cpu: 100m
- memory: 256Mi
- volumeMounts:
- - name: grafana-data
- mountPath: /var/lib/grafana
- subPath: grafana
- securityContext:
- fsGroup: 472
- runAsUser: 472
- volumeClaimTemplates:
- - metadata:
- name: grafana-data
- spec:
- storageClassName: managed-nfs-storage #和prometheus使用同一个存储类
- accessModes:
- - ReadWriteOnce
- resources:
- requests:
- storage: "1Gi"
-
- ---
-
- apiVersion: v1
- kind: Service
- metadata:
- name: grafana
- namespace: kube-system
- spec:
- type: NodePort
- ports:
- - port : 80
- targetPort: 3000
- nodePort: 30091
- selector:
- app: grafana
-
- [root@k8s-master prometheus-k8s]#kubectl apply -f grafana.yaml
访问方式:
使用任意一个NodeIP加端口进行访问,访问地址:http://NodeIP:Port ,默认账号密码为admin
Pod:
kubelet的节点使用cAdvisor提供的metrics接口获取该节点所有Pod和容器相关的性能指标数据,安装kubelet默认就开启了
Node:
需要使用node_exporter收集器采集节点资源利用率。
使用node_exporter.sh脚本分别在所有服务器上部署node_exporter收集器,不需要修改可直接运行脚本
- [root@k8s-master prometheus-k8s]# cat node_exporter.sh
- #!/bin/bash
- wget https://github.com/prometheus/node_exporter/releases/download/v0.17.0/node_exporter-0.17.0.linux-amd64.tar.gz
-
- tar zxf node_exporter-0.17.0.linux-amd64.tar.gz
- mv node_exporter-0.17.0.linux-amd64 /usr/local/node_exporter
-
- cat <<EOF >/usr/lib/systemd/system/node_exporter.service
- [Unit]
- Description=https://prometheus.io
-
- [Service]
- Restart=on-failure
- ExecStart=/usr/local/node_exporter/node_exporter --collector.systemd --collector.systemd.unit-whitelist=(docker|kubelet|kube-proxy|flanneld).service
-
- [Install]
- WantedBy=multi-user.target
- EOF
-
- systemctl daemon-reload
- systemctl enable node_exporter
- systemctl restart node_exporter
- [root@k8s-master prometheus-k8s]# ./node_exporter.sh
-
- [root@k8s-master prometheus-k8s]# ps -ef|grep node_exporter
- root 6227 1 0 Oct08 ? 00:06:43 /usr/local/node_exporter/node_exporter --collector.systemd --collector.systemd.unit-whitelist=(docker|kubelet|kube-proxy|flanneld).service
- root 118269 117584 0 23:27 pts/0 00:00:00 grep --color=auto node_exporter
资源对象:
kube-state-metrics采集了k8s中各种资源对象的状态信息,只需要在master节点部署就行
1.创建rbac的yaml对metrics进行授权
- [root@k8s-master prometheus-k8s]# cat kube-state-metrics-rbac.yaml
- apiVersion: v1
- kind: ServiceAccount
- metadata:
- name: kube-state-metrics
- namespace: kube-system
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- ---
- apiVersion: rbac.authorization.k8s.io/v1
- kind: ClusterRole
- metadata:
- name: kube-state-metrics
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- rules:
- - apiGroups: [""]
- resources:
- - configmaps
- - secrets
- - nodes
- - pods
- - services
- - resourcequotas
- - replicationcontrollers
- - limitranges
- - persistentvolumeclaims
- - persistentvolumes
- - namespaces
- - endpoints
- verbs: ["list", "watch"]
- - apiGroups: ["extensions"]
- resources:
- - daemonsets
- - deployments
- - replicasets
- verbs: ["list", "watch"]
- - apiGroups: ["apps"]
- resources:
- - statefulsets
- verbs: ["list", "watch"]
- - apiGroups: ["batch"]
- resources:
- - cronjobs
- - jobs
- verbs: ["list", "watch"]
- - apiGroups: ["autoscaling"]
- resources:
- - horizontalpodautoscalers
- verbs: ["list", "watch"]
- ---
- apiVersion: rbac.authorization.k8s.io/v1
- kind: Role
- metadata:
- name: kube-state-metrics-resizer
- namespace: kube-system
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- rules:
- - apiGroups: [""]
- resources:
- - pods
- verbs: ["get"]
- - apiGroups: ["extensions"]
- resources:
- - deployments
- resourceNames: ["kube-state-metrics"]
- verbs: ["get", "update"]
- ---
- apiVersion: rbac.authorization.k8s.io/v1
- kind: ClusterRoleBinding
- metadata:
- name: kube-state-metrics
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: kube-state-metrics
- subjects:
- - kind: ServiceAccount
- name: kube-state-metrics
- namespace: kube-system
- ---
- apiVersion: rbac.authorization.k8s.io/v1
- kind: RoleBinding
- metadata:
- name: kube-state-metrics
- namespace: kube-system
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: Role
- name: kube-state-metrics-resizer
- subjects:
- - kind: ServiceAccount
- name: kube-state-metrics
- namespace: kube-system
- [root@k8s-master prometheus-k8s]# kubectl apply -f kube-state-metrics-rbac.yaml
2.编写Deployment和ConfigMap的yaml进行metrics pod部署,不需要进行修改
- [root@k8s-master prometheus-k8s]# cat kube-state-metrics-deployment.yaml
- apiVersion: apps/v1
- kind: Deployment
- metadata:
- name: kube-state-metrics
- namespace: kube-system
- labels:
- k8s-app: kube-state-metrics
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- version: v1.3.0
- spec:
- selector:
- matchLabels:
- k8s-app: kube-state-metrics
- version: v1.3.0
- replicas: 1
- template:
- metadata:
- labels:
- k8s-app: kube-state-metrics
- version: v1.3.0
- annotations:
- scheduler.alpha.kubernetes.io/critical-pod: ''
- spec:
- priorityClassName: system-cluster-critical
- serviceAccountName: kube-state-metrics
- containers:
- - name: kube-state-metrics
- image: lizhenliang/kube-state-metrics:v1.3.0
- ports:
- - name: http-metrics
- containerPort: 8080
- - name: telemetry
- containerPort: 8081
- readinessProbe:
- httpGet:
- path: /healthz
- port: 8080
- initialDelaySeconds: 5
- timeoutSeconds: 5
- - name: addon-resizer
- image: lizhenliang/addon-resizer:1.8.3
- resources:
- limits:
- cpu: 100m
- memory: 30Mi
- requests:
- cpu: 100m
- memory: 30Mi
- env:
- - name: MY_POD_NAME
- valueFrom:
- fieldRef:
- fieldPath: metadata.name
- - name: MY_POD_NAMESPACE
- valueFrom:
- fieldRef:
- fieldPath: metadata.namespace
- volumeMounts:
- - name: config-volume
- mountPath: /etc/config
- command:
- - /pod_nanny
- - --config-dir=/etc/config
- - --container=kube-state-metrics
- - --cpu=100m
- - --extra-cpu=1m
- - --memory=100Mi
- - --extra-memory=2Mi
- - --threshold=5
- - --deployment=kube-state-metrics
- volumes:
- - name: config-volume
- configMap:
- name: kube-state-metrics-config
- ---
- # Config map for resource configuration.
- apiVersion: v1
- kind: ConfigMap
- metadata:
- name: kube-state-metrics-config
- namespace: kube-system
- labels:
- k8s-app: kube-state-metrics
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- data:
- NannyConfiguration: |-
- apiVersion: nannyconfig/v1alpha1
- kind: NannyConfiguration
- [root@k8s-master prometheus-k8s]# kubectl apply -f kube-state-metrics-deployment.yaml
3.编写Service的yaml对metrics进行端口暴露
- [root@k8s-master prometheus-k8s]# cat kube-state-metrics-service.yaml
- apiVersion: v1
- kind: Service
- metadata:
- name: kube-state-metrics
- namespace: kube-system
- labels:
- kubernetes.io/cluster-service: "true"
- addonmanager.kubernetes.io/mode: Reconcile
- kubernetes.io/name: "kube-state-metrics"
- annotations:
- prometheus.io/scrape: 'true'
- spec:
- ports:
- - name: http-metrics
- port: 8080
- targetPort: http-metrics
- protocol: TCP
- - name: telemetry
- port: 8081
- targetPort: telemetry
- protocol: TCP
- selector:
- k8s-app: kube-state-metrics
- [root@k8s-master prometheus-k8s]# kubectl apply -f kube-state-metrics-service.yaml
-
- [root@k8s-master prometheus-k8s]# kubectl get pod,svc -n kube-system
- NAME READY STATUS RESTARTS AGE
- pod/alertmanager-5d75d5688f-fmlq6 2/2 Running 0 9dpod/coredns-5bd5f9dbd9-wv45t 1/1 Running 1 9dpod/grafana-0 1/1 Running 2 15dpod/kube-state-metrics-7c76bdbf68-kqqgd 2/2 Running 6 14dpod/kubernetes-dashboard-7d77666777-d5ng4 1/1 Running 5 16dpod/prometheus-0 2/2 Running 6 15dNAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
- service/alertmanager ClusterIP 10.0.0.207 <none> 80/TCP 13dservice/grafana NodePort 10.0.0.74 <none> 80:30091/TCP 15dservice/kube-dns ClusterIP 10.0.0.2 <none> 53/UDP,53/TCP 14dservice/kube-state-metrics ClusterIP 10.0.0.194 <none> 8080/TCP,8081/TCP 14dservice/kubernetes-dashboard NodePort 10.0.0.127 <none> 443:30001/TCP 17dservice/prometheus NodePort 10.0.0.33 <none> 9090:30090/TCP 14d
报错一:进行2.1步骤时报错:ensure CRDs are installed first
- [root@k8s-node01 k8s-prometheus]# kubectl apply -f prometheus-rbac.yaml
- serviceaccount/prometheus unchanged
- resource mapping not found for name: "prometheus" namespace: "" from "prometheus-rbac.yaml": no matches for kind "ClusterRole" in version "rbac.authorization.k8s.io/v1beta1"
- ensure CRDs are installed first
- resource mapping not found for name: "prometheus" namespace: "" from "prometheus-rbac.yaml": no matches for kind "ClusterRoleBinding" in version "rbac.authorization.k8s.io/v1beta1"
- ensure CRDs are installed first
使用附件的原yaml会报错,原因是因为api过期,需要手动修改 apiVersion: rbac.authorization.k8s.io/v1beta1为apiVersion: rbac.authorization.k8s.io/v1
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。