赞
踩
本来不打算写这块来的,当初觉得这部分相对来说比较简单,主要部署好skywalking-oap server 跟展示端 skywalking-ui就完事了,结果在实际部署过程中发现,通过SSL协议连接ES这块居然是个卡点,搞了半天,网上也没啥文章能照着做通的,唯一让我有实质性帮助的是chatgpt…为了给人类扳回一程,我不得不写下此篇!!
顺便吐槽一下Apache开源体系丰富是丰富,真是有点乱,一个项目一个标准。就ssl通讯这块,ES节点之间主要用的p12(PKCS#12)证书,Kibana连ES 变成了pem证书(Java客户端用的应该也是这个),到skywalking这了又改成用jks证书,就这么点事都不能统一,非得各搞各的,给人感觉像故意体现项目独特性。
--------------------- 原创不易,如果大家看完觉得有帮助,希望能多多点赞关注,感谢各位的支持 ----------------------
k8s部署skywalking的oap server并不复杂,就一套yml
service我照例用的是固定ip
# ClusterIP Service that exposes the SkyWalking OAP server inside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: skywalking-oap-svc
  namespace: devops-tools
  annotations:
    desc: skywalking-oap-server服务访问入口
spec:
  # Service types: ClusterIP (default) | NodePort | LoadBalancer (external
  # load balancer) | ExternalName (external DNS name).
  type: ClusterIP
  # Statically assigned cluster IP.
  clusterIP: 10.106.160.105
  selector:
    k8s-app: skywalking-oap
  ports:
    - name: rest
      port: 12800
      targetPort: 12800
    - name: grpc
      port: 11800
      targetPort: 11800
要点说明
# Excerpt of the OAP container's `env:` list: the settings that matter for
# Elasticsearch storage over HTTPS. Passwords shown here are placeholders.
- name: SW_STORAGE
  # Use Elasticsearch as the storage backend.
  value: 'elasticsearch'
- name: SW_NAMESPACE
  # Prefix for the Elasticsearch indices SkyWalking creates.
  value: 'skywalking-log'
- name: SW_STORAGE_ES_CLUSTER_NODES
  # Address of the Elasticsearch cluster.
  value: 'es-01-svc.es:9200'
- name: SW_STORAGE_ES_HTTP_PROTOCOL
  # Protocol used to reach the cluster; plain "http" also works if TLS is
  # not wanted.
  value: 'https'
- name: SW_ES_USER
  # Elasticsearch user.
  value: 'elastic'
- name: SW_ES_PASSWORD
  # Password of that Elasticsearch user.
  value: 'password'
- name: SW_STORAGE_ES_SSL_JKS_PATH
  # Location of the JKS keystore used for the TLS connection.
  value: '/local-certs/es01_http.jks'
- name: SW_STORAGE_ES_SSL_JKS_PASS
  # Password of the JKS keystore.
  value: 'jks密码'
- name: 'SW_STORAGE_ES_SUPER_DATASET_INDEX_SHARDS_FACTOR'
  # Shard multiplier for "super dataset" indices (per the original note the
  # default is 5); 1 keeps them at the base shard count.
  value: '1'
- name: SW_CORE_RECORD_DATA_TTL
  # Retention of record (detail) data in days; default 3.
  value: '14'
- name: SW_CORE_METRICS_DATA_TTL
  # Retention of metrics (aggregated) data in days; default 7.
  value: '30'
# StatefulSet running the SkyWalking OAP server with Elasticsearch storage
# over HTTPS. An init container first downloads the JKS keystore into the
# pod's persistent volume; the OAP container then reads it from /local-certs.
apiVersion: apps/v1
kind: StatefulSet  # workload kinds: Deployment | StatefulSet | DaemonSet | JobSet
metadata:
  name: skywalking-oap
  namespace: devops-tools
spec:
  replicas: 1
  # NOTE(review): spec.serviceName is not set here. The StatefulSet API
  # documents it as the governing (normally headless) Service; confirm
  # whether one should be created and referenced.
  selector:
    matchLabels:
      k8s-app: skywalking-oap  # must match template.metadata.labels below
  revisionHistoryLimit: 10  # revisions kept for rollback (default 10)
  updateStrategy:  # StatefulSet uses `updateStrategy`; Deployment uses `strategy`
    type: RollingUpdate  # RollingUpdate | OnDelete (update only when a pod is deleted)
    rollingUpdate:
      # Canary control: only pods whose ordinal is >= partition are updated.
      # With 5 pods [0-4]: 0 updates all, 4 updates one pod, 3 updates two.
      partition: 0
  persistentVolumeClaimRetentionPolicy:  # Retain | Delete
    whenDeleted: Delete
    whenScaled: Retain
  volumeClaimTemplates:  # per-pod storage, created dynamically
    - metadata:
        name: skywalking-oap-volume
      spec:
        storageClassName: "local-path"  # dynamic provisioning through this class
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 10Gi
  template:
    metadata:
      labels:
        k8s-app: skywalking-oap
      annotations: {}
      # Optional fixed pod IP; per the original note this relies on the
      # calico IPAM plugin and calico 3.24.1 or newer:
      # "cni.projectcalico.org/ipAddrs": "[\"10.244.220.10\"]"
    spec:
      restartPolicy: Always
      # Time a pod may stay in Terminating for pre-stop work (default 30s).
      terminationGracePeriodSeconds: 30
      initContainers:
        - name: init-skywalking-oap
          image: apache/skywalking-oap-server:latest
          # Run as root only if the ownership of the mounted directory must
          # be changed:
          # securityContext:
          #   runAsUser: 0
          command:
            - "bash"
            - "-c"
            # For every comma-separated URL in DOWN_LOAD_URLS: download the
            # file into /local-certs when it is missing or DOWN_LOAD=true,
            # but only if a HEAD request for the URL answers 200.
            - |
              echo "$DOWN_LOAD_URLS"
              SAVE_PATH='/local-certs'
              IFS=',' read -r -a my_array <<< "$DOWN_LOAD_URLS"
              for URL in "${my_array[@]}"; do
                echo "URL:$URL"
                FILE="${URL##*/}"
                if [ ! -f "$SAVE_PATH/$FILE" ] || [ "$DOWN_LOAD" = "true" ]; then
                  res_status=$(curl -u "$DOWNLOAD_ACCESS_USER:$DOWNLOAD_ACCESS_PASS" -I -m 10 -o /dev/null -s -w '%{http_code}' "$URL")
                  echo 'res_status: ' "$res_status"
                  if [ "$res_status" != "200" ]; then
                    echo "===================== ${FILE}不可下载 ======================="
                  else
                    echo "===================== ${FILE}可下载,执行下载 ======================="
                    cd "$SAVE_PATH"
                    curl -u "$DOWNLOAD_ACCESS_USER:$DOWNLOAD_ACCESS_PASS" "$URL" -O
                    ls "$SAVE_PATH"
                  fi
                else
                  echo "===================== ${FILE}已存在 或 ENV:DOWN_LOAD=false ======================="
                fi
              done
          env:
            - name: DOWN_LOAD  # "true" forces a download even if the file exists
              value: "true"
            - name: "DOWN_LOAD_URLS"  # URL(s) of the JKS keystore, comma separated
              value: "http://xxxxx/xxx/es01_http.jks"
            - name: DOWNLOAD_ACCESS_USER  # HTTP basic-auth user for the download
              value: "http auth 用户名"
            - name: DOWNLOAD_ACCESS_PASS  # HTTP basic-auth password for the download
              value: "http auth 密码"
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.name
          volumeMounts:
            - name: skywalking-oap-volume  # keystore directory shared with the OAP container
              mountPath: /local-certs
              subPath: local-certs
      containers:
        - name: skywalking-oap
          image: apache/skywalking-oap-server:latest
          imagePullPolicy: IfNotPresent  # IfNotPresent | Always | Never
          resources:
            requests:
              memory: "2000Mi"  # Gi/Mi units, integers only
              cpu: "500m"  # 1000m = 1 CPU
            limits:
              memory: "2200Mi"
              cpu: "1000m"
          ports:
            - containerPort: 11800
              protocol: TCP
            - containerPort: 12800
              protocol: TCP
          # The original snippet carried a bare block scalar inside `ports`
          # with no command/args key, which is not valid YAML for this
          # field. It is preserved below as an opt-in override; left
          # commented out, the image's own entrypoint starts OAP.
          # NOTE(review): confirm docker-entrypoint.sh is on PATH in the
          # image before enabling.
          # command: ["/bin/sh", "-c"]
          # args:
          #   - |
          #     docker-entrypoint.sh;
          #     sleep 3600;
          env:
            - name: SW_STORAGE
              value: "elasticsearch"
            - name: SW_NAMESPACE
              value: "skywalking-log"
            - name: SW_STORAGE_ES_CLUSTER_NODES
              value: "es-01-svc.es:9200"
            - name: SW_STORAGE_ES_HTTP_PROTOCOL
              value: "https"
            # NOTE(review): credentials are stored in plain text in this
            # manifest; prefer a Secret referenced via valueFrom.secretKeyRef.
            - name: SW_ES_USER
              value: "elastic"
            - name: SW_ES_PASSWORD
              value: "1q2w3e4r"
            - name: SW_STORAGE_ES_SSL_JKS_PATH
              value: "/local-certs/es01_http.jks"
            - name: SW_STORAGE_ES_SSL_JKS_PASS
              value: "1q2w3e"
            # Shard factor for super-dataset indices (original note: default 5).
            - name: "SW_STORAGE_ES_SUPER_DATASET_INDEX_SHARDS_FACTOR"
              value: "1"
            - name: SW_CORE_RECORD_DATA_TTL
              value: "14"
            - name: SW_CORE_METRICS_DATA_TTL
              value: "30"
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.name
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
          volumeMounts:
            - name: skywalking-oap-volume
              mountPath: /skywalking/data
              subPath: data
            - name: skywalking-oap-volume
              mountPath: /skywalking/logs
              subPath: logs
            - name: skywalking-oap-volume  # keystore downloaded by the init container
              mountPath: /local-certs
              subPath: local-certs
            # The original also mounted a volume named skywalking-oap-config
            # at /config, but no such volume is declared under `volumes`, so
            # the pod spec would be rejected. Declare the volume (for
            # example from a ConfigMap) before re-enabling:
            # - name: skywalking-oap-config
            #   mountPath: /config
            - name: host-time  # node's local time zone
              mountPath: /etc/localtime
              readOnly: true
      volumes:
        - name: host-time
          hostPath:
            path: /etc/localtime
            type: ""
登录到要连接的ES集群的任意节点上,使用ES集群的http.p12证书生成skywalking需要的jks证书
注意
# Convert the Elasticsearch HTTP-layer PKCS#12 keystore (http.p12) into a JKS
# keystore, using the keytool bundled with Elasticsearch.
# Replace both quoted placeholders with the real passwords. Single quotes
# keep the shell from expanding characters such as `$` inside them.
# The OAP manifest in this article loads the keystore as es01_http.jks, so
# rename or publish the output file accordingly.
/usr/share/elasticsearch/jdk/bin/keytool -importkeystore \
  -srckeystore /usr/share/elasticsearch/config/local-certs/http.p12 \
  -srcstoretype PKCS12 \
  -srcstorepass 'http.p12证书密码' \
  -destkeystore ./es_http.jks \
  -deststoretype JKS \
  -deststorepass 'jks证书密码'
这样skywalking-oap server就部署好了。
接下来是skywalking-ui,这个更简单,只要设定好要连接的skywalking-oap server的地址就行了,yml部署文件如下
# Service: in-cluster entry point for the SkyWalking UI.
apiVersion: v1
kind: Service
metadata:
  name: skywalking-ui-svc
  namespace: devops-tools
  annotations:
    desc: skywalking-ui服务访问入口
spec:
  # Service types: ClusterIP (default) | NodePort | LoadBalancer (external
  # load balancer) | ExternalName (external DNS name).
  type: ClusterIP
  clusterIP: 10.106.160.106
  selector:
    k8s-app: skywalking-ui
  ports:
    - name: http
      port: 80
      targetPort: 8080
---
# StatefulSet: the SkyWalking UI, pointed at the OAP REST endpoint.
apiVersion: apps/v1
kind: StatefulSet  # workload kinds: Deployment | StatefulSet | DaemonSet | JobSet
metadata:
  name: skywalking-ui
  namespace: devops-tools
spec:
  replicas: 1
  revisionHistoryLimit: 10  # revisions kept for rollback (default 10)
  selector:
    matchLabels:
      k8s-app: skywalking-ui  # must match template.metadata.labels below
  # Bind a Service to get per-pod DNS names of the form
  # <pod-name>.<svc-name>.<namespace>.svc.cluster.local:
  # serviceName: "nginx-headless"
  updateStrategy:  # StatefulSet uses `updateStrategy`; Deployment uses `strategy`
    type: RollingUpdate  # RollingUpdate | OnDelete (update only when a pod is deleted)
    rollingUpdate:
      # Deployment-only knobs, kept for reference. Each takes a count or a
      # percentage (1 | 20%); keeping both equal avoids losing capacity
      # during an update.
      # maxSurge: 1        # extra pods allowed above the desired count
      # maxUnavailable: 1  # pods allowed to be unavailable
      # StatefulSet canary control: only pods whose ordinal is >= partition
      # are updated. With 5 pods [0-4]: 0 updates all, 4 updates one pod,
      # 3 updates two.
      partition: 0
  persistentVolumeClaimRetentionPolicy:  # Retain | Delete
    whenScaled: Retain
    whenDeleted: Delete
  volumeClaimTemplates:  # per-pod storage, created dynamically
    - metadata:
        name: skywalking-ui-volume
      spec:
        storageClassName: 'local-path'  # dynamic provisioning through this class
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 10Gi
  template:
    metadata:
      labels:
        k8s-app: skywalking-ui
      annotations: null
      # Optional fixed pod IP; per the original note this relies on the
      # calico IPAM plugin and calico 3.24.1 or newer:
      # "cni.projectcalico.org/ipAddrs": "[\"10.244.220.10\"]"
    spec:
      restartPolicy: Always
      # Time a pod may stay in Terminating for pre-stop work (default 30s).
      terminationGracePeriodSeconds: 30
      volumes:
        - name: host-time  # node's local time zone
          hostPath:
            path: /etc/localtime
            type: ''
      containers:
        - name: skywalking-ui
          image: apache/skywalking-ui
          imagePullPolicy: IfNotPresent  # IfNotPresent | Always | Never
          # Privileged mode / container root, only if kernel settings must
          # be changed:
          # securityContext:
          #   privileged: true
          ports:
            - containerPort: 8080
              protocol: TCP
          resources:
            limits:
              cpu: '500m'  # 1000m = 1 CPU
              memory: '512Mi'  # Gi/Mi units, integers only
            requests:
              cpu: '250m'
              memory: '256Mi'
          env:
            # Address of the skywalking-oap server the UI queries.
            - name: SW_OAP_ADDRESS
              value: 'http://skywalking-oap-svc:12800'
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.name
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
          volumeMounts:
            - name: skywalking-ui-volume
              subPath: data
              mountPath: /skywalking-ui/data
            - name: skywalking-ui-volume
              subPath: logs
              mountPath: /skywalking-ui/logs
            - name: host-time
              mountPath: /etc/localtime
              readOnly: true
这里一般有2种方式
1.通过init-containers将agent文件部署到容器内,同时在应用启动脚本中添加java-agent JVM参数来实现;
2.将agent文件放置到应用项目中,同时在应用启动脚本中添加java-agent JVM参数来实现
我推荐使用第1种方式,这种方式对项目代码无侵入,具体过程如下:
-javaagent:/data/deploy/skywalking-agent/skywalking-agent.jar=agent.service_name=${APP_GROUP}::${APP_NAME},collector.backend_service=skywalking-oap-server:11800,logging.dir=/data/logs,logging.level=warn,logging.max_history_files=10
这里我设定了几个参数
agent.service_name=${APP_GROUP}::${APP_NAME} 【必须设定】当前应用的service名称,可以使用${APP_GROUP}::${APP_NAME}来给服务分组,相同的${APP_GROUP}在skywalking-ui中展示的时候会分到同一组中
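collector.backend_service=skywalking-oap-server:11800 【必须设定】 skywalking-oap server 的 gRPC 地址(端口11800)。注意主机名必须能解析到前面部署的 oap Service:按上文的yml,Service 名称是 skywalking-oap-svc,跨命名空间访问时应写成 skywalking-oap-svc.devops-tools:11800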
logging.dir=/data/logs,logging.level=warn,logging.max_history_files=10 【可选】设定日志存储位置,级别,最多几个
完整参数支持可以参考官方文档 agent参数配置
一定要注意格式:
-javaagent:/data/deploy/skywalking-agent/skywalking-agent.jar=key=value,key=value
最后再给大家贴一下我的Java 应用容器部署模板片段
# Pod-spec fragment for a Java application instrumented with the SkyWalking
# agent. The init container downloads the application jar and the agent
# archive into the shared deploy directory; the app container then starts
# the JVM with -javaagent pointing at the unpacked agent.
initContainers:
  - name: init-deploy
    image: registry:80/web/openjdk17lts:1.1  # private registry image
    imagePullPolicy: IfNotPresent  # IfNotPresent | Always | Never
    command:
      - "bash"
      - "-c"
      # Step 1: download the application jar if the build server has it.
      # Step 2: for every comma-separated URL in DOWN_LOAD_URLS, download
      # the file when it is missing or DOWN_LOAD=1, and unpack zip / tar /
      # tar.gz archives when DOWN_LOAD_UNZIP=1.
      - |
        cd "$DOWN_LOAD_PATH"
        echo "第 67 次构建"
        APP_PACK_URL='http://jenkins/job/xxjob/ws/deploy/app-version.jar'
        echo "APP_PACK_URL: " "$APP_PACK_URL"
        res_status=$(curl -u "$DOWNLOAD_ACCESS_USER:$DOWNLOAD_ACCESS_PASS" -I -m 10 -o /dev/null -s -w '%{http_code}' "$APP_PACK_URL")
        echo 'res_status: ' "$res_status"
        if [ "$res_status" != "200" ]; then
          echo "===================== 部署包不存在,不需要执行下载 ======================="
        else
          echo "===================== 部署包存在,执行下载 ======================="
          # -O takes no argument: the jar is saved under its remote name
          # in the current directory ($DOWN_LOAD_PATH).
          curl -u "$DOWNLOAD_ACCESS_USER:$DOWNLOAD_ACCESS_PASS" "$APP_PACK_URL" -O
          ls
        fi
        echo "需要下载的文件urls: $DOWN_LOAD_URLS"
        SAVE_PATH="$DOWN_LOAD_PATH"
        IFS=',' read -r -a my_array <<< "$DOWN_LOAD_URLS"
        for URL in "${my_array[@]}"; do
          echo "URL:$URL"
          FILE="${URL##*/}"
          # Empty list entries (e.g. a leading comma) are skipped.
          if [ -z "$FILE" ]; then
            continue
          fi
          if [ ! -f "$SAVE_PATH/$FILE" ] || [ "$DOWN_LOAD" = "1" ]; then
            res_status=$(curl -u "$DOWNLOAD_ACCESS_USER:$DOWNLOAD_ACCESS_PASS" -I -m 10 -o /dev/null -s -w '%{http_code}' "$URL")
            echo 'res_status: ' "$res_status"
            if [ "$res_status" != "200" ]; then
              echo "===================== ${FILE}不可下载 ======================="
            else
              echo "===================== ${FILE}可下载,执行下载 ======================="
              cd "$SAVE_PATH"
              curl -u "$DOWNLOAD_ACCESS_USER:$DOWNLOAD_ACCESS_PASS" "$URL" -O
              echo "===================== 获取文件后缀 ======================="
              if [ "$DOWN_LOAD_UNZIP" = "1" ]; then
                # Match on the full file name so that ".tar.gz" is
                # recognised (the last extension alone would be "gz").
                case "$FILE" in
                  *.zip)
                    echo "===================== unzip -o${FILE} ======================="
                    unzip -o "$FILE"
                    ;;
                  *.tar.gz)
                    echo "===================== tar -zxvf ${FILE} ======================="
                    tar -zxvf "$FILE"
                    ;;
                  *.tar)
                    echo "===================== tar -xvf ${FILE} ======================="
                    tar -xvf "$FILE"
                    ;;
                esac
              fi
              ls "$SAVE_PATH"
            fi
          else
            echo "===================== ${FILE}已存在 或 ENV:DOWN_LOAD=${DOWN_LOAD} ======================="
          fi
        done
    env:
      - name: DOWN_LOAD_PATH  # directory the downloads are stored in
        value: "/data/deploy"
      - name: DOWN_LOAD  # "1" forces a download even if the file exists
        value: "1"
      - name: DOWN_LOAD_URLS  # URLs to download, comma separated
        value: ",http://jenkins/job/deploy-need-files/ws/deploy/skywalking-agent.zip"
      - name: DOWNLOAD_ACCESS_USER  # basic-auth user for the download URLs
        value: "user"
      - name: DOWNLOAD_ACCESS_PASS  # basic-auth password for the download URLs
        value: "password"
      - name: DOWN_LOAD_UNZIP  # unpack zip/tar/tar.gz after download: 0 = no, 1 = yes
        value: "1"
    volumeMounts:
      # Must be the same volume and subPath the app container mounts at
      # /data/deploy, otherwise the downloaded files are not visible to it.
      - name: app-volume
        mountPath: /data/deploy
        subPath: deploy
containers:
  - name: app
    image: registry:80/web/openjdk17lts:1.1  # private registry image
    imagePullPolicy: IfNotPresent  # IfNotPresent | Always | Never
    resources:
      limits:
        memory: "2200Mi"
        cpu: "1000m"
      requests:
        memory: "2Gi"
        cpu: "200m"
    ports:
      - containerPort: 8080
        protocol: TCP
    # Start the JVM through a shell. `exec` makes java replace the shell so
    # it receives the pod's termination signal directly.
    # The args text is handed to the shell verbatim, so it must not contain
    # `#` comments. For post-start initialisation prefer a postStart hook.
    # agent.service_name uses the <group>::<name> form described above.
    # NOTE(review): collector.backend_service must resolve to the OAP gRPC
    # Service; this article deploys it as skywalking-oap-svc in namespace
    # devops-tools. Confirm the host name used here.
    command: ["/bin/sh", "-c"]
    args:
      - >-
        exec java -server -Xms1g -Xmx1g -Xss256k
        -Xlog:gc:/data/logs/gc.log
        -XX:+UseZGC
        -XX:MetaspaceSize=256m -XX:MaxMetaspaceSize=512m
        --add-opens java.base/java.lang=ALL-UNNAMED
        -Dfile.encoding=UTF-8
        -Denv=pro
        -javaagent:/data/deploy/skywalking-agent/skywalking-agent.jar=agent.service_name=app_group::app,collector.backend_service=skywalking-oap-server:11800,logging.dir=/data/logs,logging.level=warn,logging.max_history_files=10
        -jar /data/deploy/app-version.jar
    volumeMounts:
      - name: app-volume  # deploy directory filled by the init container
        mountPath: /data/deploy
        subPath: deploy
      - name: app-volume  # log directory (GC log and agent logs)
        mountPath: /data/logs
        subPath: logs
      - name: host-time  # node's local time zone
        mountPath: /etc/localtime
        readOnly: true
volumes:
  - name: app-volume
    persistentVolumeClaim:
      claimName: app-pvc
  - name: host-time
    hostPath:
      path: /etc/localtime
      type: ""
本期文章到此结束,希望对大家有所帮助
--------------------- 原创不易,如果大家看完觉得有帮助,希望能多多点赞关注,感谢各位的支持 ----------------------
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。