CENTO OS上的网络安全工具(二十一)Hadoop HA swarm容器化集群部署

loaded plugins: fastestmirror, ovl loading mirror speeds from cached hostfil



        1. 准备更换镜像源的Centos7




  1. FROM centos:centos7
  2. RUN sed -e 's|^mirrorlist=|#mirrorlist=|g' \
  3. -e 's|^#baseurl=http://mirror.centos.org/centos|baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos|g' \
  4. -i.bak \
  5. /etc/yum.repos.d/CentOS-*.repo \
  6. && yum makecache
  7. CMD ["/bin/bash"]

       (2) 构建国内镜像源的centos7


  1. [root@pig1 docker]# docker build -t pig/centos7 .
  2. [+] Building 223.0s (6/6) FINISHED
  3. => [internal] load build definition from Dockerfile 0.1s
  4. => => transferring dockerfile: 314B 0.0s
  5. => [internal] load .dockerignore 0.1s
  6. => => transferring context: 2B 0.0s
  7. => [internal] load metadata for docker.io/library/centos:centos7 0.0s
  8. => [1/2] FROM docker.io/library/centos:centos7 0.0s
  9. => [2/2] RUN sed -e 's|^mirrorlist=|#mirrorlist=|g' -e 's|^#baseurl=http://mirror.centos.org/centos|baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos|g' 219.6s
  10. => exporting to image 3.2s
  11. => => exporting layers 3.2s
  12. => => writing image sha256:dd9333ee62cd83a0b0db29ac247f9282ab00bd59354074aec28e0d934ffb1677 0.0s
  13. => => naming to docker.io/pig/centos7 0.0s
  14. [root@pig1 docker]# docker images
  16. pig/centos7 latest dd9333ee62cd 12 seconds ago 632MB
  17. [root@pig1 docker]#





  •         openssh
  •         openssh-server
  •         openssh-clients


  •         在 /etc/hosts 尾部添加所有参与ssh免密的主机名和主机ip映射
  •         在~/.ssh下设置客户端公私钥文件  ~/.ssh/id_rsa,~/.ssh/id_rsa.pub
  •         在~/.ssh下设置已认证客户端密钥文件  ~/.ssh/authorized_keys


  •         运行/sbin/sshd-keygen,在/etc/ssh下生成服务端公私钥
  •         运行/sbin/sshd -D & 启动ssh服务


  •         hostlist文件,记录所有参与ssh免密的主机名和主机ip映射。由于/etc/hosts不能在docker镜像构建时更改(此时文件系统只读),所以只能在容器启动后通过脚本方式更改;另外,由于系统启动后,会在/etc/hosts后面增加本机的hostname和ip,所以会和hostlist中的一行重复,但/etc/hosts中的这一行会被系统锁定,不能删除,只能采用先将hostlist中的重复行删除、再向/etc/hosts追加的方式实现。
  •         init-ssh.sh文件。除上述更改/etc/hosts的操作外,启动ssh服务的操作也只能在容器启动后进行,所以需要一个启动脚本程序来完成。



  1. [root@pig1 docker]# python3
  2. Python 3.6.8 (default, Oct 26 2022, 09:13:21)
  3. [GCC 8.5.0 20210514 (Red Hat 8.5.0-17)] on linux
  4. Type "help", "copyright", "credits" or "license" for more information.
  5. >>>
  6. >>> infile = open("hostlist","w")
  7. >>> for i in range(1,16):
  8. ... infile.write("172.17.0.{:d} pignode{:d}\n".format(i+1,i))
  9. ...
  10. >>> infile.close()


  1. [root@pig1 docker]# cat hostlist
  2. pignode1
  3. pignode2
  4. pignode3
  5. pignode4
  6. pignode5
  7. pignode6
  8. pignode7
  9. pignode8
  10. pignode9
  11. pignode10
  12. pignode11
  13. pignode12
  14. pignode13
  15. pignode14
  16. pignode15


        如前所述,在docker build阶段,诸如更改hosts、启动ssh服务等操作是没有办法执行的,所以需要我们在容器启动时,通过Dockerfile中CMD、ENTRYPOINT等指令指定的默认启动脚本来进行。

  #!/bin/bash
  # Container entrypoint: append the cluster's IP-to-hostname mappings from
  # /root/hostlist to /etc/hosts, start sshd, then keep an interactive bash
  # in the foreground so the container does not exit.

  # 1. Append IP-to-hostname mappings to the end of /etc/hosts.
  # 1.1 Collect the IPv4 addresses that /etc/hosts already maps
  #     (lines shaped like "<ipv4> <name>"; the trailing name is stripped).
  existing_ips=$(grep -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\s\w+$' /etc/hosts \
    | sed 's/\s[[:alnum:]]\+$//g')

  # 1.2 Load the candidate mappings that should be appended.
  hostlists=$(cat /root/hostlist)

  # 1.3 Drop every candidate whose IP is already present in /etc/hosts.
  #     The first field is compared for exact equality, so an address such as
  #     172.17.0.2 cannot accidentally remove 172.17.0.20 and the dots are
  #     not treated as regex wildcards.
  for ip in $existing_ips; do
    hostlists=$(printf '%s\n' "$hostlists" | awk -v ip="$ip" '$1 != ip')
  done

  # 1.4 Append the remaining, non-duplicate mappings.
  if [ -n "$hostlists" ]; then
    echo "将${hostlists}添加到/etc/hosts中"
    # Quoted on purpose: an unquoted expansion would collapse all entries
    # onto a single line, turning every later name into an alias of the
    # first IP address.
    printf '%s\n' "$hostlists" >> /etc/hosts
  fi

  # 2. Start the SSH daemon; the trailing & puts it in the background.
  /sbin/sshd -D &

  # 3. sshd runs in the background, so without a foreground process the
  #    script would end here and docker would stop the container.
  #    Re-invoke bash to keep a foreground process alive.
  /bin/bash





  1. [root@pig1 docker]# docker run -it --name pig1 --hostname pignode1 --ip pig/centos7 bash
  2. [root@pignode1 /]# hostname
  3. pignode1
  4. [root@pignode1 /]# cat /etc/hosts
  5. localhost
  6. ::1 localhost ip6-localhost ip6-loopback
  7. fe00::0 ip6-localnet
  8. ff00::0 ip6-mcastprefix
  9. ff02::1 ip6-allnodes
  10. ff02::2 ip6-allrouters
  11. pignode1
  12. [root@pignode1 /]# yum install openssh openssh-server openssh-clients -y
  13. Loaded plugins: fastestmirror, ovl
  14. Loading mirror speeds from cached hostfile
  15. Resolving Dependencies
  16. --> Running transaction check
  17. …………………………
  18. Installed:
  19. openssh.x86_64 0:7.4p1-22.el7_9 openssh-clients.x86_64 0:7.4p1-22.el7_9 openssh-server.x86_64 0:7.4p1-22.el7_9
  20. Dependency Installed:
  21. fipscheck.x86_64 0:1.4.1-6.el7 fipscheck-lib.x86_64 0:1.4.1-6.el7 libedit.x86_64 0:3.0-12.20121213cvs.el7
  22. tcp_wrappers-libs.x86_64 0:7.6-77.el7
  23. Complete!
  24. [root@pignode1 /]# cd /sbin
  25. [root@pignode1 sbin]# sshd-keygen
  26. [root@pignode1 ssh]# /sbin/sshd -D &
  27. [1] 78
  28. [root@pignode1 ssh]# ps -au
  30. root 1 0.1 0.0 11844 3080 pts/0 Ss 12:18 0:00 bash
  31. root 78 0.0 0.1 112952 7928 pts/0 S 12:22 0:00 /sbin/sshd -D
  32. root 79 0.0 0.0 51748 3460 pts/0 R+ 12:22 0:00 ps -au
  33. [root@pignode1 ssh]# ssh-keygen
  34. Generating public/private rsa key pair.
  35. Enter file in which to save the key (/root/.ssh/id_rsa):
  36. Created directory '/root/.ssh'.
  37. Enter passphrase (empty for no passphrase):
  38. Enter same passphrase again:
  39. Your identification has been saved in /root/.ssh/id_rsa.
  40. Your public key has been saved in /root/.ssh/id_rsa.pub.
  41. …………………………………………
  42. +----[SHA256]-----+
  43. [root@pignode1 ssh]# passwd
  44. New password:
  45. Retype new password:
  46. passwd: all authentication tokens updated successfully.
  47. [root@pignode1 ssh]# ssh-copy-id pignode1
  48. /usr/bin/ssh-copy-id: INFO: Source of key(s) to be installed: "/root/.ssh/id_rsa.pub"
  49. /usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
  50. /usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
  51. root@pignode1's password:
  52. Number of key(s) added: 1
  53. Now try logging into the machine, with: "ssh 'pignode1'"
  54. and check to make sure that only the key(s) you wanted were added.
  55. [root@pignode1 ssh]# cd ~/.ssh
  56. [root@pignode1 .ssh]# ls
  57. authorized_keys id_rsa id_rsa.pub known_hosts


        如同我们在 CENTO OS上的网络安全工具(二十)ClickHouse swarm容器化集群部署中提到过的扩充公钥的方法,对15个pignode节点都赋予访问密钥,并将.ssh文件夹中除known_hosts文件以外的文件都拷贝出来备用。


  1. [root@pig1 docker]# ls -a
  2. . .. Dockerfile hostlist init-ssh.sh .ssh




  •         将hostlist、init-ssh.sh拷贝到/root目录下——当然随便拷贝到哪都行
  •         更改init-ssh.sh的权限,添加执行权限
  •         将.ssh目录整个拷贝到/root目录下
  •         更改/root/.ssh/id_rsa私钥文件权限为0400,即只有root用户可读(如果使用其他用户对应修改),否则ssh会拒绝执行
  •         设置root用户的口令(密码),否则初次ssh连接的时候过不去
  •         安装openssh及服务端、客户端;
  •         生成sshd密钥
  •         在ssh客户端配置文件/etc/ssh/ssh_config中添加关闭指纹校验的选项,以防免密连接的时候跳出确认是否生成指纹的问题,毕竟我们不能指望hadoop有这个功夫在终端中敲个“yes”


  1. FROM pig/centos7
  2. COPY init-ssh.sh /root/init-ssh.sh
  3. COPY hostlist /root/hostlist
  4. COPY .ssh /root/.ssh
  5. RUN chmod +x /root/init-ssh.sh \
  6. && chmod 0400 /root/.ssh/id_rsa \
  7. && echo 'default123' | passwd --stdin root \
  8. && yum install openssh openssh-server openssh-clients -y \
  9. && /sbin/sshd-keygen \
  10. && echo -e '\nHost *\nStrictHostKeyChecking no\nUserKnownHostsFile=/dev/null' >> etc/ssh/ssh_config
  11. ENTRYPOINT ["/root/init-ssh.sh"]


  1. [root@pig1 docker]# docker build -t pig/ssh .
  2. [+] Building 20.5s (10/10) FINISHED
  3. => [internal] load build definition from Dockerfile 0.0s
  4. => => transferring dockerfile: 378B 0.0s
  5. => [internal] load .dockerignore 0.0s
  6. => => transferring context: 2B 0.0s
  7. => [internal] load metadata for docker.io/pig/centos7:latest 0.0s
  8. => [internal] load build context 0.0s
  9. => => transferring context: 382B 0.0s
  10. => [1/5] FROM docker.io/pig/centos7 0.0s
  11. => CACHED [2/5] COPY init-ssh.sh /root/init-ssh.sh 0.0s
  12. => CACHED [3/5] COPY hostlist /root/hostlist 0.0s
  13. => CACHED [4/5] COPY .ssh /root/.ssh 0.0s
  14. => [5/5] RUN chmod +x /root/init-ssh.sh && chmod 0400 /root/.ssh && echo 'default123' | passwd 18.3s
  15. => exporting to image 2.0s
  16. => => exporting layers 2.0s
  17. => => writing image sha256:3e4be2ca4730b61cc8aa7f4349ebf5b0afa582aeb1f3e3a3577ce15ffcd4eee5 0.0s
  18. => => naming to docker.io/pig/ssh 0.0s
  19. [root@pig1 docker]#




  1. [root@pig1 centos]# docker run -it --name pig1 --hostname pignode1 --ip pig/ssh bash
  2. 将172.17.0.3 pignode2
  3. pignode3
  4. pignode4
  5. pignode5
  6. pignode6
  7. pignode7
  8. pignode8
  9. pignode9
  10. pignode10
  11. pignode11
  12. pignode12
  13. pignode13
  14. pignode14
  15. pignode15添加到/etc/hosts中
  16. [root@pignode1 /]# ssh pignode2
  17. Warning: Permanently added 'pignode2,' (ECDSA) to the list of known hosts.
  18. [root@pignode2 ~]# exit
  19. logout


  1. [root@pig1 docker]# docker run -it --name pig2 --hostname pignode2 --ip pig/ssh bash
  2. 将172.17.0.2 pignode1
  3. pignode3
  4. pignode4
  5. pignode5
  6. pignode6
  7. pignode7
  8. pignode8
  9. pignode9
  10. pignode10
  11. pignode11
  12. pignode12
  13. pignode13
  14. pignode14
  15. pignode15添加到/etc/hosts中
  16. [root@pignode2 /]# ssh pignode1
  17. Warning: Permanently added 'pignode1,' (ECDSA) to the list of known hosts.
  18. Last login: Fri Apr 14 04:59:39 2023 from pignode1
  19. [root@pignode1 ~]# exit
  20. logout







  1. version: "3"
  2. services:
  3. pigssh1:
  4. image: centos:centos7
  5. networks:
  6. - pig
  7. hostname: pignode1
  8. pigssh2:
  9. image: centos:centos7
  10. networks:
  11. networks:
  12. - pig
  13. hostname: pignode2
  14. pigssh3:
  15. image: centos:centos7
  16. networks:
  17. networks:
  18. - pig
  19. hostname: pignode3
  20. networks:
  21. pig:


  1. [root@pig1 docker]# docker node ls
  3. 3rrx62qy2gtwcixg46xpsffas * pig1 Ready Active Leader 23.0.1
  4. v3p0j04u0wbxfkhtkzlj0zq0d pig2 Ready Active 23.0.1
  5. u8phg5zq1rlay99acmyca1vlo pig3 Ready Active 23.0.1
  6. [root@pig1 docker]#
  7. [root@pig1 docker]# docker stack deploy -c docker-compose.yml ttt
  8. Updating service ttt_pigssh3 (id: msks8cep346rmpzujo99j91xk)
  9. Updating service ttt_pigssh1 (id: ousc72qs2ygyzcbno2i300zh2)
  10. Updating service ttt_pigssh2 (id: mi6nd9l1bn5st0d97zfmxj62b)
  11. [root@pig1 docker]# docker stack ps ttt
  13. 6srjdbhgglae ttt_pigssh1.1 centos:centos7 pig1 Ready Ready 4 seconds ago
  14. b95dfzw79nwa \_ ttt_pigssh1.1 centos:centos7 pig1 Shutdown Complete 4 seconds ago
  15. rv5e1vko0asc \_ ttt_pigssh1.1 centos:centos7 pig1 Shutdown Complete 10 seconds ago
  16. jd7650kov15k ttt_pigssh2.1 centos:centos7 pig1 Ready Ready less than a second ago
  17. yn1t2lli0j28 \_ ttt_pigssh2.1 centos:centos7 pig1 Shutdown Complete less than a second ago
  18. u4bwnzi4pvgi ttt_pigssh3.1 centos:centos7 pig2 Ready Ready 2 seconds ago
  19. 5vwa1d98o2bo \_ ttt_pigssh3.1 centos:centos7 pig2 Shutdown Complete 3 seconds ago
  20. 1vxrkembyuh4 \_ ttt_pigssh3.1 centos:centos7 pig2 Shutdown Complete 10 seconds ago
  21. z815wmav05m1 \_ ttt_pigssh3.1 centos:centos7 pig2 Shutdown Complete 17 seconds ago


  1. FROM centos:centos7
  2. ENTRYPOINT ["tail","-f","/dev/null"]

         也就是更改官方镜像最后从/bin/bash入口的方式,使用CMD或ENTRYPOINT,以tail -f /dev/null命令作为前台,该命令会一直将前台进程阻塞,从而避免被swarm错误退出。


  1. version: "3"
  2. services:
  3. pigssh1:
  4. image: pig/test
  5. networks:
  6. - pig
  7. hostname: pignode1
  8. pigssh2:
  9. image: pig/test
  10. networks:
  11. networks:
  12. networks:
  13. - pig
  14. hostname: pignode2
  15. pigssh3:
  16. image: pig/test
  17. networks:
  18. networks:
  19. networks:
  20. - pig
  21. hostname: pignode3
  22. networks:
  23. pig:


  1. [root@pig1 docker]# docker stack deploy -c docker-compose.yml ttt
  2. Creating network ttt_pig
  3. Creating service ttt_pigssh2
  4. Creating service ttt_pigssh3
  5. Creating service ttt_pigssh1
  6. [root@pig1 docker]# docker stack ps ttt
  8. 7vqun3os7por ttt_pigssh1.1 pig/test:latest pig1 Running Running 2 seconds ago
  9. hjnb05mcabhm ttt_pigssh2.1 pig/test:latest pig3 Running Running 15 seconds ago
  10. y0wyocsblwrf ttt_pigssh3.1 pig/test:latest pig1 Running Running 8 seconds ago
  11. [root@pig1 docker]# docker ps -a
  13. 6d90c934bb09 pig/test:latest "tail -f /dev/null" 16 seconds ago Up 15 seconds ttt_pigssh1.1.7vqun3os7poryxdbzbr844gxe
  14. 3d47036fd047 pig/test:latest "tail -f /dev/null" 22 seconds ago Up 21 seconds ttt_pigssh3.1.y0wyocsblwrfw49v57l6huv82
  15. [root@pig1 docker]# docker exec -it 6d90c934bb09 bash
  16. [root@pignode1 /]#





  1. [root@pignode1 /]# ping pignode2
  2. PING pignode2 ( 56(84) bytes of data.
  3. 64 bytes from ttt_pigssh2.1.hjnb05mcabhm4vk2loeg89o3v.ttt_pig ( icmp_seq=1 ttl=64 time=1.86 ms
  4. 64 bytes from ttt_pigssh2.1.hjnb05mcabhm4vk2loeg89o3v.ttt_pig ( icmp_seq=2 ttl=64 time=1.04 ms
  5. 64 bytes from ttt_pigssh2.1.hjnb05mcabhm4vk2loeg89o3v.ttt_pig ( icmp_seq=3 ttl=64 time=1.40 ms
  6. ^C
  7. --- pignode2 ping statistics ---
  8. 3 packets transmitted, 3 received, 0% packet loss, time 2003ms
  9. rtt min/avg/max/mdev = 1.047/1.438/1.865/0.334 ms
  10. [root@pignode1 /]# cat /etc/hosts
  11. localhost
  12. ::1 localhost ip6-localhost ip6-loopback
  13. fe00::0 ip6-localnet
  14. ff00::0 ip6-mcastprefix
  15. ff02::1 ip6-allnodes
  16. ff02::2 ip6-allrouters
  17. pignode1

        在我们已经启动的这些pignodes中,我们登入其中一个——比如pignode1,从pignode1里面直接ping pignode2,是能ping通的,也就是说,pignode1能够正确解析pignode2的名字;然后cat一下hosts文件,发现两件事——一是pignode2的IP映射关系并不是通过该文件记录的;二是pignode1的IP和一般容器部署时的IP不一样,是10.0.1.*。




        所以,如下更改init-ssh.sh,只负责启动sshd服务,并且使用tail -f /dev/null挂住前台就好:

  1. #!/bin/bash
  2. #1. Start the SSH daemon; the trailing & puts it in the background
  3. /sbin/sshd -D &
  4. #2. sshd runs in the background, so once the foreground command finishes docker exits the container
  5. # Under a swarm cluster, bash itself seems to be treated as a background program and exits automatically (author's observation)
  6. # so tail -f /dev/null is used here to block and keep a process in the foreground
  7. tail -f /dev/null

        再创建镜像时,也不需要再拷贝hostlist:

  1. FROM pig/centos7
  2. COPY init-ssh.sh /root/init-ssh.sh
  3. COPY .ssh /root/.ssh
  4. RUN chmod +x /root/init-ssh.sh \
  5. && chmod 0400 /root/.ssh/id_rsa \
  6. && echo 'default123' | passwd --stdin root \
  7. && yum install openssh openssh-server openssh-clients -y \
  8. && /sbin/sshd-keygen \
  9. && echo -e '\nHost *\nStrictHostKeyChecking no\nUserKnownHostsFile=/dev/null' >> etc/ssh/ssh_config
  10. ENTRYPOINT ["/root/init-ssh.sh"]


  1. version: "3"
  2. services:
  3. pigssh1:
  4. image: pig/sshs
  5. networks:
  6. - pig
  7. hostname: pignode1
  8. pigssh2:
  9. image: pig/sshs
  10. networks:
  11. - pig
  12. hostname: pignode2
  13. pigssh3:
  14. image: pig/sshs
  15. networks:
  16. - pig
  17. hostname: pignode3
  18. networks:
  19. pig:


  1. [root@pig1 docker]# docker stack deploy -c docker-compose.yml ttt
  2. Creating network ttt_pig
  3. Creating service ttt_pigssh1
  4. Creating service ttt_pigssh2
  5. Creating service ttt_pigssh3
  6. [root@pig1 docker]# docker stack ps ttt
  8. wqtkk5uwb1oa ttt_pigssh1.1 pig/sshs:latest pig3 Running Running 7 seconds ago
  9. qbczq4fx8ulb ttt_pigssh2.1 pig/sshs:latest pig2 Running Running 3 seconds ago
  10. vfeakouuzsbu ttt_pigssh3.1 pig/sshs:latest pig1 Running Running less than a second ago
  11. [root@pig1 docker]# docker ps -a
  13. 002fe4668083 pig/sshs:latest "/root/init-ssh.sh" 8 seconds ago Up 7 seconds ttt_pigssh3.1.vfeakouuzsbuidqubs3yoruz1
  14. [root@pig1 docker]# docker exec -it 002fe4668083 bash
  15. [root@pignode3 /]# ssh pignode1
  16. Warning: Permanently added 'pignode1,' (ECDSA) to the list of known hosts.
  17. [root@pignode1 ~]#



        1. 构建Hadoop节点镜像




  # 1. Start from the official centos7 image.
  FROM centos:centos7
  # 2. The root password is supplied from outside at build time, i.e.
  #    docker build --build-arg password='default123' -t pig/hadoop .
  #    NOTE: build arguments are not a safe channel for secrets; use a
  #    throw-away password here.
  ARG password
  # 3. Switch the yum repositories to the Tsinghua mirror (not needed when
  #    everything is installed offline).
  RUN sed -e 's|^mirrorlist=|#mirrorlist=|g' \
          -e 's|^#baseurl=http://mirror.centos.org/centos|baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos|g' \
          -i.bak \
          /etc/yum.repos.d/CentOS-*.repo \
      && yum clean all \
      && yum makecache
  # 4. Copy the container start-up script, which starts sshd and runs the
  #    Hadoop initialisation steps.
  COPY init-hadoop.sh /root/init-hadoop.sh
  # 5. Copy the key files used for password-less SSH (currently prepared
  #    for 15 nodes).
  COPY .ssh /root/.ssh
  # 6. Copy the packages to install (mainly the rpm files used to install
  #    openssh and the Java SDK offline).
  COPY ./rpm /root/rpm/.
  # 7. Unpack Hadoop under /root; this normally yields a directory named
  #    hadoop-3.3.5.
  ADD hadoop-3.3.5.tar.gz /root
  # 8. SSH prerequisites: permissions on the private key and on the
  #    authorized_keys file, and the root password. The password is quoted
  #    so that whitespace inside it is passed through unchanged.
  RUN chmod 0400 /root/.ssh/id_rsa \
      && chmod 0600 /root/.ssh/authorized_keys \
      && echo "${password}" | passwd --stdin root
  # 9. Install openssh.
  #    Online alternative: RUN yum install openssh openssh-server openssh-clients -y
  #    Offline installation:
  RUN rpm -ivh /root/rpm/tcp_wrappers-libs-7.6-77.el7.x86_64.rpm \
      && rpm -ivh /root/rpm/libedit-3.0-12.20121213cvs.el7.x86_64.rpm \
      && rpm -ivh /root/rpm/fipscheck-1.4.1-6.el7.x86_64.rpm /root/rpm/fipscheck-lib-1.4.1-6.el7.x86_64.rpm \
      && rpm -ivh /root/rpm/openssh-7.4p1-22.el7_9.x86_64.rpm \
      && rpm -ivh /root/rpm/openssh-clients-7.4p1-22.el7_9.x86_64.rpm \
      && rpm -ivh /root/rpm/openssh-server-7.4p1-22.el7_9.x86_64.rpm
  # 10.1 Generate the server-side host keys.
  # 10.2 Configure the SSH client for unattended logins: disable strict
  #      host-key checking so no fingerprint confirmation prompt appears.
  #      The config file is addressed by absolute path so the step does not
  #      depend on the current working directory.
  RUN /sbin/sshd-keygen \
      && echo -e '\nHost *\nStrictHostKeyChecking no\nUserKnownHostsFile=/dev/null' >> /etc/ssh/ssh_config
  # 11. Install the Java runtime.
  #     Online alternative: RUN yum install java-11* -y
  #     Offline installation:
  RUN rpm -ivh /root/rpm/jdk-11.0.19_linux-x64_bin.rpm
  # Some write-ups report that namenodes cannot reach each other without
  # this package; it is small, so it is installed as a precaution.
  RUN rpm -ivh /root/rpm/psmisc-22.20-17.el7.x86_64.rpm
  # 12. Make the start-up script executable and delete the rpm files that
  #     have already been installed.
  #     This instruction must NOT end with a line-continuation backslash:
  #     the comment lines below are dropped by the parser, so a trailing
  #     backslash would splice the following "RUN mv ..." instruction into
  #     this rm command.
  RUN chmod +x /root/init-hadoop.sh \
      && rm -rf /root/rpm
  #------------------------------------ Hadoop environment -------------------------------------#
  # 1. Global Hadoop environment: choose the install/working directory,
  #    export it as HADOOP_HOME and add its bin and sbin directories to PATH
  #    so that hdfs, start-dfs.sh etc. can be run from any directory.
  # 1.1 Rename the Hadoop directory to something shorter.
  # 1.2 Per the author's note only the .bashrc entry actually takes effect
  #     when the container starts; both files are updated anyway.
  RUN mv /root/hadoop-3.3.5 /root/hadoop \
      && echo -e "export HADOOP_HOME=/root/hadoop\nexport PATH=\$PATH:\$HADOOP_HOME/bin\nexport PATH=\$PATH:\$HADOOP_HOME/sbin" >> /etc/profile \
      && echo -e "export HADOOP_HOME=/root/hadoop\nexport PATH=\$PATH:\$HADOOP_HOME/bin\nexport PATH=\$PATH:\$HADOOP_HOME/sbin" >> /root/.bashrc \
      && source /root/.bashrc
  # 2. Set JAVA_HOME in $HADOOP_HOME/etc/hadoop/hadoop-env.sh by
  #    un-commenting the empty "export JAVA_HOME=" line.
  RUN sed -i 's|#[[:blank:]]export[[:blank:]]JAVA_HOME=$|export JAVA_HOME=/usr|g' /root/hadoop/etc/hadoop/hadoop-env.sh
  # 3. Run the HDFS daemons as root.
  # 4. Run the YARN daemons as root.
  RUN echo -e "export HDFS_NAMENODE_USER=root\nexport HDFS_DATANODE_USER=root\nexport HDFS_SECONDARYNAMENODE_USER=root\n">>/root/hadoop/etc/hadoop/hadoop-env.sh \
      && echo -e "export YARN_RESOURCEMANAGER_USER=root\nexport YARN_NODEMANAGER_USER=root\nexport YARN_PROXYSERVER_USER=root">>/root/hadoop/etc/hadoop/yarn-env.sh
  # 5. Default start-up script.
  CMD ["/root/init-hadoop.sh"]





  1. [root@pighost1 Dockerfile-hadoop]# docker build --build-arg password='your password' -t pig/hadoop:cluster .
  2. [+] Building 85.1s (12/12) FINISHED
  3. => [internal] load build definition from Dockerfile 0.0s
  4. => => transferring dockerfile: 3.21kB 0.0s
  5. => [internal] load .dockerignore 0.0s
  6. => => transferring context: 2B 0.0s
  7. => [internal] load metadata for docker.io/library/centos:centos7 0.0s
  8. => [1/7] FROM docker.io/library/centos:centos7 0.0s
  9. => [internal] load build context 0.0s
  10. => => transferring context: 6.12kB 0.0s
  11. => CACHED [2/7] RUN sed -e 's|^mirrorlist=|#mirrorlist=|g' -e 's|^#baseurl=http://mirror.centos.org/centos|baseurl=ht 0.0s
  12. => [3/7] COPY init-hadoop.sh /root/init-hadoop.sh 0.0s
  13. => [4/7] COPY .ssh /root/.ssh 0.0s
  14. => [5/7] COPY ./rpm /root/rpm/. 1.1s
  15. => [6/7] ADD hadoop-3.3.5.tar.gz /root 14.0s
  16. => [7/7] RUN chmod +x /root/init-hadoop.sh && chmod 0400 /root/.ssh/id_rsa && chmod 0600 /root/.ssh/authorized_ke 56.4s
  17. => exporting to image 13.3s
  18. => => exporting layers 13.3s
  19. => => writing image sha256:6bb64f678a7292b9edb7d6b8d58a9b61e8cc8718ef545f9623a84e19652cb77a 0.0s
  20. => => naming to docker.io/pig/hadoop:cluster 0.0s


        (2) 离线方式





  1. [root@pighost1 ~]# ping registry-1.docker.io
  2. PING registry-1.docker.io ( 56(84) bytes of data.
  3. ^C
  4. --- registry-1.docker.io ping statistics ---
  5. 6 packets transmitted, 0 received, 100% packet loss, time 5105ms
  6. [root@pighost1 ~]# docker pull hello-world
  7. Using default tag: latest
  8. Error response from daemon: Get "https://registry-1.docker.io/v2/": dial tcp: lookup registry-1.docker.io on no such host

        但是同样是在虚拟机所在的windows主机上,还有一个早期安装的Docker Desktop,工作则完全正常,丝滑无比。所以一开始我以为是虚拟机网络的问题,查了好久。



        抓狂的是,这种故障并不稳定。比如,如果我一直pull hello-world的话,可能在数十次尝试后突然就可以了,然后又在几分钟后就又不行了……。一开始我以为是Docker升级了用户权限控制,因为中间有一段,在我登录了以后,就可以在desktop上流畅无比的pull。但是隔了两天,我没有登录的情况下,也能够在desktop(就是后面新安装的那个)流畅无比的pull……。


        其中框出的两部分,就是一次成功的pull(上面)和一次失败的pull(下面)。可以看出,失败的主要原因,就是对registry-1.docker.io的DNS查询失败了。如再上面那张图所示,即使在ping registry-1.docker.io确实能够看到IP的情况下,pull指令本身仍会执行失败。感觉好像就是pull命令自己会发出一次DNS查询,失败了它就不干活一样。



  1. C:\Users\pig> nslookup registry-1.docker.io
  2. 服务器: UnKnown
  3. Address: 2408:8000:1010:1::8
  4. 非权威应答:
  5. 名称: registry-1.docker.io
  6. Addresses:


  1. [root@pighost1 ~]# docker pull hello-world
  2. Using default tag: latest
  3. Error response from daemon: Get "https://registry-1.docker.io/v2/": dial tcp: lookup registry-1.docker.io on no such host
  4. [root@pighost1 ~]# nslookup registry-1.docker.io
  5. Server:
  6. Address:
  7. Non-authoritative answer:
  8. Name: registry-1.docker.io
  9. Address:
  10. Name: registry-1.docker.io
  11. Address:
  12. Name: registry-1.docker.io
  13. Address:
  14. [root@pighost1 ~]# nslookup registry-1.docker.io
  15. Server:
  16. Address:
  17. Non-authoritative answer:
  18. Name: registry-1.docker.io
  19. Address:





        当然因为后面我还得弄集群,并且我现在看着IPv6地址也头疼,就不尝试ipv6能不能解决这个问题了。但是改了DNS以后,docker pull是没有问题,但是在电信宽带的网上,容器内清华的镜像repo库又开始剧烈的不稳定了,甚至一些情况下压根就不通了。无奈之下,尝试使用代理穿透,在改DNS和魔法的双重加持下,pull和repo就都正常了。总之,故障似乎不在我能控制的范围内,一脸懵逼的情况下,还是尝试离线安装吧,又稳定又快。


        离线安装所需的rpm包可以参考我们之前的方法,使用yumdownloader下载,然后使用rpm -ivh命令去安装,就如#9步骤一样。


           2. 配置Hadoop




        在PATH中加入$HADOOP_HOME/bin 和 $HADOOP_HOME/sbin, 以下在#1中完成






        如同之前的记录中搭建hadoop集群所要做的一样,需要在$HADOOP_HOME/etc/hadoop下的hadoop-env.sh文件中,取消注释export JAVA_HOME那一行,并且将其填为/usr,因为java在/usr/bin/java处。

  • 增加HDFS和YARN的root用户



        (2) HADOOP配置文件

        其实大致的配置和CENTOS上的网络安全工具(十二)走向Hadoop(4) Hadoop 集群搭建差不多,只是要根据swarm的特点做一点小小的改变——虽然这个小小的改变也是一个超级大坑。


  • core-site.xml

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3. <!--
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License. See accompanying LICENSE file.
  13. -->
  14. <!-- Put site-specific property overrides in this file. -->
  15. <configuration>
  16. <property><!-- 设置名字服务器的URI,Hadoop根据此设置识别NameNode,否则datanode无法识别namenode -->
  17. <name>fs.defaultFS</name>
  18. <value>hdfs://pignode1:9000</value>
  19. </property>
  20. <property><!-- 设置可以通过WEB页面访问的用户身份, 否则无法登录WEB管理界面 -->
  21. <name>hadoop.http.staticuser.user</name>
  22. <value>root</value>
  23. </property>
  24. <property><!-- 设置dfs目录, 容器需要将宿主机目录映射到该目录上 -->
  25. <name>hadoop.tmp.dir</name>
  26. <value>/hadoopdata</value>
  27. </property>
  28. </configuration>
  • hdfs-site.xml

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3. <!--
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License. See accompanying LICENSE file.
  13. -->
  14. <!-- Put site-specific property overrides in this file. -->
  15. <configuration>
  16. <property><!--设置文件系统副本数, 一般默认为3 -->
  17. <name>dfs.replication</name>
  18. <value>3</value>
  19. </property>
  20. <property><!--设置名字服务器WEB管理页面访问端口, 注意这个0.0.0.0,和之前设置为pignode1不一样了 -->
  21. <name>dfs.namenode.http-address</name>
  22. <value></value>
  23. </property>
  24. <property><!--设置第二名字服务器,地址与页面访问端口, start-dfs.sh会根据这个设置在相应节点启动第二名字服务器 -->
  25. <name>dfs.namenode.secondary.http-address</name>
  26. <value>pignode2:9890</value>
  27. </property>
  28. </configuration>
  • yarn-site.xml

  1. <?xml version="1.0"?>
  2. <!--
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License. See accompanying LICENSE file.
  12. -->
  13. <configuration>
  14. <!-- Site specific YARN configuration properties -->
  15. <property><!--设置YARN服务器主机,但似乎start-yarn.sh识别但不使用这个,如果在pignode1上运行start-all.sh,resourcemanager是无法启动的,必须在pignode2上start-yarn.sh才行-->
  16. <name>yarn.resourcemanager.hostname</name>
  17. <value>pignode2</value>
  18. </property>
  19. <property><!--设置YARN的WEB管理界面登录端口, 同样需要设置0.0.0.0 -->
  20. <name>yarn.resourcemanager.webapp.address</name>
  21. <value></value>
  22. </property>
  23. <property><!--设置YARN的WEB代理服务器地址 -->
  24. <name>yarn.web-proxy.address</name>
  25. <value>pignode2:8090</value>
  26. </property>
  27. <property><!--设置工作节点的算法为mapreduce -->
  28. <name>yarn.nodemanager.aux-services</name>
  29. <value>mapreduce_shuffle</value>
  30. </property>
  31. <property><!--设置环境变量白名单,不设会工作异常 -->
  32. <name>yarn.nodemanager.env-whitelist</name>
  34. </property>
  35. </configuration>
  • mapred-site.xml
  1. <?xml version="1.0"?>
  2. <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3. <!--
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License. See accompanying LICENSE file.
  13. -->
  14. <!-- Put site-specific property overrides in this file. -->
  15. <configuration>
  16. <property><!--指定MapReduce使用的集群框架(YARN)-->
  17. <name>mapreduce.framework.name</name>
  18. <value>yarn</value>
  19. </property>
  20. <property><!--设置Job History Server的地址-->
  21. <name>mapreduce.jobhistory.address</name>
  22. <value>pignode3:10020</value>
  23. </property>
  24. <property><!--设置Job History Server的WEB管理界面端口-->
  25. <name>mapreduce.jobhistory.webapp.address</name>
  26. <value></value>
  27. </property>
  28. <property><!--设置mapreduce 库/算子的路径, 不设就找不到, 找不到就算不了 -->
  29. <name>mapreduce.application.classpath</name>
  30. <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
  31. </property>
  32. </configuration>





  1. [root@pighost1 Dockerfile-hadoop]# route add -net gw dev docker_gwbridge
  2. [root@pighost1 Dockerfile-hadoop]#



       在之前配置clickhouse的时候,我也啥都没干,就照样顺畅地连上了clickhouse的服务器啊,唯一的区别,只不过是那次我用的是官方的镜像而已。总不至于是我的Dockerfile或者docker-compose.yml有问题吧。在纠结了两天应该如何EXPOSE以及如何在宿主机的iptables下添加记录(明明防火墙都关了)以后,我突然想起来,在clickhouse的配置中,还有一样东西是在hadoop配置里没有做到的:CENTO OS上的网络安全工具(二十)ClickHouse swarm容器化集群部署




        3. 初始化启动脚本

        初始化脚本如下,看注释就好。总之就是启动sshd,然后看是不是所有节点都启动成功了,启动成功了,就判断是否格式化,没有就先格了,然后依次start dfs,yarn和history server。

  1. #! /bin/bash
  2. # the NODE_COUNT param set by swarm config yml file, using endpoint_environment flag.
  4. TRYLOOP=50
  5. ############################################################################################################
  6. ## 1. source一下环境变量,虽然docker也会在载入的时候source它,保险起见,自己也来一遍
  7. ############################################################################################################
  8. source /etc/profile
  9. source /root/.bashrc
  10. ############################################################################################################
  11. ## 2. 启动openssh服务
  12. ############################################################################################################
  13. /sbin/sshd -D &
  14. ############################################################################################################
  15. ## 3. 定义后面初始化过程中要调用的函数
  16. ############################################################################################################
  #######################################
  # Wait until every cluster node answers a ping, so that later steps
  # (such as formatting) are not run before all nodes are up.
  # Arguments:
  #   $1 - hostname prefix of the nodes (the name without its trailing number)
  #   $2 - number of nodes; the hostnames probed are <prefix>1 .. <prefix>N
  #   $3 - how many retry rounds to attempt before giving up
  # Outputs:
  #   Appends progress lines to init.log in the current directory.
  # Returns:
  #   The number of nodes that are still unreachable (0 when all answered).
  #######################################
  isAllNodesConnected(){
    # Bind the documented positional parameters; everything is local so the
    # function does not leak state into the calling script.
    local node_prefix=$1
    local node_count=${2:-0}
    local max_rounds=${3:-0}
    local round=0
    local idx
    local pending=$node_count
    local -a nodes=()
    local -a reached=()

    # Build the hostname table and mark every node as not yet reached.
    for (( idx = 1; idx <= node_count; idx++ )); do
      nodes[idx]="${node_prefix}${idx}"
      reached[idx]=0
    done

    # Each round re-counts the unreachable nodes from scratch; nodes that
    # already answered in an earlier round are not pinged again.
    while (( pending > 0 )); do
      pending=$node_count
      for (( idx = 1; idx <= node_count; idx++ )); do
        if (( reached[idx] == 0 )); then
          if ping -c 1 "${nodes[idx]}" > /dev/null; then
            reached[idx]=1
            pending=$(( pending - 1 ))
            echo "Try to connect ${nodes[idx]}:successed." >>init.log
          else
            echo "Try to connect ${nodes[idx]}: failed." >>init.log
          fi
        else
          pending=$(( pending - 1 ))
        fi
      done

      if (( pending > 0 )); then
        round=$(( round + 1 ))
        if (( round > max_rounds )); then
          echo "ERROR Tried ${max_rounds} loops. ${pending} nodes failed, exit." >>init.log
          break
        fi
        echo "${pending} left for ${node_count} nodes not connected, waiting for next try">>init.log
        # Give slow nodes time to come up before the next round.
        sleep 5
      else
        echo "All nodes are connected.">>init.log
      fi
    done
    return "$(( pending ))"
  }
  74. #----------------------------------------------------------------------------------------------------------
  #######################################
  # Print the directory configured as hadoop.tmp.dir in core-site.xml.
  # Globals:
  #   HADOOP_HOME (read) - the file read is $HADOOP_HOME/etc/hadoop/core-site.xml
  # Outputs:
  #   Writes the configured value to stdout, or an empty line when the
  #   property (or the file) cannot be found.
  #######################################
  getDataDirectory(){
    local config_file="${HADOOP_HOME}/etc/hadoop/core-site.xml"
    local data_dir

    # Take the <name> line plus the two lines after it, then extract the text
    # between <value> and </value>. Whitespace (including a trailing CR)
    # around the value is ignored, and <name>/<value> may share one line.
    data_dir=$(grep -F -A 2 -- '<name>hadoop.tmp.dir</name>' "$config_file" \
      | sed -n 's|.*<value>[[:space:]]*\(.*[^[:space:]]\)[[:space:]]*</value>.*|\1|p' \
      | head -n 1)

    # printf with a quoted argument: the path is emitted verbatim, without
    # word splitting or glob expansion.
    printf '%s\n' "$data_dir"
  }
  ############################################################################################################
  ## 4. Check whether this is the master node (hostname index 1); if so, run the initialisation            ##
  ############################################################################################################
  nodehostname=$(hostname)
  # Split the hostname into "<prefix><trailing digits>", e.g. pignode12 -> pignode + 12.
  hostname_pattern='^(.*[^0-9])?([0-9]*)$'
  if [[ "$nodehostname" =~ $hostname_pattern ]]; then
    nodehostnameprefix=${BASH_REMATCH[1]}
    nodeindex=${BASH_REMATCH[2]}
  else
    nodehostnameprefix=$nodehostname
    nodeindex=""
  fi
  # Switch to the Hadoop installation directory; the relative bin/ and sbin/ paths below depend on it.
  cd "$HADOOP_HOME" || echo "ERROR:Can not change directory to HADOOP_HOME (${HADOOP_HOME})." >> init.log
  # Only the node with index 1 performs the initialisation; every other node just waits.
  # A hostname without a numeric suffix is never the master. 10# forces base 10 so that
  # indexes such as 08 are not rejected as invalid octal numbers.
  if [[ -z "$nodeindex" ]] || (( 10#$nodeindex != 1 )); then
    echo "$nodehostname waiting for init..." >> init.log
  else
    # Pick the YARN node (node 2 by default) and the MapReduce history node (node 3 by default);
    # both fall back to node 1 when the cluster is too small.
    if (( ${NODECOUNT:-0} >= 2 )); then
      yarnnodeid=2
    else
      yarnnodeid=1
    fi
    if (( ${NODECOUNT:-0} >= 3 )); then
      maprednodeid=3
    else
      maprednodeid=1
    fi
    echo "$nodehostname is one of the init manager nodes..." >> init.log
    # Wait until every node answers ping before touching HDFS.
    if isAllNodesConnected "$nodehostnameprefix" "$NODECOUNT" "$TRYLOOP"; then
      # All nodes are reachable: initialise Hadoop from here on.
      datadirectory=$(getDataDirectory)
      # A non-empty data directory means HDFS was formatted before, so only start it;
      # an empty one has to be formatted first.
      if [[ -n "$datadirectory" ]]; then
        datadircontent=$(ls -A -- "$datadirectory")
        if [[ -z "$datadircontent" ]]; then
          echo "format dfs" >> init.log
          bin/hdfs namenode -format >> init.log
        else
          echo "dfs is already formatted." >> init.log
        fi
      else
        echo "ERROR:Can not get hadoop tmp data directory.init can not be done. " >> init.log
      fi
      # start-all.sh is deprecated, so start-dfs.sh and start-yarn.sh are run separately.
      echo "Init dfs --------------------------------------------------------------------" >> init.log
      sbin/start-dfs.sh
      echo "Init yarn -------------------------------------------------------------------" >> init.log
      ssh "root@${nodehostnameprefix}${yarnnodeid}" "bash ${HADOOP_HOME}/sbin/start-yarn.sh" >> init.log
      # The JobHistory server has to be started on its own.
      echo "Init JobHistory server-------------------------------------------------------" >> init.log
      ssh "root@${nodehostnameprefix}${maprednodeid}" "bash ${HADOOP_HOME}/bin/mapred --daemon start historyserver" >> init.log
    else
      echo "ERROR:Not all the nodes is connected. init can not be done. exit..." >> init.log
    fi
  fi
  # Keep a foreground process alive so that swarm does not restart the container.
  tail -f /dev/null


  #!/bin/bash
  # Recreate the per-container data directories <root>/1 .. <root>/<count>
  # that the swarm stack bind-mounts into each Hadoop node.
  # Usage: <script> [data_root] [node_count]   (defaults: /hadoopdata, 12)
  # WARNING: everything currently under <root> is deleted.
  data_root=${1:-/hadoopdata}
  node_count=${2:-12}

  # ${data_root:?} aborts instead of expanding to "/*" should the root be empty.
  rm -rf -- "${data_root:?}"/*
  for (( index = 1; index <= node_count; index++ )); do
    # -p also creates the root itself on a fresh host.
    mkdir -p -- "${data_root}/${index}"
  done

        4. swarm stack的配置文件


  1. version: "3.7"
  2. services:
  3. # 使用pignode1作为Hadoop的Namenode,开放9000端口
  4. # 使用pignode1作为Hadoop的Namenode Http服务器,开放9870端口
  5. pignode1:
  6. image: pig/hadoop:cluster
  7. deploy:
  8. endpoint_mode: dnsrr
  9. restart_policy:
  10. condition: on-failure
  11. placement:
  12. constraints:
  13. - node.hostname==pighost1
  14. hostname: pignode1
  15. environment:
  16. - NODE_COUNT=12
  17. networks:
  18. - pig
  19. ports:
  20. - target: 22
  21. published: 9011
  22. protocol: tcp
  23. mode: host
  24. - target: 9000
  25. published: 9000
  26. protocol: tcp
  27. mode: host
  28. - target: 9870
  29. published: 9870
  30. protocol: tcp
  31. mode: host
  32. volumes:
  33. # 映射xml配置文件
  34. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  35. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  36. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  37. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  38. # 映射workers文件
  39. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  40. # 映射数据目录
  41. - /hadoopdata/1:/hadoopdata:wr
  42. pignode2:
  43. image: pig/hadoop:cluster
  44. deploy:
  45. endpoint_mode: dnsrr
  46. restart_policy:
  47. condition: on-failure
  48. placement:
  49. # 将Second Namenode限制部署在第二个节点上
  50. constraints:
  51. - node.hostname==pighost2
  52. networks:
  53. - pig
  54. hostname: pignode2
  55. environment:
  56. - NODE_COUNT=12
  57. ports:
  58. # 第二名字服务器接口
  59. - target: 22
  60. published: 9012
  61. protocol: tcp
  62. mode: host
  63. - target: 9890
  64. published: 9890
  65. protocol: tcp
  66. mode: host
  67. - target: 8088
  68. published: 8088
  69. protocol: tcp
  70. mode: host
  71. volumes:
  72. # 映射xml配置文件
  73. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  74. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  75. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  76. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  77. # 映射workers文件
  78. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  79. # 映射数据目录
  80. - /hadoopdata/2:/hadoopdata:wr
  81. pignode3:
  82. image: pig/hadoop:cluster
  83. deploy:
  84. endpoint_mode: dnsrr
  85. restart_policy:
  86. condition: on-failure
  87. placement:
  88. # 将Mapreduce限制部署在第三个节点上
  89. constraints:
  90. - node.hostname==pighost3
  91. networks:
  92. - pig
  93. hostname: pignode3
  94. environment:
  95. - NODE_COUNT=12
  96. ports:
  97. - target: 22
  98. published: 9013
  99. protocol: tcp
  100. mode: host
  101. - target: 10020
  102. published: 10020
  103. protocol: tcp
  104. mode: host
  105. - target: 19888
  106. published: 19888
  107. protocol: tcp
  108. mode: host
  109. volumes:
  110. # 映射xml配置文件
  111. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  112. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  113. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  114. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  115. # 映射workers文件
  116. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  117. # 映射数据目录
  118. - /hadoopdata/3:/hadoopdata:wr
  119. #------------------------------------------------------------------------------------------------
  120. #以下均为工作节点,可在除leader以外的主机上部署
  121. pignode4:
  122. image: pig/hadoop:cluster
  123. deploy:
  124. endpoint_mode: dnsrr
  125. restart_policy:
  126. condition: on-failure
  127. placement:
  128. # 工作节点:限制部署在pighost3上
  129. constraints:
  130. # node.role==manager
  131. # node.role==worker
  132. - node.hostname==pighost3
  133. networks:
  134. - pig
  135. environment:
  136. - NODE_COUNT=12
  137. ports:
  138. - target: 22
  139. published: 9014
  140. protocol: tcp
  141. mode: host
  142. hostname: pignode4
  143. volumes:
  144. # 映射xml配置文件
  145. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  146. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  147. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  148. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  149. # 映射workers文件
  150. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  151. # 映射数据目录
  152. - /hadoopdata/4:/hadoopdata:wr
  153. pignode5:
  154. image: pig/hadoop:cluster
  155. deploy:
  156. endpoint_mode: dnsrr
  157. restart_policy:
  158. condition: on-failure
  159. placement:
  160. # 工作节点:限制部署在pighost3上
  161. constraints:
  162. # node.role==manager
  163. - node.hostname==pighost3
  164. networks:
  165. - pig
  166. ports:
  167. - target: 22
  168. published: 9015
  169. protocol: tcp
  170. mode: host
  171. hostname: pignode5
  172. environment:
  173. - NODE_COUNT=12
  174. volumes:
  175. # 映射xml配置文件
  176. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  177. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  178. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  179. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  180. # 映射workers文件
  181. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  182. # 映射数据目录
  183. - /hadoopdata/5:/hadoopdata:wr
  184. pignode6:
  185. image: pig/hadoop:cluster
  186. deploy:
  187. endpoint_mode: dnsrr
  188. restart_policy:
  189. condition: on-failure
  190. placement:
  191. # 工作节点:限制部署在pighost3上
  192. constraints:
  193. # node.role==manager
  194. - node.hostname==pighost3
  195. networks:
  196. - pig
  197. ports:
  198. - target: 22
  199. published: 9016
  200. protocol: tcp
  201. mode: host
  202. hostname: pignode6
  203. environment:
  204. - NODE_COUNT=12
  205. volumes:
  206. # 映射xml配置文件
  207. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  208. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  209. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  210. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  211. # 映射workers文件
  212. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  213. # 映射数据目录
  214. - /hadoopdata/6:/hadoopdata:wr
  215. pignode7:
  216. image: pig/hadoop:cluster
  217. deploy:
  218. endpoint_mode: dnsrr
  219. restart_policy:
  220. condition: on-failure
  221. placement:
  222. # 工作节点:限制部署在pighost4上
  223. constraints:
  224. # node.role==manager
  225. - node.hostname==pighost4
  226. networks:
  227. - pig
  228. ports:
  229. - target: 22
  230. published: 9017
  231. protocol: tcp
  232. mode: host
  233. hostname: pignode7
  234. environment:
  235. - NODE_COUNT=12
  236. volumes:
  237. # 映射xml配置文件
  238. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  239. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  240. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  241. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  242. # 映射workers文件
  243. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  244. # 映射数据目录
  245. - /hadoopdata/7:/hadoopdata:wr
  246. pignode8:
  247. image: pig/hadoop:cluster
  248. deploy:
  249. endpoint_mode: dnsrr
  250. restart_policy:
  251. condition: on-failure
  252. placement:
  253. # 工作节点:限制部署在pighost4上
  254. constraints:
  255. # node.role==manager
  256. - node.hostname==pighost4
  257. networks:
  258. - pig
  259. ports:
  260. - target: 22
  261. published: 9018
  262. protocol: tcp
  263. mode: host
  264. hostname: pignode8
  265. environment:
  266. - NODE_COUNT=12
  267. volumes:
  268. # 映射xml配置文件
  269. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  270. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  271. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  272. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  273. # 映射workers文件
  274. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  275. # 映射数据目录
  276. - /hadoopdata/8:/hadoopdata:wr
  277. pignode9:
  278. image: pig/hadoop:cluster
  279. deploy:
  280. endpoint_mode: dnsrr
  281. restart_policy:
  282. condition: on-failure
  283. placement:
  284. # 工作节点:限制部署在pighost4上
  285. constraints:
  286. # node.role==manager
  287. - node.hostname==pighost4
  288. networks:
  289. - pig
  290. ports:
  291. - target: 22
  292. published: 9019
  293. protocol: tcp
  294. mode: host
  295. hostname: pignode9
  296. environment:
  297. - NODE_COUNT=12
  298. volumes:
  299. # 映射xml配置文件
  300. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  301. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  302. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  303. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  304. # 映射workers文件
  305. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  306. # 映射数据目录
  307. - /hadoopdata/9:/hadoopdata:wr
  308. pignode10:
  309. image: pig/hadoop:cluster
  310. deploy:
  311. endpoint_mode: dnsrr
  312. restart_policy:
  313. condition: on-failure
  314. placement:
  315. # 工作节点:限制部署在pighost5上
  316. constraints:
  317. # node.role==manager
  318. - node.hostname==pighost5
  319. networks:
  320. - pig
  321. ports:
  322. - target: 22
  323. published: 9020
  324. protocol: tcp
  325. mode: host
  326. hostname: pignode10
  327. environment:
  328. - NODE_COUNT=12
  329. volumes:
  330. # 映射xml配置文件
  331. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  332. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  333. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  334. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  335. # 映射workers文件
  336. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  337. # 映射数据目录
  338. - /hadoopdata/10:/hadoopdata:wr
  339. pignode11:
  340. image: pig/hadoop:cluster
  341. deploy:
  342. endpoint_mode: dnsrr
  343. restart_policy:
  344. condition: on-failure
  345. placement:
  346. # 工作节点:限制部署在pighost5上
  347. constraints:
  348. # node.role==manager
  349. - node.hostname==pighost5
  350. networks:
  351. - pig
  352. ports:
  353. - target: 22
  354. published: 9021
  355. protocol: tcp
  356. mode: host
  357. hostname: pignode11
  358. environment:
  359. - NODE_COUNT=12
  360. volumes:
  361. # 映射xml配置文件
  362. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  363. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  364. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  365. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  366. # 映射workers文件
  367. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  368. # 映射数据目录
  369. - /hadoopdata/11:/hadoopdata:wr
  370. pignode12:
  371. image: pig/hadoop:cluster
  372. deploy:
  373. endpoint_mode: dnsrr
  374. restart_policy:
  375. condition: on-failure
  376. placement:
  377. # 工作节点:限制部署在pighost5上
  378. constraints:
  379. # node.role==manager
  380. - node.hostname==pighost5
  381. networks:
  382. - pig
  383. ports:
  384. - target: 22
  385. published: 9022
  386. protocol: tcp
  387. mode: host
  388. hostname: pignode12
  389. environment:
  390. - NODE_COUNT=12
  391. volumes:
  392. # 映射xml配置文件
  393. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  394. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  395. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  396. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  397. # 映射workers文件
  398. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  399. # 映射数据目录
  400. - /hadoopdata/12:/hadoopdata:wr
  401. networks:
  402. pig:




        排查了一个下午,才发现原因,是在某些datanode启动的时候,swarm名字服务给出了错误的namenode ip地址……犯罪现场是已经没有了,当时也百思不得其解,不过搞明白后还原现场的照片还在:



   三、 Hadoop HA集群部署

        高可用有两种方式,一种是使用Quorum Journal Manager(QJM,基于一组Journalnode)在活跃名字服务器和待机名字服务器间同步编辑信息,另一种是使用传统的NFS共享存储来帮助编辑信息同步。鉴于把NFS的高可用性建立在另一个NFS上的这种奇怪逻辑,这里毫不犹豫选择了QJM模式,希望是对的。

        1. DFS部分配置





  1. <property>
  2. <name>fs.defaultFS</name>
  3. <value>hdfs://mycluster</value>
  4. </property>



  •         dfs.nameservices


  1. <property>
  2. <name>dfs.nameservices</name>
  3. <value>mycluster</value>
  4. </property>
  •         dfs.ha.namenodes.[nameservice ID]


  1. <property>
  2. <name>dfs.ha.namenodes.mycluster</name>
  3. <value>nn1,nn2, nn3</value>
  4. </property>
  •         dfs.namenode.rpc-address.[nameservice ID].[name node ID]


  1. <property>
  2. <name>dfs.namenode.rpc-address.mycluster.nn1</name>
  3. <value>machine1.example.com:8020</value>
  4. </property>
  5. <property>
  6. <name>dfs.namenode.rpc-address.mycluster.nn2</name>
  7. <value>machine2.example.com:8020</value>
  8. </property>
  9. <property>
  10. <name>dfs.namenode.rpc-address.mycluster.nn3</name>
  11. <value>machine3.example.com:8020</value>
  12. </property>
  •         dfs.namenode.http-address.[nameservice ID].[name node ID] 


  1. <property>
  2. <name>dfs.namenode.http-address.mycluster.nn1</name>
  3. <value>machine1.example.com:9870</value>
  4. </property>
  5. <property>
  6. <name>dfs.namenode.http-address.mycluster.nn2</name>
  7. <value>machine2.example.com:9870</value>
  8. </property>
  9. <property>
  10. <name>dfs.namenode.http-address.mycluster.nn3</name>
  11. <value>machine3.example.com:9870</value>
  12. </property>


  •         dfs.namenode.shared.edits.dir


  1. <property>
  2. <name>dfs.namenode.shared.edits.dir</name>
  3. <value>qjournal://node1.example.com:8485;node2.example.com:8485;node3.example.com:8485/mycluster</value>
  4. </property>


  •         dfs.client.failover.proxy.provider.[nameservice ID] 

        定义HDFS客户端用来确定活跃名字服务器的Java class,也就是客户端用这个来确定应该和哪个名字服务器通信。这里只有nameserviceID需要改一下,改成我们自己取的那个名字。

  1. <property>
  2. <name>dfs.client.failover.proxy.provider.mycluster</name>
  3. <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  4. </property>
  •         dfs.ha.fencing.methods

        一组java class或脚本的列表,在故障迁移期间用来隔离(fence)原活跃的名字服务器。比如sshfence,使用ssh连接到原活跃的名字服务器并kill进程。所以被杀节点的~/.ssh/authorized_keys文件中应该有发起节点的公钥,发起节点的私钥存储位置如下定义。

  1. <property>
  2. <name>dfs.ha.fencing.methods</name>
  3. <value>sshfence</value>
  4. </property>
  5. <property>
  6. <name>dfs.ha.fencing.ssh.private-key-files</name>
  7. <value>/home/exampleuser/.ssh/id_rsa</value>
  8. </property>
  •         dfs.journalnode.edits.dir


  1. <property>
  2. <name>dfs.journalnode.edits.dir</name>
  3. <value>/path/to/journal/node/local/data</value>
  4. </property>

        2. Zookeeper部分配置


        这个可以通过在每个zookeeper节点上运行zkServer.sh status来查看,进一步可使用zkCli.sh ls /(或者进入zkCli.sh交互界面后执行ls /)来查看根目录,判断是否工作正常。


        3.  配置自动故障迁移




  1. <property>
  2. <name>ha.zookeeper.quorum</name>
  3. <value>zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181</value>
  4. </property>



  1. <property>
  2. <name>dfs.ha.automatic-failover.enabled</name>
  3. <value>true</value>
  4. </property>

        4. 启动dfs



        在所有的3个名字服务器上,执行:hdfs --daemon start journalnode

  1. [root@pignode1 ~]# hdfs --daemon start journalnode
  2. WARNING: /root/hadoop/logs does not exist. Creating.
  3. [root@pignode1 ~]# jps
  4. 75 JournalNode
  5. 123 Jps
  6. [root@pignode1 ~]#
  7. [root@pignode2 ~]# hdfs --daemon start journalnode
  8. WARNING: /root/hadoop/logs does not exist. Creating.
  9. [root@pignode2 ~]# jps
  10. 75 JournalNode
  11. 123 Jps
  12. [root@pignode3 ~]# hdfs --daemon start journalnode
  13. WARNING: /root/hadoop/logs does not exist. Creating.
  14. [root@pignode3 ~]# jps
  15. 75 JournalNode
  16. 123 Jps


        这里只介绍安装完全新鲜的HA集群的做法,升级HA或者迁移数据什么的,请参考官网描述Apache Hadoop 3.3.5 – HDFS High Availability Using the Quorum Journal Manager

       在其中一个名字服务器节点上,进行格式化,比如pignode1: hdfs namenode -format

  1. [root@pignode1 ~]# hdfs namenode -format
  2. 2023-05-11 09:39:06,842 INFO namenode.NameNode: STARTUP_MSG:
  3. /************************************************************
  4. STARTUP_MSG: Starting NameNode
  5. ……………
  6. ……………
  7. 2023-05-11 09:39:09,145 INFO namenode.FSImage: Allocated new BlockPoolId: BP-1327835470-
  8. 2023-05-11 09:39:09,159 INFO common.Storage: Storage directory /hadoopdata/hdfs_name has been successfully formatted.
  9. 2023-05-11 09:39:09,320 INFO namenode.FSImageFormatProtobuf: Saving image file /hadoopdata/hdfs_name/current/fsimage.ckpt_0000000000000000000 using no compression
  10. 2023-05-11 09:39:09,403 INFO namenode.FSImageFormatProtobuf: Image file /hadoopdata/hdfs_name/current/fsimage.ckpt_0000000000000000000 of size 396 bytes saved in 0 seconds .
  11. 2023-05-11 09:39:09,409 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
  12. 2023-05-11 09:39:09,456 INFO namenode.FSNamesystem: Stopping services started for active state
  13. 2023-05-11 09:39:09,457 INFO namenode.FSNamesystem: Stopping services started for standby state
  14. 2023-05-11 09:39:09,466 INFO namenode.FSImage: FSImageSaver clean checkpoint: txid=0 when meet shutdown.
  15. 2023-05-11 09:39:09,467 INFO namenode.NameNode: SHUTDOWN_MSG:
  16. /************************************************************
  17. SHUTDOWN_MSG: Shutting down NameNode at pignode1/
  18. ************************************************************/
  19. [root@pignode1 ~]#

       启动该名字服务器 ,否则后面在其它服务器上同步元数据会因为连不上服务器而失败   

  1. [root@pignode1 ~]# hdfs --daemon start namenode
  2. [root@pignode1 ~]# jps
  3. 259 NameNode
  4. 341 Jps
  5. 75 JournalNode

        在其它名字服务器节点上,执行:hdfs namenode -bootstrapStandby,将已格式化节点的元数据同步到尚未格式化的名字服务器上。从下面的日志可以看到,fsimage是通过HTTP从已启动的名字服务器(pignode1:9870)下载的,所以上一步必须先把它启动起来;同时该命令还会检查Journalnode上是否有足够的编辑日志,这也就是为什么必须首先启动journalnode的原因。

  1. [root@pignode2 ~]# hdfs namenode -bootstrapStandby
  2. 2023-05-11 09:43:54,097 INFO namenode.NameNode: STARTUP_MSG:
  3. /************************************************************
  4. STARTUP_MSG: Starting NameNode
  5. STARTUP_MSG: host = pignode2/
  6. STARTUP_MSG: args = [-bootstrapStandby]
  7. STARTUP_MSG: version = 3.3.5
  8. …………
  9. …………
  10. 2023-05-11 09:58:32,730 INFO common.Util: Assuming 'file' scheme for path /hadoopdata/hdfs_name in configuration.
  11. 2023-05-11 09:58:32,730 INFO common.Util: Assuming 'file' scheme for path /hadoopdata/hdfs_name in configuration.
  12. =====================================================
  13. About to bootstrap Standby ID pignamenode2 from:
  14. Nameservice ID: pignamenodecluster
  15. Other Namenode ID: pignamenode1
  16. Other NN's HTTP address: http://pignode1:9870
  17. Other NN's IPC address: pignode1/
  18. Namespace ID: 1898329509
  19. Block pool ID: BP-1342056252-
  20. Cluster ID: CID-ddaf258a-47c4-4dde-b681-2c9c70872ef1
  21. Layout version: -66
  22. isUpgradeFinalized: true
  23. =====================================================
  24. 2023-05-11 09:58:33,140 INFO common.Storage: Storage directory /hadoopdata/hdfs_name has been successfully formatted.
  25. 2023-05-11 09:58:33,171 INFO common.Util: Assuming 'file' scheme for path /hadoopdata/hdfs_name in configuration.
  26. 2023-05-11 09:58:33,172 INFO common.Util: Assuming 'file' scheme for path /hadoopdata/hdfs_name in configuration.
  27. 2023-05-11 09:58:33,200 INFO namenode.FSEditLog: Edit logging is async:true
  28. 2023-05-11 09:58:33,300 INFO namenode.TransferFsImage: Opening connection to http://pignode1:9870/imagetransfer?getimage=1&txid=0&storageInfo=-66:1898329509:1683799022316:CID-ddaf258a-47c4-4dde-b681-2c9c70872ef1&bootstrapstandby=true
  29. 2023-05-11 09:58:33,436 INFO common.Util: Combined time for file download and fsync to all disks took 0.00s. The file download took 0.00s at 0.00 KB/s. Synchronous (fsync) write to disk of /hadoopdata/hdfs_name/current/fsimage.ckpt_0000000000000000000 took 0.00s.
  30. 2023-05-11 09:58:33,437 INFO namenode.TransferFsImage: Downloaded file fsimage.ckpt_0000000000000000000 size 399 bytes.
  31. 2023-05-11 09:58:33,443 INFO ha.BootstrapStandby: Skipping InMemoryAliasMap bootstrap as it was not configured
  32. 2023-05-11 09:58:33,456 INFO namenode.NameNode: SHUTDOWN_MSG:
  33. /************************************************************
  34. SHUTDOWN_MSG: Shutting down NameNode at pignode2/
  35. ************************************************************/


  1. [root@pignode2 ~]# hdfs --daemon start namenode
  2. [root@pignode2 ~]# jps
  3. 75 JournalNode
  4. 251 NameNode
  5. 332 Jps


  1. [root@pignode3 ~]# hdfs namenode -bootstrapStandby
  2. 2023-05-11 09:46:55,393 INFO namenode.NameNode: STARTUP_MSG:
  3. /************************************************************
  4. STARTUP_MSG: Starting NameNode
  5. STARTUP_MSG: host = pignode3/
  6. STARTUP_MSG: args = [-bootstrapStandby]
  7. STARTUP_MSG: version = 3.3.5
  8. …………
  9. …………
  10. 2023-05-11 10:02:24,114 INFO common.Util: Assuming 'file' scheme for path /hadoopdata/hdfs_name in configuration.
  11. =====================================================
  12. About to bootstrap Standby ID pignamenode3 from:
  13. Nameservice ID: pignamenodecluster
  14. Other Namenode ID: pignamenode1
  15. Other NN's HTTP address: http://pignode1:9870
  16. Other NN's IPC address: pignode1/
  17. Namespace ID: 1898329509
  18. Block pool ID: BP-1342056252-
  19. Cluster ID: CID-ddaf258a-47c4-4dde-b681-2c9c70872ef1
  20. Layout version: -66
  21. isUpgradeFinalized: true
  22. =====================================================
  23. 2023-05-11 10:02:24,409 INFO common.Storage: Storage directory /hadoopdata/hdfs_name has been successfully formatted.
  24. 2023-05-11 10:02:24,420 INFO common.Util: Assuming 'file' scheme for path /hadoopdata/hdfs_name in configuration.
  25. 2023-05-11 10:02:24,421 INFO common.Util: Assuming 'file' scheme for path /hadoopdata/hdfs_name in configuration.
  26. 2023-05-11 10:02:24,450 INFO namenode.FSEditLog: Edit logging is async:true
  27. 2023-05-11 10:02:24,542 INFO namenode.TransferFsImage: Opening connection to http://pignode1:9870/imagetransfer?getimage=1&txid=0&storageInfo=-66:1898329509:1683799022316:CID-ddaf258a-47c4-4dde-b681-2c9c70872ef1&bootstrapstandby=true
  28. 2023-05-11 10:02:24,567 INFO common.Util: Combined time for file download and fsync to all disks took 0.00s. The file download took 0.00s at 0.00 KB/s. Synchronous (fsync) write to disk of /hadoopdata/hdfs_name/current/fsimage.ckpt_0000000000000000000 took 0.00s.
  29. 2023-05-11 10:02:24,568 INFO namenode.TransferFsImage: Downloaded file fsimage.ckpt_0000000000000000000 size 399 bytes.
  30. 2023-05-11 10:02:24,574 INFO ha.BootstrapStandby: Skipping InMemoryAliasMap bootstrap as it was not configured
  31. 2023-05-11 10:02:24,590 INFO namenode.NameNode: SHUTDOWN_MSG:
  32. /************************************************************
  33. SHUTDOWN_MSG: Shutting down NameNode at pignode3/
  34. ************************************************************/


  1. [root@pignode3 ~]# hdfs --daemon start namenode
  2. [root@pignode3 ~]# jps
  3. 249 NameNode
  4. 330 Jps
  5. 75 JournalNode



  1. [root@pignode3 ~]# hdfs haadmin -getAllServiceState
  2. pignode1:8020 standby
  3. pignode2:8020 standby
  4. pignode3:8020 standby


        从一个名字服务器节点执行:hdfs zkfc -formatZK

  1. [root@pignode1 ~]# hdfs zkfc -formatZK
  2. 2023-05-11 10:06:06,802 INFO tools.DFSZKFailoverController: STARTUP_MSG:
  3. /************************************************************
  4. STARTUP_MSG: Starting DFSZKFailoverController
  5. STARTUP_MSG: host = pignode1/
  6. STARTUP_MSG: args = [-formatZK]
  7. STARTUP_MSG: version = 3.3.5
  8. …………
  9. …………
  10. 2023-05-11 10:06:07,564 INFO ha.ActiveStandbyElector: Session connected.
  11. 2023-05-11 10:06:07,618 INFO ha.ActiveStandbyElector: Successfully created /hadoop-ha/pignamenodecluster in ZK.
  12. 2023-05-11 10:06:07,731 INFO zookeeper.ZooKeeper: Session: 0x300052118910000 closed
  13. 2023-05-11 10:06:07,731 WARN ha.ActiveStandbyElector: Ignoring stale result from old client with sessionId 0x300052118910000
  14. 2023-05-11 10:06:07,732 INFO zookeeper.ClientCnxn: EventThread shut down for session: 0x300052118910000
  15. 2023-05-11 10:06:07,736 INFO tools.DFSZKFailoverController: SHUTDOWN_MSG:
  16. /************************************************************
  17. SHUTDOWN_MSG: Shutting down DFSZKFailoverController at pignode1/
  18. ************************************************************/

        (5)启动ZKFC(Zookeeper failover Controller)

        按照官网的说法,从这里可以开始start-dfs.sh了;当然也可以手工 hdfs --daemon start zkfc


  1. [root@pignode1 hadoop]# sbin/start-dfs.sh
  2. Starting namenodes on [pignode1 pignode2 pignode3]
  3. Last login: Thu May 11 10:05:49 UTC 2023 from on pts/0
  4. pignode1: Warning: Permanently added 'pignode1,' (ECDSA) to the list of known hosts.
  5. pignode2: Warning: Permanently added 'pignode2,' (ECDSA) to the list of known hosts.
  6. pignode3: Warning: Permanently added 'pignode3,' (ECDSA) to the list of known hosts.
  7. pignode1: namenode is running as process 259. Stop it first and ensure /tmp/hadoop-root-namenode.pid file is empty before retry.
  8. pignode2: namenode is running as process 251. Stop it first and ensure /tmp/hadoop-root-namenode.pid file is empty before retry.
  9. pignode3: namenode is running as process 249. Stop it first and ensure /tmp/hadoop-root-namenode.pid file is empty before retry.
  10. Starting datanodes
  11. Last login: Thu May 11 10:07:40 UTC 2023 on pts/0
  12. pignode5: Warning: Permanently added 'pignode5,' (ECDSA) to the list of known hosts.
  13. pignode4: Warning: Permanently added 'pignode4,' (ECDSA) to the list of known hosts.
  14. pignode6: Warning: Permanently added 'pignode6,' (ECDSA) to the list of known hosts.
  15. pignode9: Warning: Permanently added 'pignode9,' (ECDSA) to the list of known hosts.
  16. pignode11: Warning: Permanently added 'pignode11,' (ECDSA) to the list of known hosts.
  17. pignode7: Warning: Permanently added 'pignode7,' (ECDSA) to the list of known hosts.
  18. pignode10: Warning: Permanently added 'pignode10,' (ECDSA) to the list of known hosts.
  19. pignode8: Warning: Permanently added 'pignode8,' (ECDSA) to the list of known hosts.
  20. pignode12: Warning: Permanently added 'pignode12,' (ECDSA) to the list of known hosts.
  21. pignode4: WARNING: /root/hadoop/logs does not exist. Creating.
  22. pignode5: WARNING: /root/hadoop/logs does not exist. Creating.
  23. pignode6: WARNING: /root/hadoop/logs does not exist. Creating.
  24. pignode7: WARNING: /root/hadoop/logs does not exist. Creating.
  25. pignode9: WARNING: /root/hadoop/logs does not exist. Creating.
  26. pignode8: WARNING: /root/hadoop/logs does not exist. Creating.
  27. pignode10: WARNING: /root/hadoop/logs does not exist. Creating.
  28. pignode11: WARNING: /root/hadoop/logs does not exist. Creating.
  29. pignode12: WARNING: /root/hadoop/logs does not exist. Creating.
  30. Starting journal nodes [pignode3 pignode2 pignode1]
  31. Last login: Thu May 11 10:07:40 UTC 2023 on pts/0
  32. pignode1: Warning: Permanently added 'pignode1,' (ECDSA) to the list of known hosts.
  33. pignode2: Warning: Permanently added 'pignode2,' (ECDSA) to the list of known hosts.
  34. pignode3: Warning: Permanently added 'pignode3,' (ECDSA) to the list of known hosts.
  35. pignode2: journalnode is running as process 75. Stop it first and ensure /tmp/hadoop-root-journalnode.pid file is empty before retry.
  36. pignode1: journalnode is running as process 74. Stop it first and ensure /tmp/hadoop-root-journalnode.pid file is empty before retry.
  37. pignode3: journalnode is running as process 75. Stop it first and ensure /tmp/hadoop-root-journalnode.pid file is empty before retry.
  38. Starting ZK Failover Controllers on NN hosts [pignode1 pignode2 pignode3]
  39. Last login: Thu May 11 10:07:47 UTC 2023 on pts/0
  40. pignode1: Warning: Permanently added 'pignode1,' (ECDSA) to the list of known hosts.
  41. pignode2: Warning: Permanently added 'pignode2,' (ECDSA) to the list of known hosts.
  42. pignode3: Warning: Permanently added 'pignode3,' (ECDSA) to the list of known hosts.
  43. [root@pignode1 hadoop]#


  1. [root@pignode1 hadoop]# hdfs haadmin -getAllServiceState
  2. pignode1:8020 active
  3. pignode2:8020 standby
  4. pignode3:8020 standby





        5. 配置Yarn  

        参考Apache Hadoop 3.3.5 – ResourceManager High Availability进行Yarn的高可靠配置,涉及的主要参数包括:


  • yarn.resourcemanager.ha.enabled


  1. <property>
  2. <name>yarn.resourcemanager.ha.enabled</name>
  3. <value>true</value>
  4. </property>

        (2)定义HA resource manager集群

  • yarn.resourcemanager.cluster-id


  • yarn.resourcemanager.ha.rm-ids


  • yarn.resourcemanager.hostname.rm-id


  • yarn.resourcemanager.webapp.address.rm-id


  1. <property><!--设置YARN服务器主机-->
  2. <name>yarn.resourcemanager.cluster-id</name>
  3. <value>pignode-ha</value>
  4. </property>
  5. <property>
  6. <name>yarn.resourcemanager.ha.rm-ids</name>
  7. <value>pigresourcemanager1,pigresourcemanager2,pigresourcemanager3</value>
  8. </property>
  9. <property>
  10. <name>yarn.resourcemanager.hostname.pigresourcemanager1</name>
  11. <value>pignode1</value>
  12. </property>
  13. ……
  14. <property>
  15. <name>yarn.resourcemanager.webapp.address.pigresourcemanager1</name>
  16. <value></value>
  17. </property>
  18. ……


  • hadoop.zk.address(下面的示例使用的是它已弃用的旧名 yarn.resourcemanager.zk-address,两者等价)


  1. <property>
  2. <name>yarn.resourcemanager.zk-address</name>
  3. <value>zookeeper1:2181,zookeeper2:2181,zookeeper3:2181</value>
  4. </property>

        6. 启动Yarn

        yarn可以直接使用start-yarn.sh脚本启动。使用yarn rmadmin -getAllServiceState可以查看resourcemanager的状态。

  1. [root@pignode1 hadoop]# sbin/start-yarn.sh
  2. Starting resourcemanagers on [ pignode1 pignode2]
  3. Last login: Thu May 11 10:07:50 UTC 2023 on pts/0
  4. pignode2: Warning: Permanently added 'pignode2,' (ECDSA) to the list of known hosts.
  5. pignode1: Warning: Permanently added 'pignode1,' (ECDSA) to the list of known hosts.
  6. pignode2: WARNING: /root/hadoop/logs does not exist. Creating.
  7. Starting nodemanagers
  8. Last login: Thu May 11 13:32:28 UTC 2023 on pts/0
  9. pignode6: Warning: Permanently added 'pignode6,' (ECDSA) to the list of known hosts.
  10. pignode11: Warning: Permanently added 'pignode11,' (ECDSA) to the list of known hosts.
  11. pignode4: Warning: Permanently added 'pignode4,' (ECDSA) to the list of known hosts.
  12. pignode8: Warning: Permanently added 'pignode8,' (ECDSA) to the list of known hosts.
  13. pignode7: Warning: Permanently added 'pignode7,' (ECDSA) to the list of known hosts.
  14. pignode9: Warning: Permanently added 'pignode9,' (ECDSA) to the list of known hosts.
  15. pignode12: Warning: Permanently added 'pignode12,' (ECDSA) to the list of known hosts.
  16. pignode5: Warning: Permanently added 'pignode5,' (ECDSA) to the list of known hosts.
  17. pignode10: Warning: Permanently added 'pignode10,' (ECDSA) to the list of known hosts.
  18. pignode6: WARNING: /root/hadoop/logs does not exist. Creating.
  19. pignode11: WARNING: /root/hadoop/logs does not exist. Creating.
  20. pignode4: WARNING: /root/hadoop/logs does not exist. Creating.
  21. pignode12: WARNING: /root/hadoop/logs does not exist. Creating.
  22. pignode5: WARNING: /root/hadoop/logs does not exist. Creating.
  23. pignode10: WARNING: /root/hadoop/logs does not exist. Creating.
  24. Last login: Thu May 11 13:32:30 UTC 2023 on pts/0
  25. pignode3: Warning: Permanently added 'pignode3,' (ECDSA) to the list of known hosts.
  26. pignode3: WARNING: /root/hadoop/logs does not exist. Creating.
  27. [root@pignode1 hadoop]#

        启动后可以通过 yarn rmadmin命令查看resourcemanager的情况:

  1. [root@pignode1 hadoop]# yarn rmadmin -getAllServiceState
  2. pignode1:8033 standby
  3. pignode2:8033 active
  4. pignode3:8033 standby



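         7. 启动MapReduce 

        MapReduce的Job History Server需要单独启动:在mapred-site.xml中mapreduce.jobhistory.address所指定的节点(本文为pignode3)上执行 mapred --daemon start historyserver 即可。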


        四、Swarm上的Hadoop HA部署


1. 配置文件

        以下四段配置依次为core-site.xml、hdfs-site.xml、yarn-site.xml和mapred-site.xml的完整内容。


  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3. <!--
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License. See accompanying LICENSE file.
  13. -->
  14. <!-- Put site-specific property overrides in this file. -->
  15. <configuration>
  16. <property>
  17. <!-- 设置名字服务器的URI,高可用配置下是逻辑名,也就是名字服务器群组的名字-->
  18. <name>fs.defaultFS</name>
  19. <value>hdfs://pignamenodecluster</value>
  20. </property>
  21. <property>
  22. <!-- 设置高可用集群的Zookeeper服务器 -->
  23. <name>ha.zookeeper.quorum</name>
  24. <value>zookeeper1:2181,zookeeper2:2181,zookeeper3:2181</value>
  25. </property>
  26. <property>
  27. <!-- 设置root为名字服务器的用户 -->
  28. <name>hadoop.http.staticuser.user</name>
  29. <value>root</value>
  30. </property>
  31. <property>
  32. <!-- 设置hadoop dfs的目录,会被映射到宿主机的对应目录上 -->
  33. <name>hadoop.tmp.dir</name>
  34. <value>/hadoopdata/data</value>
  35. </property>
  36. </configuration>
  37. ~


  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3. <!--
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License. See accompanying LICENSE file.
  13. -->
  14. <!-- Put site-specific property overrides in this file. -->
  15. <configuration>
  16. <property>
  17. <!-- 设置名字服务器群组 -->
  18. <name>dfs.nameservices</name>
  19. <value>pignamenodecluster</value>
  20. </property>
  21. <property>
  22. <name>dfs.ha.namenodes.pignamenodecluster</name>
  23. <value>pignamenode1,pignamenode2,pignamenode3</value>
  24. </property>
  25. <property><!--设置文件系统副本数 -->
  26. <name>dfs.replication</name>
  27. <value>3</value>
  28. </property>
  29. <!-- 配置namenode和datanode的工作目录-数据存储目录 -->
  30. <property>
  31. <name>dfs.namenode.name.dir</name>
  32. <value>/hadoopdata/hdfs_name</value>
  33. </property>
  34. <property>
  35. <name>dfs.datanode.data.dir</name>
  36. <value>/hadoopdata/hdfs_data</value>
  37. </property>
  38. <!-- 启用webhdfs -->
  39. <property>
  40. <name>dfs.webhdfs.enabled</name>
  41. <value>true</value>
  42. </property>
  43. <property>
  44. <name>dfs.namenode.rpc-address.pignamenodecluster.pignamenode1</name>
  45. <value>pignode1:8020</value>
  46. </property>
  47. <property>
  48. <name>dfs.namenode.rpc-address.pignamenodecluster.pignamenode2</name>
  49. <value>pignode2:8020</value>
  50. </property>
  51. <property>
  52. <name>dfs.namenode.rpc-address.pignamenodecluster.pignamenode3</name>
  53. <value>pignode3:8020</value>
  54. </property>
  <!-- HTTP (web UI) address of each NameNode. The values were empty in this
       listing; each entry pairs the host used by the matching rpc-address
       property with the NameNode HTTP port 9870.
       TODO: confirm against the deployed file. -->
  <property>
    <name>dfs.namenode.http-address.pignamenodecluster.pignamenode1</name>
    <value>pignode1:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.pignamenodecluster.pignamenode2</name>
    <value>pignode2:9870</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.pignamenodecluster.pignamenode3</name>
    <value>pignode3:9870</value>
  </property>
  67. <property>
  68. <!--设置名字服务器读写编辑条目的JournalNode集群-->
  69. <name>dfs.namenode.shared.edits.dir</name>
  70. <value>qjournal://pignode1:8485;pignode2:8485;pignode3:8485/pignamenodecluster</value>
  71. </property>
  72. <property>
  73. <name>dfs.journalnode.edits.dir</name>
  74. <value>/hadoopdata/journal</value>
  75. </property>
  76. <property>
  77. <name>dfs.ha.fencing.methods</name>
  78. <value>sshfence</value>
  79. </property>
  80. <property>
  81. <name>dfs.ha.fencing.ssh.private-key-files</name>
  82. <value>/root/.ssh/id_rsa</value>
  83. </property>
  84. <property>
  85. <name>dfs.ha.automatic-failover.enabled</name>
  86. <value>true</value>
  87. </property>
  88. <!-- HDFS客户端用来联系活跃名字服务器的Java Class -->
  89. <property>
  90. <name>dfs.client.failover.proxy.provider.pignamenodecluster</name>
  91. <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  92. </property>
  93. </configuration>


  1. <?xml version="1.0"?>
  2. <!--
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License. See accompanying LICENSE file.
  12. -->
  13. <configuration>
  14. <!-- Site specific YARN configuration properties -->
  15. <property>
  16. <name>yarn.resourcemanager.ha.enabled</name>
  17. <value>true</value>
  18. </property>
  19. <property>
  20. <name>yarn.resourcemanager.recovery.enabled</name>
  21. <value>true</value>
  22. </property>
  23. <property><!--设置YARN服务器主机-->
  24. <name>yarn.resourcemanager.cluster-id</name>
  25. <value>pignode-ha</value>
  26. </property>
  27. <property>
  28. <name>yarn.resourcemanager.ha.rm-ids</name>
  29. <value>pigresourcemanager1,pigresourcemanager2,pigresourcemanager3</value>
  30. </property>
  31. <property>
  32. <name>yarn.resourcemanager.hostname.pigresourcemanager1</name>
  33. <value>pignode1</value>
  34. </property>
  35. <property>
  36. <name>yarn.resourcemanager.hostname.pigresourcemanager2</name>
  37. <value>pignode2</value>
  38. </property>
  39. <property>
  40. <name>yarn.resourcemanager.hostname.pigresourcemanager3</name>
  41. <value>pignode3</value>
  42. </property>
  <!-- Web UI address of each ResourceManager. The values were empty in this
       listing; each entry pairs the host from the matching
       yarn.resourcemanager.hostname.* property with the web port 8088.
       TODO: confirm against the deployed file. -->
  <property>
    <name>yarn.resourcemanager.webapp.address.pigresourcemanager1</name>
    <value>pignode1:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.pigresourcemanager2</name>
    <value>pignode2:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.pigresourcemanager3</name>
    <value>pignode3:8088</value>
  </property>
  55. <property>
  56. <name>yarn.nodemanager.aux-services</name>
  57. <value>mapreduce_shuffle</value>
  58. </property>
  <!-- Environment variables that containers may inherit from the NodeManager.
       The <value> line was missing from this listing; the list below is the one
       given in the Apache Hadoop single-cluster setup guide. It includes
       HADOOP_MAPRED_HOME, which mapreduce.application.classpath in
       mapred-site.xml refers to. TODO: confirm against the deployed file. -->
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value>
  </property>
  <!-- State store used for ResourceManager recovery. The property name is
       yarn.resourcemanager.store.class; the previous spelling "stored.class"
       is not a Hadoop property, so the ZooKeeper state store was never selected. -->
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  67. <property>
  68. <name>yarn.resourcemanager.zk-address</name>
  69. <value>zookeeper1:2181,zookeeper2:2181,zookeeper3:2181</value>
  70. </property>
  71. </configuration>


  1. <?xml version="1.0"?>
  2. <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3. <!--
  4. Licensed under the Apache License, Version 2.0 (the "License");
  5. you may not use this file except in compliance with the License.
  6. You may obtain a copy of the License at
  7. http://www.apache.org/licenses/LICENSE-2.0
  8. Unless required by applicable law or agreed to in writing, software
  9. distributed under the License is distributed on an "AS IS" BASIS,
  10. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. See the License for the specific language governing permissions and
  12. limitations under the License. See accompanying LICENSE file.
  13. -->
  14. <!-- Put site-specific property overrides in this file. -->
  15. <configuration>
  16. <property><!--指定MapReduce使用的集群框架(YARN)-->
  17. <name>mapreduce.framework.name</name>
  18. <value>yarn</value>
  19. </property>
  20. <property><!--设置Job History Server的地址-->
  21. <name>mapreduce.jobhistory.address</name>
  22. <value>pignode3:10020</value>
  23. </property>
  <!-- Web UI address of the Job History Server. The value was empty in this
       listing; pignode3 is the host used by mapreduce.jobhistory.address and
       19888 is the history server web port.
       TODO: confirm against the deployed file. -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>pignode3:19888</value>
  </property>
  28. <property>
  29. <name>mapreduce.application.classpath</name>
  30. <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
  31. </property>
  32. </configuration>

        2. 启动初始化脚本

        下面第一个脚本用于创建各节点挂载所需的数据目录(/hadoopdata下的1~12以及zoo/1~3);第二个脚本是hadoop容器启动时执行的初始化脚本。



  1. #! /bin/bash
  # Rebuild the data directory tree under /hadoopdata from scratch.
  # Everything currently matched by /hadoopdata/* is removed first.
  rm -rf /hadoopdata/*

  # One directory per Hadoop node (1..12), each with the four sub-directories
  # referenced by the Hadoop configuration.
  for ((node = 1; node <= 12; node++)); do
    node_dir="/hadoopdata/${node}"
    mkdir $node_dir
    for sub in data hdfs_name hdfs_data journal; do
      mkdir ${node_dir}/${sub}
    done
  done

  # One directory per ZooKeeper server (1..3); -p also creates /hadoopdata/zoo.
  for ((zk = 1; zk <= 3; zk++)); do
    zk_dir="/hadoopdata/zoo/${zk}"
    for sub in data datalog logs; do
      mkdir -p ${zk_dir}/${sub}
    done
  done



  1. #! /bin/bash
  # NODE_COUNT is expected in the environment; the swarm stack yml file sets it
  # for the service.
  # Number of connection rounds handed to isAllNodesConnected further down.
  TRYLOOP=50
  ############################################################################################################
  ## 1. get environment params
  ############################################################################################################
  source /etc/profile
  source /root/.bashrc
  # The rest of this script reads NODECOUNT, while the environment provides
  # NODE_COUNT. Bridge the two names here; an explicitly set NODECOUNT wins.
  NODECOUNT="${NODECOUNT:-${NODE_COUNT:-}}"
  # NOTE(review): ZOOKEEPERNODECOUNT is also read further down but is not
  # assigned anywhere in this listing - confirm where it is supposed to come from.
  ############################################################################################################
  ## 2. for every node, init sshd service
  ############################################################################################################
  # -D keeps sshd in the foreground of its own process; '&' lets the script go on.
  /sbin/sshd -D &
  ############################################################################################################
  ## 3. define functions
  ############################################################################################################
  #FUNCTION: wait until every host of a numbered group answers a ping ----------------------------------------
  # Hosts are named "<prefix>1" .. "<prefix><count>". Each round pings the hosts
  # that have not answered yet, then sleeps 5 seconds before the next round.
  #param1: node's hostname prefix
  #param2: node count
  #param3: how many rounds the manager node retries before giving up
  #output: progress lines are appended to init.log in the current directory
  #return: number of hosts that are still unreachable (0 means all connected)
  isAllNodesConnected(){
    # Bind the documented parameters. The previous body read PIGNODE_PRENAME,
    # PIGNODE_COUNT and TRYLOOP_COUNT, which were never assigned from the
    # arguments, so the requested hosts were never actually checked.
    local prefix=$1
    local count=$2
    local max_rounds=$3
    local -a reached=()
    local i pending round=0

    # An unquoted empty count at the call site shifts the arguments, so a call
    # with fewer than three arguments or a non-numeric count cannot be trusted.
    # Skip the check (as the script effectively did before) instead of pinging
    # a wrong host range.
    if (( $# < 3 )) || ! [[ "$count" =~ ^[0-9]+$ ]]; then
      echo "WARN node count '${count}' for prefix '${prefix}' is missing or not a number, connectivity check skipped." >>init.log
      return 0
    fi

    for ((i = 1; i <= count; i++)); do
      reached[i]=0
    done

    pending=$count
    while (( pending > 0 )); do
      pending=0
      for ((i = 1; i <= count; i++)); do
        # Hosts that already answered are not pinged again.
        (( reached[i] == 1 )) && continue
        if ping -c 1 "${prefix}${i}" > /dev/null; then
          reached[i]=1
          echo "Try to connect ${prefix}${i}:successed." >>init.log
        else
          pending=$((pending + 1))
          echo "Try to connect ${prefix}${i}: failed." >>init.log
        fi
      done
      if (( pending > 0 )); then
        round=$((round + 1))
        if (( round > max_rounds )); then
          echo "ERROR Tried ${max_rounds} loops. ${pending} nodes failed, exit." >>init.log
          break
        fi
        echo "${pending} left for ${count} nodes not connected, waiting for next try">>init.log
        sleep 5
      else
        echo "All nodes are connected.">>init.log
      fi
    done
    return "$pending"
  }
  75. #----------------------------------------------------------------------------------------------------------
  #FUNCTION: print the NameNode directory configured in hdfs-site.xml ---------------------------------------
  # Looks up "dfs.namenode.name.dir" in ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml,
  # takes the <value> line found within the two lines after the match, strips
  # the <value> tags and prints what is left. The result is also kept in the
  # global variable datadir.
  getDataDirectory(){
    # Alternative kept for reference: read hadoop.tmp.dir from core-site.xml.
    #   configfiledir=`echo "${HADOOP_HOME}/etc/hadoop/core-site.xml"`
    #   datadir=`cat ${configfiledir} | grep -A 2 'hadoop.tmp.dir' | grep '<value>' | sed 's/^[[:blank:]]*<value>//g' | sed 's/<\/value>$//g'`
    #   echo $datadir
    datadir=$(
      cat ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml \
        | grep -A 2 "dfs.namenode.name.dir" \
        | grep "<value>" \
        | sed -e "s/<value>//g" -e "s/<\/value>//g"
    )
    # Left unquoted on purpose: word splitting removes the XML indentation that
    # the sed expressions leave in front of the path.
    echo $datadir
  }
  86. #---------------------------------------------------------------------------------------------------------
  #FUNCTION: first-time initialisation, used while dfs is not formatted yet ----------------------------------
  # Steps, in order:
  #   1. start a journalnode on every host listed in dfs.namenode.shared.edits.dir
  #   2. format and start the namenode on this host
  #   3. bootstrap and start the namenodes on <nodehostnameprefix>2 and <nodehostnameprefix>3
  #   4. run "hdfs zkfc -formatZK"
  # Globals: HADOOP_HOME (read), nodehostnameprefix (read)
  # Output:  progress lines are appended to init.log in the current directory
  # Exit:    a failing step in 2 or 3 terminates the whole script with that
  #          step's exit status; the return status is that of step 4.
  initHadoop_format(){
    local journal_hosts journal_host standby_id

    #init journalnode
    echo 'start all Journalnode' >> init.log
    # Reduce "qjournal://h1:8485;h2:8485;h3:8485/<id>" to the bare host names.
    journal_hosts=$(cat $HADOOP_HOME/etc/hadoop/hdfs-site.xml \
      | grep -A 2 'dfs.namenode.shared.edits.dir' \
      | grep '<value>' \
      | sed -e "s/<value>qjournal:\/\/\(.*\)\/.*<\/value>/\1/g" \
            -e "s/;/ /g" \
            -e "s/:[[:digit:]]\{2,5\}/ /g")
    # Unquoted on purpose: the list is split into one host per word.
    for journal_host in $journal_hosts; do
      ssh "root@${journal_host}" "hdfs --daemon start journalnode"
    done

    #format and start the main namenode
    # "cmd || exit $?" passes on the status of the failed command. The previous
    # "if (( $?!=0 )); then exit $?" always exited with 0, because inside the
    # "then" branch $? already holds the status of the (successful) test.
    echo 'format and start namenode 1'>>init.log
    hdfs namenode -format || exit $?
    hdfs --daemon start namenode || exit $?

    #sync and start other namenodes
    echo 'sync and start others.'>>init.log
    for ((standby_id = 2; standby_id <= 3; standby_id++)); do
      ssh "root@${nodehostnameprefix}${standby_id}" "hdfs namenode -bootstrapStandby" || exit $?
      ssh "root@${nodehostnameprefix}${standby_id}" "hdfs --daemon start namenode" || exit $?
    done

    #format zookeeper directory
    hdfs zkfc -formatZK
  }
  122. #---------------------------------------------------------------------------------------------------------
  #FUNCTION: start all services once dfs is formatted --------------------------------------------------------
  # Runs sbin/start-dfs.sh and sbin/start-yarn.sh (paths relative to the current
  # directory), then starts the MapReduce history server over ssh on the host
  # named in mapreduce.jobhistory.address.
  # Globals: HADOOP_HOME (read)
  # Output:  progress lines are appended to init.log in the current directory
  # Exit:    a failing step terminates the whole script with that step's status.
  initHadoop_noformat(){
    local history_host

    # "cmd || exit $?" passes on the status of the failed command. The previous
    # "if (( $?!=0 )); then exit $?" always exited with 0, because inside the
    # "then" branch $? already holds the status of the (successful) test.
    echo 'name node formatted. go on to start dfs related nodes and service'>>init.log
    sbin/start-dfs.sh || exit $?

    echo 'start yarn resourcemanager and node manager'>>init.log
    sbin/start-yarn.sh || exit $?

    echo 'start mapreduce history server'>>init.log
    # Reduce "<value>host:port</value>" to the bare host name.
    history_host=$(cat $HADOOP_HOME/etc/hadoop/mapred-site.xml \
      | grep -A 2 'mapreduce.jobhistory.address' \
      | grep '<value>' \
      | sed -e "s/^.*<value>//g" \
            -e "s/<\/value>//g" \
            -e "s/:[[:digit:]]*//g")
    ssh "root@${history_host}" "mapred --daemon start historyserver" || exit $?
  }
  ############################################################################################################
  ## 4. test whether this is the main node                                                                 ##
  ############################################################################################################
  #get the host node's name, name prefix, and name No.
  nodehostname=$(hostname)
  nodehostnameprefix=$(echo $nodehostname|sed -e 's|[[:digit:]]\+$||g')
  nodeindex=$(hostname | sed "s/${nodehostnameprefix}//g")
  #get the zookeeper's name prefix from yarn-site.xml
  # The leading blanks are optional so that an unindented <value> line is parsed too.
  zookeepernameprefix=$(cat ${HADOOP_HOME}/etc/hadoop/yarn-site.xml \
    | grep -A 2 '<name>yarn.resourcemanager.zk-address</name>' \
    | grep '<value>' \
    | sed -e "s/^[[:blank:]]*<value>\([[:alpha:]]\+\)[[:digit:]]\+:.*/\1/g")
  #1.ensure in working directory, only the first node can go on initiation.
  cd $HADOOP_HOME
  #check the NODECOUNT param: with 3 nodes or fewer, do nothing and return an error.
  # The check is skipped when NODECOUNT is empty; an empty operand inside (( ))
  # is an arithmetic syntax error, which previously skipped the check as well.
  if [[ -n "${NODECOUNT:-}" ]] && (( NODECOUNT <= 3 )); then
    echo "Nodes count must more than 3.">>init.log
    exit 1
  fi
  #check node id: every node except node 1 only waits, then starts its services.
  if (($nodeindex!=1));then
    echo $nodehostname waiting for init...>>init.log
    # Fixed 5 minute wait before the local start scripts are run.
    sleep 5m
    cd $HADOOP_HOME
    sbin/start-dfs.sh
    sbin/start-yarn.sh
    if (($nodeindex==3));then
      mapred --daemon start historyserver
    fi
    # Keep the container's main process alive.
    tail -f /dev/null
    exit 0
  fi
  #2.Try to connect to all host nodes and zookeeper nodes.
  echo $nodehostname is the init manager nodes...>>init.log
  #waiting for all the nodes connected
  # Arguments are quoted so that an empty count cannot shift the positions.
  isAllNodesConnected "$nodehostnameprefix" "$NODECOUNT" "$TRYLOOP"
  isHadoopOK=$?
  isAllNodesConnected "$zookeepernameprefix" "$ZOOKEEPERNODECOUNT" "$TRYLOOP"
  isZookeeperOK=$?
  if (( isHadoopOK != 0 || isZookeeperOK != 0 )); then
    echo "Not all the host nodes or not all the zookeeper nodes actived. exit 1">>init.log
    # Exit with the failure status the log line announces (this used to be "exit 0").
    exit 1
  fi
  #3. whether dfs is formatted.
  datadirectory=$(getDataDirectory)
  if [[ -n "$datadirectory" ]]; then
    datadircontent=$(ls -A "${datadirectory}")
    # Quoted test: an unquoted listing with several entries made "[ -z ... ]"
    # fail with "too many arguments".
    if [[ -z "$datadircontent" ]]; then
      echo "dfs is not formatted.">>init.log
      isDfsFormat=0
    else
      echo "dfs is already formatted.">>init.log
      isDfsFormat=1
    fi
  else
    echo "ERROR:Can not get hadoop tmp data directory.init can not be done. ">>init.log
    exit 1
  fi
  #4. if not formatted, then do format and sync
  if (( isDfsFormat == 0 )); then
    initHadoop_format
    # Capture the status right away; "exit $?" after echo would report echo's status (0).
    initstatus=$?
    if (( initstatus != 0 )); then
      echo "ERROR:Init Hadoop interruptted...">>init.log
      exit "$initstatus"
    fi
  fi
  #5. start all dfs node, yarn node and mapreduce history server
  initHadoop_noformat
  initstatus=$?
  if (( initstatus != 0 )); then
    echo "ERROR:Init Hadoop interruptted...">>init.log
    exit "$initstatus"
  fi
  echo "hadoop init work has been done. hang up for swarm."
  # Keep the container's main process alive.
  tail -f /dev/null

        3. stack配置文件

  1. version: "3.7"
  2. services:
  3. pignode1:
  4. image: pig/hadoop:ha
  5. deploy:
  6. endpoint_mode: dnsrr
  7. restart_policy:
  8. condition: on-failure
  9. placement:
  10. constraints:
  11. - node.hostname==pighost1
  12. hostname: pignode1
  13. environment:
  14. - NODE_COUNT=12
  16. networks:
  17. - pig
  18. ports:
  19. - target: 22
  20. published: 9011
  21. protocol: tcp
  22. mode: host
  23. - target: 9000
  24. published: 9000
  25. protocol: tcp
  26. mode: host
  27. - target: 9870
  28. published: 9870
  29. protocol: tcp
  30. mode: host
  31. - target: 8088
  32. published: 8088
  33. protocol: tcp
  34. mode: host
  35. volumes:
  36. # 映射xml配置文件
  37. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  38. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  39. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  40. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  41. # 映射workers文件
  42. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  43. # 映射数据目录
  44. - /hadoopdata/1:/hadoopdata:wr
  45. pignode2:
  46. image: pig/hadoop:ha
  47. deploy:
  48. endpoint_mode: dnsrr
  49. restart_policy:
  50. condition: on-failure
  51. placement:
  52. # 将Second Namenode限制部署在第二个节点上
  53. constraints:
  54. - node.hostname==pighost2
  55. networks:
  56. - pig
  57. hostname: pignode2
  58. ports:
  59. # 第二名字服务器接口
  60. - target: 22
  61. published: 9012
  62. protocol: tcp
  63. mode: host
  64. - target: 9890
  65. published: 9890
  66. protocol: tcp
  67. mode: host
  68. - target: 9870
  69. published: 9871
  70. protocol: tcp
  71. mode: host
  72. - target: 8088
  73. published: 8089
  74. protocol: tcp
  75. mode: host
  76. volumes:
  77. # 映射xml配置文件
  78. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  79. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  80. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  81. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  82. # 映射workers文件
  83. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  84. # 映射数据目录
  85. - /hadoopdata/2:/hadoopdata:wr
  86. pignode3:
  87. image: pig/hadoop:ha
  88. deploy:
  89. endpoint_mode: dnsrr
  90. restart_policy:
  91. condition: on-failure
  92. placement:
  93. # 将Mapreduce限制部署在第三个节点上
  94. constraints:
  95. - node.hostname==pighost3
  96. networks:
  97. - pig
  98. hostname: pignode3
  99. ports:
  100. - target: 22
  101. published: 9013
  102. protocol: tcp
  103. mode: host
  104. - target: 9870
  105. published: 9872
  106. protocol: tcp
  107. mode: host
  108. - target: 8088
  109. published: 8087
  110. protocol: tcp
  111. mode: host
  112. - target: 8090
  113. published: 8090
  114. protocol: tcp
  115. mode: host
  116. - target: 10020
  117. published: 10020
  118. protocol: tcp
  119. mode: host
  120. - target: 19888
  121. published: 19888
  122. protocol: tcp
  123. mode: host
  124. volumes:
  125. # 映射xml配置文件
  126. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  127. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  128. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  129. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  130. # 映射workers文件
  131. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  132. # 映射数据目录
  133. - /hadoopdata/3:/hadoopdata:wr
  134. #------------------------------------------------------------------------------------------------
  135. #以下均为工作节点,可在除leader以外的主机上部署
  136. pignode4:
  137. image: pig/hadoop:ha
  138. deploy:
  139. endpoint_mode: dnsrr
  140. restart_policy:
  141. condition: on-failure
  142. placement:
  143. # 将Mapreduce限制部署在第三个节点上
  144. constraints:
  145. # node.role==manager
  146. # node.role==worker
  147. - node.hostname==pighost3
  148. networks:
  149. - pig
  150. ports:
  151. - target: 22
  152. published: 9014
  153. protocol: tcp
  154. mode: host
  155. hostname: pignode4
  156. volumes:
  157. # 映射xml配置文件
  158. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  159. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  160. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  161. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  162. # 映射workers文件
  163. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  164. # 映射数据目录
  165. - /hadoopdata/4:/hadoopdata:wr
  166. pignode5:
  167. image: pig/hadoop:ha
  168. deploy:
  169. endpoint_mode: dnsrr
  170. restart_policy:
  171. condition: on-failure
  172. placement:
  173. # 将Mapreduce限制部署在第三个节点上
  174. constraints:
  175. # node.role==manager
  176. - node.hostname==pighost3
  177. networks:
  178. - pig
  179. ports:
  180. - target: 22
  181. published: 9015
  182. protocol: tcp
  183. mode: host
  184. hostname: pignode5
  185. volumes:
  186. # 映射xml配置文件
  187. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  188. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  189. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  190. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  191. # 映射workers文件
  192. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  193. # 映射数据目录
  194. - /hadoopdata/5:/hadoopdata:wr
  195. pignode6:
  196. image: pig/hadoop:ha
  197. deploy:
  198. endpoint_mode: dnsrr
  199. restart_policy:
  200. condition: on-failure
  201. placement:
  202. # 将Mapreduce限制部署在第三个节点上
  203. constraints:
  204. # node.role==manager
  205. - node.hostname==pighost3
  206. networks:
  207. - pig
  208. ports:
  209. - target: 22
  210. published: 9016
  211. protocol: tcp
  212. mode: host
  213. hostname: pignode6
  214. volumes:
  215. # 映射xml配置文件
  216. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  217. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  218. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  219. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  220. # 映射workers文件
  221. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  222. # 映射数据目录
  223. - /hadoopdata/6:/hadoopdata:wr
  224. pignode7:
  225. image: pig/hadoop:ha
  226. deploy:
  227. endpoint_mode: dnsrr
  228. restart_policy:
  229. condition: on-failure
  230. placement:
  231. # 将Mapreduce限制部署在第三个节点上
  232. constraints:
  233. # node.role==manager
  234. - node.hostname==pighost4
  235. networks:
  236. - pig
  237. ports:
  238. - target: 22
  239. published: 9017
  240. protocol: tcp
  241. mode: host
  242. hostname: pignode7
  243. volumes:
  244. # 映射xml配置文件
  245. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  246. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  247. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  248. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  249. # 映射workers文件
  250. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  251. # 映射数据目录
  252. - /hadoopdata/7:/hadoopdata:wr
  253. pignode8:
  254. image: pig/hadoop:ha
  255. deploy:
  256. endpoint_mode: dnsrr
  257. restart_policy:
  258. condition: on-failure
  259. placement:
  260. # 将Mapreduce限制部署在第三个节点上
  261. constraints:
  262. # node.role==manager
  263. - node.hostname==pighost4
  264. networks:
  265. - pig
  266. ports:
  267. - target: 22
  268. published: 9018
  269. protocol: tcp
  270. mode: host
  271. hostname: pignode8
  272. volumes:
  273. # 映射xml配置文件
  274. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  275. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  276. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  277. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  278. # 映射workers文件
  279. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  280. # 映射数据目录
  281. - /hadoopdata/8:/hadoopdata:wr
  282. pignode9:
  283. image: pig/hadoop:ha
  284. deploy:
  285. endpoint_mode: dnsrr
  286. restart_policy:
  287. condition: on-failure
  288. placement:
  289. # 将Mapreduce限制部署在第三个节点上
  290. constraints:
  291. # node.role==manager
  292. - node.hostname==pighost4
  293. networks:
  294. - pig
  295. ports:
  296. - target: 22
  297. published: 9019
  298. protocol: tcp
  299. mode: host
  300. hostname: pignode9
  301. volumes:
  302. # 映射xml配置文件
  303. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  304. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  305. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  306. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  307. # 映射workers文件
  308. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  309. # 映射数据目录
  310. - /hadoopdata/9:/hadoopdata:wr
  311. pignode10:
  312. image: pig/hadoop:ha
  313. deploy:
  314. endpoint_mode: dnsrr
  315. restart_policy:
  316. condition: on-failure
  317. placement:
  318. # 将Mapreduce限制部署在第三个节点上
  319. constraints:
  320. # node.role==manager
  321. - node.hostname==pighost5
  322. networks:
  323. - pig
  324. ports:
  325. - target: 22
  326. published: 9020
  327. protocol: tcp
  328. mode: host
  329. hostname: pignode10
  330. volumes:
  331. # 映射xml配置文件
  332. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  333. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  334. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  335. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  336. # 映射workers文件
  337. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  338. # 映射数据目录
  339. - /hadoopdata/10:/hadoopdata:wr
  340. pignode11:
  341. image: pig/hadoop:ha
  342. deploy:
  343. endpoint_mode: dnsrr
  344. restart_policy:
  345. condition: on-failure
  346. placement:
  347. # 将Mapreduce限制部署在第三个节点上
  348. constraints:
  349. # node.role==manager
  350. - node.hostname==pighost5
  351. networks:
  352. - pig
  353. ports:
  354. - target: 22
  355. published: 9021
  356. protocol: tcp
  357. mode: host
  358. hostname: pignode11
  359. volumes:
  360. # 映射xml配置文件
  361. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:r
  362. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:r
  363. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:r
  364. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:r
  365. # 映射workers文件
  366. - ./config/workers:/root/hadoop/etc/hadoop/workers:r
  367. # 映射数据目录
  368. - /hadoopdata/11:/hadoopdata:wr
  369. pignode12:  # Hadoop container "pignode12", built from the pig/hadoop:ha image
  370. image: pig/hadoop:ha
  371. deploy:
  372. endpoint_mode: dnsrr
  373. restart_policy:
  374. condition: on-failure
  375. placement:
  376. # Pin this service to a single swarm node. NOTE(review): the original comment said "restrict Mapreduce to the third node", but the constraint below names pighost5 - confirm which is intended.
  377. constraints:
  378. # Disabled alternative constraint: node.role==manager
  379. - node.hostname==pighost5
  380. networks:
  381. - pig
  382. ports:
  383. - target: 22  # container port 22, published on host port 9022
  384. published: 9022
  385. protocol: tcp
  386. mode: host
  387. hostname: pignode12
  388. volumes:
  389. # Mount the Hadoop XML configuration files read-only. The access mode must be spelled "ro"; the previous ":r" is not a recognised mode, so the read-only intent was not applied.
  390. - ./config/core-site.xml:/root/hadoop/etc/hadoop/core-site.xml:ro
  391. - ./config/hdfs-site.xml:/root/hadoop/etc/hadoop/hdfs-site.xml:ro
  392. - ./config/yarn-site.xml:/root/hadoop/etc/hadoop/yarn-site.xml:ro
  393. - ./config/mapred-site.xml:/root/hadoop/etc/hadoop/mapred-site.xml:ro
  394. # Mount the workers file read-only
  395. - ./config/workers:/root/hadoop/etc/hadoop/workers:ro
  396. # Mount the per-node data directory read-write ("rw", not "wr")
  397. - /hadoopdata/12:/hadoopdata:rw
  398. zookeeper1:  # ZooKeeper service 1 of the three listed in ZOO_SERVERS below
  399. image: zookeeper:latest  # NOTE(review): unpinned tag - consider pinning one version so all three members match
  400. deploy:
  401. endpoint_mode: dnsrr
  402. restart_policy:
  403. condition: on-failure
  404. placement:
  405. constraints:
  406. - node.hostname==pighost1  # schedule only on the swarm node named pighost1
  407. networks:
  408. - pig
  409. ports:
  410. - target: 2181  # port inside the container
  411. published: 2181  # port exposed on the host
  412. protocol: tcp
  413. mode: host
  414. hostname: zookeeper1
  415. environment:  # consumed by the zookeeper image; presumably this member's id and the full ensemble list - confirm against the image docs
  416. - ZOO_MY_ID=1
  417. - ZOO_SERVERS=server.1=zookeeper1:2888:3888;2181 server.2=zookeeper2:2888:3888;2181 server.3=zookeeper3:2888:3888;2181
  418. volumes:  # host directories under /hadoopdata/zoo/1 mapped to /data, /datalog and /logs in the container
  419. - /hadoopdata/zoo/1/data:/data
  420. - /hadoopdata/zoo/1/datalog:/datalog
  421. - /hadoopdata/zoo/1/logs:/logs
  422. zookeeper2:  # ZooKeeper service 2 of the three listed in ZOO_SERVERS below
  423. image: zookeeper:latest  # NOTE(review): unpinned tag - consider pinning one version so all three members match
  424. deploy:
  425. endpoint_mode: dnsrr
  426. restart_policy:
  427. condition: on-failure
  428. placement:
  429. constraints:
  430. - node.hostname==pighost2  # schedule only on the swarm node named pighost2
  431. networks:
  432. - pig
  433. ports:
  434. - target: 2181  # port inside the container
  435. published: 2182  # port exposed on the host (differs from the container port)
  436. protocol: tcp
  437. mode: host
  438. hostname: zookeeper2
  439. environment:  # consumed by the zookeeper image; presumably this member's id and the full ensemble list - confirm against the image docs
  440. - ZOO_MY_ID=2
  441. - ZOO_SERVERS=server.1=zookeeper1:2888:3888;2181 server.2=zookeeper2:2888:3888;2181 server.3=zookeeper3:2888:3888;2181
  442. volumes:  # host directories under /hadoopdata/zoo/2 mapped to /data, /datalog and /logs in the container
  443. - /hadoopdata/zoo/2/data:/data
  444. - /hadoopdata/zoo/2/datalog:/datalog
  445. - /hadoopdata/zoo/2/logs:/logs
  446. zookeeper3:  # ZooKeeper service 3 of the three listed in ZOO_SERVERS below
  447. image: zookeeper:latest  # NOTE(review): unpinned tag - consider pinning one version so all three members match
  448. deploy:
  449. endpoint_mode: dnsrr
  450. restart_policy:
  451. condition: on-failure
  452. placement:
  453. constraints:
  454. - node.hostname==pighost3  # schedule only on the swarm node named pighost3
  455. networks:
  456. - pig
  457. ports:
  458. - target: 2181  # port inside the container
  459. published: 2183  # port exposed on the host (differs from the container port)
  460. protocol: tcp
  461. mode: host
  462. hostname: zookeeper3
  463. environment:  # consumed by the zookeeper image; presumably this member's id and the full ensemble list - confirm against the image docs
  464. - ZOO_MY_ID=3
  465. - ZOO_SERVERS=server.1=zookeeper1:2888:3888;2181 server.2=zookeeper2:2888:3888;2181 server.3=zookeeper3:2888:3888;2181
  466. volumes:  # host directories under /hadoopdata/zoo/3 mapped to /data, /datalog and /logs in the container
  467. - /hadoopdata/zoo/3/data:/data
  468. - /hadoopdata/zoo/3/datalog:/datalog
  469. - /hadoopdata/zoo/3/logs:/logs
  470. networks:  # top-level network declarations shared by the services above
  471. pig:  # the network every service joins; no options are given here, so defaults apply


