赞
踩
系统环境: Centos7
管理节点: 1个 (192.168.76.133)hostname: master
计算节点: 1个 (192.168.76.132)hostname: slave1
查看hostname
hostnamectl status
配置master机hostname
hostnamectl set-hostname master
配置slave1机hostname
hostnamectl set-hostname slave1
vi /etc/sysconfig/selinux
# 将文件中的 SELINUX 一项改为: SELINUX=disabled
reboot
getenforce
systemctl stop firewalld.service
systemctl disable firewalld.service
vi /etc/hosts
添加映射关系:
192.168.76.133 master
192.168.76.132 slave1
export MUNGEUSER=991 && groupadd -g $MUNGEUSER munge
useradd -m -c "MUNGE Uid 'N' Gid Emporium" -d /var/lib/munge -u $MUNGEUSER -g munge -s /sbin/nologin munge
export SLURMUSER=992 && groupadd -g $SLURMUSER slurm
useradd -m -c "SLURM workload manager" -d /var/lib/slurm -u $SLURMUSER -g slurm -s /bin/bash slurm
注:uid和gid可以根据情况自行确定,但要保证集群中的各结点uid和gid一致。
yum install epel-release openssh-clients -y
yum install munge munge-libs munge-devel -y
yum install rng-tools -y
rngd -r /dev/urandom
/usr/sbin/create-munge-key -r
dd if=/dev/urandom bs=1 count=1024 > /etc/munge/munge.key
chown munge: /etc/munge/munge.key && chmod 400 /etc/munge/munge.key
scp /etc/munge/munge.key root@192.168.76.132:/etc/munge/
chown -R munge: /etc/munge/ /var/log/munge/ && chmod 0700 /etc/munge/ /var/log/munge/
systemctl enable munge
systemctl start munge
systemctl status munge
yum install openssl openssl-devel pam-devel numactl numactl-devel hwloc hwloc-devel lua lua-devel readline-devel rrdtool-devel ncurses-devel man2html libibmad libibumad -y
yum install python3-pip perl-ExtUtils-MakeMaker gcc rpm-build mysql-devel json-c json-c-devel http-parser http-parser-devel -y
# 安装下载工具
yum install wget -y
cd /usr/local/
wget https://download.schedmd.com/slurm/slurm-20.11.9.tar.bz2
rpmbuild -ta --with mysql slurm-20.11.9.tar.bz2
cd /root/rpmbuild/RPMS/x86_64
yum localinstall slurm-*.rpm -y
cp /etc/slurm/slurm.conf.example /etc/slurm/slurm.conf
cp /etc/slurm/slurmdbd.conf.example /etc/slurm/slurmdbd.conf
cp /etc/slurm/cgroup.conf.example /etc/slurm/cgroup.conf
修改/etc/slurm/slurm.conf
vi /etc/slurm/slurm.conf
# 注意,有两个路径要格外注意修改
...
修改/etc/slurm/slurmdbd.conf
vi /etc/slurm/slurmdbd.conf
...
通过scp发送到各个计算节点
scp -p /etc/slurm/slurm.conf root@192.168.76.132:/etc/slurm/
scp -p /etc/slurm/slurmdbd.conf root@192.168.76.132:/etc/slurm/
scp -p /etc/slurm/cgroup.conf root@192.168.76.132:/etc/slurm/
mkdir /var/spool/slurmctld && chown slurm: /var/spool/slurmctld && chmod 755 /var/spool/slurmctld
mkdir /var/log/slurm && touch /var/log/slurm/slurmctld.log && chown slurm: /var/log/slurm/slurmctld.log
touch /var/log/slurm/slurm_jobacct.log /var/log/slurm/slurm_jobcomp.log && chown slurm: /var/log/slurm/slurm_jobacct.log /var/log/slurm/slurm_jobcomp.log
chown slurm: /etc/slurm/slurmdbd.conf
chmod 600 /etc/slurm/slurmdbd.conf
touch /var/log/slurm/slurmdbd.log
chown slurm: /var/log/slurm/slurmdbd.log
mkdir /var/spool/slurmd && chown slurm: /var/spool/slurmd && chmod 755 /var/spool/slurmd
mkdir /var/log/slurm && touch /var/log/slurm/slurmd.log && chown slurm: /var/log/slurm/slurmd.log
# 启动slurmdbd
systemctl enable slurmdbd.service
systemctl start slurmdbd.service
systemctl status slurmdbd.service
# 启动slurmctld
systemctl enable slurmctld.service
systemctl start slurmctld.service
systemctl status slurmctld.service
systemctl enable slurmd.service
systemctl start slurmd.service
systemctl status slurmd.service
systemctl start munge
systemctl status munge
systemctl enable munge
systemctl status slurmctld.service
#### d. 在slave上执行
```bash
systemctl enable slurmd.service
systemctl start slurmd.service
systemctl status slurmd.service
systemctl start munge
systemctl status munge
systemctl enable munge
```
至此结束
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。