Linux并行集群的搭建(2)


17. 配置NIS客户端,在所有计算节点上安装ypbind,RHEL默认已经安装
[root@admin ~]# for i in 1 2; do ssh node$i authconfig --enablenis --nisdomain=linuxidcyf.com \
--nisserver=admin --update; done
18.验证NIS服务配置是否正确
[root@node1~]#ypcat passwd
linuxidc:$1$tsPKQvPP$Kwom9qG/DNR1w/Lq./cQV.:500:500::/home/linuxidc:/bin/bash
[root@admin ~]#for i in 1 2; do ssh node$i id linuxidc; done
uid=500(linuxidc) gid=500(linuxidc) groups=500(linuxidc)
uid=500(linuxidc) gid=500(linuxidc) groups=500(linuxidc)
由上面输出可知,NIS服务配置正确
 
二:安装和配置torque(管理节点)
1.首先安装openmpi
[root@admin parallel]# tar xjvf openmpi-1.8.1.tar.bz2 -C /usr/local/src/
[root@admin parallel]# cd /usr/local/src/openmpi-1.8.1/
[root@admin openmpi-1.8.1]# ./configure --prefix=/share/apps/openmpi
[root@admin openmpi-1.8.1]# make
[root@admin openmpi-1.8.1]# make install
[root@admin openmpi-1.8.1]# cp -r examples/ /share/apps/openmpi


2.添加环境变量:在/share/scripts目录下建立一个Path.sh脚本,以后也方便在计算节点上添加同样的环境变量
[root@adminscripts]#pwd
/share/scripts
[root@adminscripts]#cat Path.sh
#!/bin/bash
# Path.sh - add the shared OpenMPI installation to /etc/bashrc.
#
# Appends the PATH and LD_LIBRARY_PATH exports for /share/apps/openmpi to
# /etc/bashrc, unless some line of that file already mentions "openmpi".
# Running it again is therefore a no-op. Must be run as root, since it
# writes to /etc/bashrc. Kept free of bash-only syntax so that it also
# works when invoked as "sh Path.sh".

# -q: only the exit status matters; do not echo the matched lines on re-runs.
if ! grep -q openmpi /etc/bashrc; then
  # Quoted delimiter: the text below is written literally, so the variables
  # are expanded when /etc/bashrc is sourced, not when this script runs.
  cat >>/etc/bashrc <<'EOF'
export PATH=/share/apps/openmpi/bin:$PATH
export LD_LIBRARY_PATH=/share/apps/openmpi/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
EOF
  # ${VAR:+:$VAR} adds the ":" separator only when LD_LIBRARY_PATH is already
  # set; a trailing ":" would make the dynamic linker search the current
  # directory as well.
fi
[root@adminscripts]#
[root@adminscripts]#sh Path.sh
[root@adminscripts]#source /etc/bashrc


3.测试openmpi是否安装成功
[root@adminscripts]#which mpirun
/share/apps/openmpi/bin/mpirun
[root@admin scripts]# which mpiexec
/share/apps/openmpi/bin/mpiexec


4.安装torque
[root@adminparallel]#tar xzvf torque-3.0.6.tar.gz -C /share/source/
[root@adminparallel]#cd /share/source/torque-3.0.6/
[root@admintorque-3.0.6]#./configure  --enable-syslog --enable-nvidia-gpus --enable-cpuset --disable-gui --with-rcp=scp --with-sendmail
[root@admintorque-3.0.6]#make
[root@admintorque-3.0.6]#make install
[root@admintorque-3.0.6]#pwd
/share/source/torque-3.0.6
[root@admintorque-3.0.6]#cat install.sh
cd /share/source/torque-3.0.6
make install
[root@admintorque-3.0.6]#


5.初始化torque创建默认队列
[root@admin torque-3.0.6]# ./torque.setup root
initializing TORQUE (admin: root@admin)
PBS_Server admin: Create mode and server database exists,
do you wish to continue y/(n)?y
root     26351     1  0 06:44 ?        00:00:00 pbs_server -t create
Max open servers: 10239
Max open servers: 10239
[root@admin torque-3.0.6]#


6.查看创建的默认队列batch
[root@admintorque-3.0.6]#qmgr -c "p s"
#
# Create queues and set their attributes.
#
#
# Create and define queue batch
#
create queue batch
set queue batch queue_type = Execution
set queue batch resources_default.nodes = 1
set queue batch resources_default.walltime = 01:00:00
set queue batch enabled = True
set queue batch started = True
#
# Set server attributes.
#
set server scheduling = True
set server acl_hosts = admin
set server admins = root@admin
set server operators = root@admin
set server default_queue = batch
set server log_events = 511
set server mail_from = adm
set server scheduler_iteration = 600
set server node_check_rate = 150
set server tcp_timeout = 6
set server mom_job_sync = True
set server keep_completed = 300
[root@admintorque-3.0.6]#

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/3d48d476a41627ab24aebc9a71d239bc.html