7.更改队列batch部分属性,以满足实际需求
[root@admin torque-3.0.6]# qmgr -c "s q batch resources_default.walltime=24:00:00"
[root@admin torque-3.0.6]# qmgr -c "s s query_other_jobs=true"
8.建立mom配置文件,用于复制到所有计算节点
[root@admin mom_priv]# pwd
/var/spool/torque/mom_priv
[root@admin mom_priv]# cat config
$pbsserver admin
$logevent 225
9.创建节点信息文件
[root@admin server_priv]# pwd
/var/spool/torque/server_priv
[root@admin server_priv]# cat nodes
node1
node2
[root@admin server_priv]#
10.查看目前节点信息均为down状态
[root@admin server_priv]# pbsnodes -a
node1
state = down
np = 1
ntype = cluster
mom_service_port = 15002
mom_admin_port = 15003
gpus = 0
node2
state = down
np = 1
ntype = cluster
mom_service_port = 15002
mom_admin_port = 15003
gpus = 0
[root@admin server_priv]#
11.复制pbs_server启动脚本,并设置开机自动启动
[root@admin torque-3.0.6]# pwd
/share/apps/torque-3.0.6
[root@admin torque-3.0.6]# cp contrib/init.d/pbs_server /etc/init.d/
[root@admin torque-3.0.6]# chmod 755 /etc/init.d/pbs_server
[root@admin torque-3.0.6]# chkconfig pbs_server on
12.复制pbs_mom脚本,方便复制到计算节点
[root@admin torque-3.0.6]# cp contrib/init.d/pbs_mom /etc/init.d/
13.安装maui
[root@admin parallel]# tar xzvf maui-3.3.1.tar.gz -C /usr/local/src/
[root@admin ~]# cd /usr/local/src/maui-3.3.1/
[root@admin maui-3.3.1]# ./configure --prefix=/usr/local/maui --with-pbs=/usr/local
[root@admin maui-3.3.1]# make
[root@admin maui-3.3.1]# make install
14.复制maui启动脚本,设置正确路径,并设置为开机启动
[root@admin maui-3.3.1]# cp etc/maui.d /etc/init.d/mauid
[root@admin maui-3.3.1]# vi /etc/init.d/mauid
将 MAUI_PREFIX=/opt/maui 更改为 MAUI_PREFIX=/usr/local/maui
[root@admin maui-3.3.1]# chmod 755 /etc/init.d/mauid
[root@admin maui-3.3.1]# chkconfig mauid on
15.启动maui调度服务
[root@admin maui-3.3.1]# /etc/init.d/mauid start
Starting MAUI Scheduler: [ OK ]
[root@admin maui-3.3.1]#
16.添加maui命令环境变量
[root@admin maui-3.3.1]# vi /etc/bashrc
export PATH=/share/apps/openmpi/bin:/usr/local/maui/bin:$PATH
[root@admin maui-3.3.1]# source /etc/bashrc
17.安装并行软件到共享目录
[root@admin namd]# tar xzvf NAMD_2.9_Linux-x86_64-multicore.tar.gz -C /share/apps/
[root@admin namd]# tar xzvf apoa1.tar.gz -C /share/apps/
[root@admin apps]# pwd
/share/apps
[root@admin apps]# mv NAMD_2.9_Linux-x86_64-multicore/ namd
18.添加namd命令环境变量,同时也添加到Path.sh方便计算节点添加环境变量
[root@admin maui-3.3.1]# vi /etc/bashrc
export PATH=/share/apps/openmpi/bin:/usr/local/maui/bin:/share/apps/namd:$PATH
[root@admin maui-3.3.1]# source /etc/bashrc
[root@admin scripts]# which namd2
/share/apps/namd/namd2
[root@admin scripts]# cat Path.sh
#!/bin/bash
grep openmpi /etc/bashrc || cat >>/etc/bashrc <<EOF
export PATH=/share/apps/openmpi/bin:/share/apps/namd:\$PATH
EOF
[root@admin scripts]#
至此管理端配置完成
三:计算节点配置torque
1.计算节点安装torque
[root@admin ~]# for i in 1 2; do ssh node$i sh /share/source/torque-3.0.6/install.sh; done
2.复制mom配置文件到计算节点
[root@admin ~]# for i in 1 2; do scp /var/spool/torque/mom_priv/config node$i:/var/spool/torque/mom_priv/; done
3.复制mom启动脚本到计算节点,启动pbs_mom服务,并设置开机启动
[root@admin ~]# for i in 1 2; do scp /etc/init.d/pbs_mom node$i:/etc/init.d/; done
[root@admin ~]# for i in 1 2; do ssh node$i /etc/init.d/pbs_mom start; done
Starting TORQUE Mom: [ OK ]
Starting TORQUE Mom: [ OK ]
[root@admin ~]# for i in 1 2; do ssh node$i chkconfig pbs_mom on; done
4.设置环境变量
[root@admin ~]# for i in 1 2; do ssh node$i sh /share/scripts/Path.sh; done
5.测试环境变量设置是否正确
[root@admin ~]# for i in 1 2; do ssh node$i which mpirun; done
/share/apps/openmpi/bin/mpirun
/share/apps/openmpi/bin/mpirun
[root@admin ~]# for i in 1 2; do ssh node$i which namd2; done
/share/apps/namd/namd2
/share/apps/namd/namd2
[root@admin ~]#
6.此时再观察计算节点状态,已经变成free了,即可以提交任务到计算节点了
[root@admin apps]# pbsnodes -a
node1
state = free
np = 1
ntype = cluster
status=rectime=1408751492,varattr=,jobs=,state=free,netload=12996103,gres=,loadave=0.01,ncpus=1,physmem=1024932kb,availmem=2082428kb,totmem=2165536kb,idletime=0,nusers=0,nsessions=0,uname=Linux node1 2.6.18-371.el5 #1 SMP Tue Oct 1 08:35:08 EDT 2013 x86_64,opsys=linux
mom_service_port = 15002
mom_admin_port = 15003
gpus = 0
node2
state = free
np = 1
ntype = cluster
status=rectime=1408751482,varattr=,jobs=,state=free,netload=12983275,gres=,loadave=0.03,ncpus=1,physmem=1024932kb,availmem=2082444kb,totmem=2165536kb,idletime=0,nusers=0,nsessions=0,uname=Linux node2 2.6.18-371.el5 #1 SMP Tue Oct 1 08:35:08 EDT 2013 x86_64,opsys=linux
mom_service_port = 15002
mom_admin_port = 15003
gpus = 0
[root@admin apps]#
四:验证并行集群是否搭建成功
1.在管理节点上以建立的linuxidc用户登录,首先设置节点间无密码互访,操作和root用户一样,只是不需要复制.ssh目录
2.复制namd测试用的算例apoa1到当前目录下
[linuxidc@admin ~]$ cp -r /share/apps/apoa1/ ./