- 论坛徽章:
- 0
|
本帖最后由 嗜血的猪 于 2010-04-13 10:51 编辑
1
硬件:集群节点:12台hs21 xm刀片,交换机,控制台,管理节点IBM System x3650
软件:系统rhel5.4 xcat2.3.3 下载地址 http://xcat.sourceforge.net/#download
2网络ip设置:
管理节点IBM System x3650 eth1 192.168.1.111 eth0 10.10.10.1 hostname ibm_hs
刀片ip eth0 10.10.10.80------10.10.10.92
刀片中心ip 10.10.10.254
3管理节点IBM System x3650安装
操作系统rhel5.4(系统安装不再介绍),最好完全安装,防止后期麻烦。
4管理节点操作系统修改
4.1检查你的操作系统,如果selinux为 enabled 需要执行下面例子程序。
[root@ibm_hs ~]# vi /etc/selinux/config
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
# enforcing - SELinux security policy is enforced.
# permissive - SELinux prints warnings instead of enforcing.
# disabled - SELinux is fully disabled.
SELINUX=disabled
# SELINUXTYPE= type of policy in use. Possible values are:
# targeted - Only targeted network daemons are protected.
# strict - Full SELinux protection.
SELINUXTYPE=targeted
4.2删除系统安装包 tftp-server,OpenIPMI-tools
[root@ibm_hs ~]# rpm -qa | grep tftp-server
tftp-server-0.42-3.1
[root@ibm_hs ~]# rpm -qa | grep OpenIPMI-tools
OpenIPMI-tools-2.0.6-5.el5.4
[root@ibm_hs ~]# rpm -e tftp-server; rpm -e OpenIPMI-tools
5刀片中心设置
5.1找台笔记本或其他机器连接到刀片中心。一般默认ip为192.168.70.125用户为USERID密码为PASSW0RD
Blade Tasks-->Configuration
这里修改blade名字为node1------node12
MM Control-->General Settings
下修改刀片中心名字为blademm
MM Control-->Network Interfaces
设置刀片中心ip为10.10.10.254
MM Control-->Security下激活ssh server
设置完成重启BLADECENTER
5.2测试
通过管理节点执行下面命令如果ssh配通会看到下面信息。(如果有错误比如ssh不能登录可能需要升级mm的firmware。到ibm官网下载就好。)
[root@ibm_hs ~]# ssh USERID@10.10.10.254
password:
Hostname: blademm
Static IP address: 10.10.10.254
Burned-in MAC address: 00:14:5E:DF:84:F0
DHCP: Disabled - Use static IP configuration.
Last login: Wednesday March 18 2009 0:06 from 10.10.10.1 (Web)
system>
6xcat2.3部署
mkdir -p /root/xcat2
解压所下载的文件
tar jxvf xcat-dep-2.4-200912091450.tar.bz2
tar jxvf xcat-core-2.3.3.tar.bz2
yum repository
挂载rhel5.4的DVD光盘到/root/xcat2/rhel5.4/,并在/etc/yum.repos.d/下创建文件:rhels5.4.repo,内容如下:
[rhels5.4]
name=Rhel Server 5.4
baseurl=file:///root/xcat2/rhel5.4/Server
enabled=1
gpgcheck=0
xCAT的repository文件:
cd /root/xcat2/xcat-dep/rh5/x86_64
./mklocalrepo.sh
cd /root/xcat2/xcat-core
./mklocalrepo.sh
yum install xCAT (根据提示y就行了)
安装完成测试
[root@big ibm_hs]# /etc/init.d/xcatd status
xCAT service is running
修改/etc/hosts
10.10.10.254 blademm.hs21.com blademm
10.10.10.80 node1.hs21.com node1
10.10.10.81 node2.hs21.com node2
10.10.10.82 node3.hs21.com node3
10.10.10.83 node4.hs21.com node4
10.10.10.84 node5.hs21.com node5
10.10.10.85 node6.hs21.com node6
10.10.10.86 node7.hs21.com node7
10.10.10.87 node8.hs21.com node8
10.10.10.88 node9.hs21.com node9
10.10.10.89 node10.hs21.com node10
10.10.10.90 node11.hs21.com node11
10.10.10.91 node12.hs21.com node12
..................................待续
..................................继续
设定root用户profile
cd /etc/profile.d/
source ./xcat.sh
这个操作的目的是设定xcat环境变量(必须用source在当前shell中执行,直接运行./xcat.sh只会在子shell中生效)
[root@ibm_hs ~]# less /etc/profile.d/xcat.sh
XCATROOT=/opt/xcat
PATH=$PATH:$XCATROOT/bin:$XCATROOT/sbin
MANPATH=$MANPATH:$XCATROOT/share/man:
export XCATROOT PATH MANPATH
export PERL_BADLANG
验证安装
[root@ibm_hs~]# tabdump site
#key,value,comments,disable
"blademaxp","64",,
"fsptimeout","0",,
"installdir","/install",,
"ipmimaxp","64",,
"ipmiretries","3",,
"ipmitimeout","2",,
"consoleondemand","no",,
"master","10.10.10.1",,
"maxssh","8",,
"ppcmaxp","64",,
"ppcretry","3",,
"ppctimeout","0",,
"sharedtftp","1",,
"SNsyncfiledir","/var/xcat/syncfiles",,
"tftpdir","/tftpboot",,
"xcatdport","3001",,
"xcatiport","3002",,
"xcatconfdir","/etc/xcat",,
"timezone","Asia/Shanghai",,
"useNmapfromMN","no",,
"domain","hs21.com",,
"nameservers","10.10.10.1",,
"dhcpinterfaces","eth0",,
如果发现某些选项不对可以用tabedit site修改
用copycds命令拷贝镜像到管理节点
例。
[root@ibm_hs iso]# ls
rhel-server-5.4-x86_64-dvd.iso
[root@ibm_hs iso]# pwd
/root/iso
[root@ibm_hs iso]# copycds -n rhels5.4 -a x86_64 /root/iso/rhel-server-5.4-x86_64-dvd.iso
Restoring the predefined xcat table
cd /opt/xcat/share/xcat/templates/e1350
for i in *csv; do tabrestore $i; done
对网络配置表配置
[root@ibm_hs e1350]# chtab net=10.10.10.0 networks.nameservers=10.10.10.1
[root@ibm_hs e1350]# chtab net=10.10.10.0 networks.dynamicrange=10.10.10.80-10.10.10.92
[root@ibm_hs e1350]#tabdump networks (检查你的设置)
dns设置
[root@ibm_hs iso]# less /etc/resolv.conf
search hs21.com
nameserver 127.0.0.1
#nameserver 10.10.10.1
[root@ibm_hs iso]#makedns
[root@ibm_hs iso]#service named restart
dns测试正确情况下会看到下面输出
[root@ibm_hs iso]# host node1
node1.hs21.com has address 10.10.10.80
node1.hs21.com mail is handled by 10 node1.hs21.com.
[root@ibm_hs iso]# host node2
node2.hs21.com has address 10.10.10.81
node2.hs21.com mail is handled by 10 node2.hs21.com.
dhcp设置
[root@ibm_hs iso]# chtab net=10.10.10.0 networks.dhcpserver=10.10.10.1
[root@ibm_hs iso]# makedhcp -n
[root@ibm_hs iso]#service dhcpd start
TFTP设置
默认tftp文件在/tftpboot下
......................待续
[root@ibm_hs iso]# mknb x86_64
[root@ibm_hs iso]# service tftpd restart
定义bladecenter管理模块
[root@ibm_hs iso]# nodeadd blademm groups=mm nodehm.mgt=blade mp.mpa=10.10.10.254
激活刀片管理模块远程控制 snmp和ssh
[root@ibm_hs iso]# ssh USERID@blademm users -T mm[1] -1 -at set
system> users -T mm[1] -1 -at set
如果返回ok证明你没有问题。
[root@ibm_hs iso]# rspconfig mm snmpcfg=enable sshcfg=enable
blademm: SNMP enable: OK
blademm: SSH enable: OK
[root@ibm_hs iso]# rspconfig blademm pd1=redwoperf pd2=redwoperf
blademm: pd2: redwoperf
blademm: pd1: redwoperf
[root@ibm_hs iso]# rpower blademm reset
setup ssh key for Management Module
[root@ibm_hs iso]# rspconfig blademm snmpcfg=enable sshcfg=enable
blademm: SNMP enable: OK
blademm: SSH enable: OK
增加计算节点
[root@ibm_hs iso]#nodeadd node1-node12 groups=blade,compute mp.mpa=10.10.10.254 nodehm.power=blade nodehm.mgt=blade nodetype.os=rhels5.4 nodetype.arch=x86_64 nodetype.profile=compute nodetype.nodetype=osi noderes.nfsserver=ibm_hs.hs21.com noderes.netboot=pxe noderes.primarynic=eth0
通过rscan命令可以获取刀片位置信息。例如
[root@ibm_hs ~]# rscan blademm
type name id type-model serial-number address
mm blademm 0 8852-4XC 99A8988 10.10.10.254
blade node1 1 7995-I3B 99A9226
blade node2 2 7995-I3B 99A9233
blade node3 3 7995-I3B 99A9232
blade node4 4 7995-I3B 99A9235
blade node5 5 7995-I3B 99A9227
blade node6 6 7995-I3B 99A9236
blade node7 7 7995-I3B 99A9228
blade node8 8 7995-I3B 99A9234
blade node9 9 7995-I3B 99A9229
blade node10 10 7995-I3B 99A9230
blade node11 11 7995-I3B 99A9231
blade node12 12 7995-I3B 99A9225
如果需要修改id值可以用nodech nodexx mp.id=xx
测试管理模块
[root@ibm_hs ~]# rpower node1 stat
node1: off
[root@ibm_hs ~]# rpower node1 on
node1: on
[root@ibm_hs ~]# rpower node1 stat
node1: on
获取mac地址
[root@ibm_hs ~]#getmacs node1-node12
[root@ibm_hs ~]#makedhcp -n (注意:有些文档用makedhcp --macs,请注意这个版本的xcat不要用这个方法)
[root@ibm_hs ~]#service dhcpd restart
[root@ibm_hs~]# tabdump mac
#node,interface,mac,comments,disable
"node1",,"00:1A:64:58:1A:72",,
"node10",,"00:1A:64:58:51:98",,
"node11",,"00:1A:64:58:5A:FE",,
"node12",,"00:1A:64:58:1E:A4",,
"node2",,"00:1A:64:58:55:2E",,
"node3",,"00:1A:64:58:60:56",,
"node4",,"00:1A:64:58:5A:98",,
"node5",,"00:1A:64:58:16:0A",,
"node6",,"00:1A:64:58:55:34",,
"node7",,"00:1A:64:58:1A:BA",,
"node8",,"00:1A:64:58:5C:84",,
"node9",,"00:1A:64:58:51:9E",,
准备安装计算节点
调整刀片启动顺序
[root@ibm_hs~]# rbootseq node1-node12 list
node1: cdrom,hd0,floppy,net
.......
[root@ibm_hs~]# rbootseq node1-node12 n,h,c,f
node1: net,hd0,cdrom,floppy
......................
创建kickstart文件
[root@ibm_hs~]# nodeset node1-node12 install
node1: install rhels5.4-x86_64-compute
.....
开始安装
[root@ibm_hs~]# rinstall node1
监控安装
[root@ibm_hs~]# nodestat node1
node1: installing prep
[root@ibm_hs~]# tail -f /var/log/messages
到这里为止xcat集群就建立完成
(参照ibm官方文档)如果对其他参数有兴趣可以去ibm网站下载。
下面介绍torque maui在xcat中应用
待续。。。。。。。。。
torque安装 |
|