- 论坛徽章:
- 0
|
Heartbeat/corosync+pacemaker+ldirectord 实现Nginx负载均衡
欢迎访问yoyotown.com一起讨论IT技术。
系统:CentOS 5.4
IP分配:
查看源代码
打印帮助
HA1 eth0:192.168.0.66 eth1:192.168.10.1
HA2 eth0:192.168.0.69 eth1:192.168.10.2
VIP 192.168.0.120
1. 安装pacemaker和heartbeat
[root@HA1 ~]# wget -O /etc/yum.repos.d/pacemaker.repo http://clusterlabs.org/rpm/epel-5/clusterlabs.repo
[root@HA1 ~]# wget ftp://ftp.pbone.net/mirror/cento ... 4-6.el5.kb.i386.rpm
[root@HA1 ~]# rpm -ivh libesmtp-1.0.4-6.el5.kb.i386.rpm
[root@HA1 ~]# yum install -y pacemaker heartbeat
2. 安装ldirectord
[root@HA1 ~]# yum install -y ldirectord
3. 配置
3.1 配置Heartbeat
[root@HA1 ~]# cp /usr/share/doc/heartbeat-3.0.1/{ha.cf,authkeys} /etc/ha.d/
[root@HA1 ~]# cat /etc/ha.d/authkeys
查看源代码
打印帮助
1 auth 1
2 1 crc
[root@HA1 ~]# cat /etc/ha.d/ha.cf |grep -v "#"
查看源代码
打印帮助
01 logfile /var/log/ha-log
02 logfacility local0
03 keepalive 2
04 deadtime 30
05 warntime 10
06 initdead 120
07 udpport 695
08 ucast eth1 192.168.10.2 # 注意此处HA2改为:ucast eth1 192.168.10.1
09 auto_failback on
10 watchdog /dev/watchdog
11 node HA1
12 node HA2
13 ping 192.168.0.1
14 respawn hacluster /usr/lib/heartbeat/ipfail
15 apiauth ipfail gid=haclient uid=hacluster
16 crm on
3.2 用corosync替换heartbeat(可选)
corosync是基于OpenAIS构建的集群引擎,可替代heartbeat进行心跳检测。
The Corosync Cluster Engine is an open source project Licensed under the BSD License derived from the OpenAIS project. OpenAIS uses a UDP multicast based communication protocol to periodically check for node availability.
[root@HA1 ~]# yum install -y corosync
[root@HA1 ~]# corosync-keygen
Corosync Cluster Engine Authentication key generator.
Gathering 1024 bits for key from /dev/random.
Press keys on your keyboard to generate entropy.
Writing corosync key to /etc/corosync/authkey.
[root@HA1 ~]# scp /etc/corosync/authkey HA2:/etc/corosync/
[root@HA1 ~]# cp /etc/corosync/corosync.conf.example /etc/corosync/corosync.conf
[root@HA1 ~]# vi !$
查看源代码
打印帮助
01 # Please read the corosync.conf.5 manual page
02 compatibility: whitetank
03
04 totem {
05 version: 2
06 secauth: off
07 threads: 0
08 interface {
09 ringnumber: 0
10 bindnetaddr: 192.168.10.0
11 mcastaddr: 226.94.1.1
12 mcastport: 5405
13 }
14 }
15
16 logging {
17 fileline: off
18 to_stderr: yes
19 to_logfile: yes
20 to_syslog: yes
21 logfile: /var/log/corosync.log
22 debug: off
23 timestamp: on
24 logger_subsys {
25 subsys: AMF
26 debug: off
27 }
28 }
29
30 amf {
31 mode: disabled
32 }
33
34 service {
35 # Load the Pacemaker Cluster Resource Manager
36 ver: 0
37 name: pacemaker
38 use_mgmtd: yes
39 }
[root@HA1 ~]# scp /etc/corosync/corosync.conf HA2:/etc/corosync/corosync.conf
[root@HA1 ~]# service corosync start
Starting Corosync Cluster Engine (corosync): [ OK ]
[root@HA1 ~]# chkconfig --level 2345 corosync on
[root@HA1 ~]# chkconfig --level 2345 heartbeat off
在HA2上执行:
[root@HA2 ~]# chown root:root /etc/corosync/authkey
[root@HA2 ~]# chmod 400 /etc/corosync/authkey
[root@HA2 ~]# service corosync start
Starting Corosync Cluster Engine (corosync): [ OK ]
[root@HA2 ~]# chkconfig --level 2345 corosync on
[root@HA2 ~]# chkconfig --level 2345 heartbeat off
3.3 安装配置ldirectord
[root@HA1 ~]# cat /etc/ha.d/ldirectord.cf
查看源代码
打印帮助
01 checktimeout=5
02 checkinterval=7
03 autoreload=yes
04 logfile="/var/log/ldirectord.log"
05 quiescent=yes
06 emailalert=shidl@baihe.com
07 # A server with a page at the main root of the site that displays "Nginx"
08 virtual=192.168.0.120:80
09 real=192.168.0.66:80 gate
10 real=192.168.0.69:80 gate
11 service=http
12 request="/lb.html" # 在根目录下编写lb.html,内容为live
13 receive="live"
14 scheduler=wlc
15 protocol=tcp
16 checktype=negotiate
3.4 配置hosts
[root@HA1 ~]# cat /etc/hosts
查看源代码
打印帮助
1 # Do not remove the following line, or various programs
2 # that require network functionality will fail.
3 127.0.0.1 vpc localhost.localdomain localhost
4 ::1 localhost6.localdomain6 localhost6
5 192.168.10.1 HA1
6 192.168.10.2 HA2
3.5 配置lo:0设备
查看源代码
打印帮助
1 [root@HA1 ~]# cat >>/etc/sysconfig/network-scripts/ifcfg-lo:0<<EOF
2 DEVICE=lo:0
3 IPADDR=192.168.0.120
4 NETMASK="255.255.255.255"
5 ONBOOT=yes
6 NAME=loopback
7
8 EOF
3.6 启用转发,禁用arp
[root@HA1 ~]# vi /etc/sysctl.conf
修改net.ipv4.ip_forward = 0为net.ipv4.ip_forward = 1
添加下面行:
查看源代码
打印帮助
1 net.ipv4.conf.all.arp_ignore = 1
2 net.ipv4.conf.eth0.arp_ignore = 1
3 net.ipv4.conf.all.arp_announce = 2
4 net.ipv4.conf.eth0.arp_announce = 2
[root@HA1 ~]# sysctl -p
# 将配置拷贝到HA2
[root@HA1 ~]# scp /etc/ha.d/{ha.cf,authkeys,ldirectord.cf} HA2:/etc/ha.d/
[root@HA1 ~]# scp /etc/{hosts,sysctl.conf} HA2:/etc/
[root@HA1 ~]# scp /etc/sysconfig/network-scripts/ifcfg-lo:0 HA2:/etc/sysconfig/network-scripts/
在HA2上修改/etc/ha.d/ha.cf
将ucast eth1 192.168.10.2 改为:ucast eth1 192.168.10.1
并使sysctl.conf配置生效:
[root@HA2 ~]# sysctl -p
3.7 在HA1和HA2上配置并安装好nginx
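编写nginx lsb资源代理脚本(注意nginx安装路径)。注意:Pacemaker要求LSB脚本符合LSB规范(例如服务已在运行时执行start应返回0),而下面脚本中的nginx_check在检测到nginx已运行时会exit 1,用于生产环境前请按需调整: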
[root@HA1 ~]# cat /etc/init.d/nginxd
查看源代码
打印帮助
01 #!/bin/sh
02
03 # source function library
04 . /etc/rc.d/init.d/functions
05
06 # Source networking configuration.
07 . /etc/sysconfig/network
08
09 # Check that networking is up.
10 [ ${NETWORKING} = "no" ] && exit 0
11
12 RETVAL=0
13 prog="nginx"
14
15 nginxDir=/usr/local/nginx
16 nginxd=$nginxDir/sbin/nginx
17 nginxConf=$nginxDir/conf/nginx.conf
18 nginxPid=$nginxDir/nginx.pid
19
20 nginx_check()
21 {
22 if [[ -e $nginxPid ]]; then
23 ps aux |grep -v grep |grep -q nginx
24 if (( $? == 0 )); then
25 echo "$prog already running..."
26 exit 1
27 else
28 rm -rf $nginxPid &> /dev/null
29 fi
30 fi
31 }
32
33 start()
34 {
35 nginx_check
36 if (( $? != 0 )); then
37 true
38 else
39 echo -n $"Starting $prog:"
40 daemon $nginxd -c $nginxConf
41 RETVAL=$?
42 echo
43 [ $RETVAL = 0 ] && touch /var/lock/subsys/nginx
44 return $RETVAL
45 fi
46 }
47
48 stop()
49 {
50 echo -n $"Stopping $prog:"
51 killproc $nginxd
52 RETVAL=$?
53 echo
54 [ $RETVAL = 0 ] && rm -f /var/lock/subsys/nginx $nginxPid
55 }
56
57 reload()
58 {
59 echo -n $"Reloading $prog:"
60 killproc $nginxd -HUP
61 RETVAL=$?
62 echo
63 }
64
65 case "$1" in
66 start)
67 start
68 ;;
69 stop)
70 stop
71 ;;
72 restart)
73 stop
74 start
75 ;;
76 reload)
77 reload
78 ;;
79 status)
80 status $prog
81 RETVAL=$?
82 ;;
83 *)
84 echo $"Usage: $0 {start|stop|restart|reload|status}"
85 RETVAL=1
86 esac
87 exit $RETVAL
[root@HA1 ~]# chmod +x /etc/init.d/nginxd
[root@HA1 ~]# scp /etc/init.d/nginxd HA2:/etc/init.d/nginxd
[root@HA1 ~]# service network restart
[root@HA1 ~]# service heartbeat start
[root@HA2 ~]# service network restart
[root@HA2 ~]# service heartbeat start
4. 配置集群资源:
Heartbeat和其他应用提供的ocf代理脚本或许会有错误,我们可以用下面方法排错:
要检查 OCF 脚本,请首先设置所需的环境变量。例如,当测试IPaddr OCF 脚本时,您必须通过设置一个变量名称前缀为 OCF_RESKEY_的环境变量来设置变量 ip 的值。对于此示例,可运行以下命令:
查看源代码
打印帮助
1 export OCF_RESKEY_ip=<your_ip_address>
2 /usr/lib/ocf/resource.d/heartbeat/IPaddr validate-all
3 /usr/lib/ocf/resource.d/heartbeat/IPaddr start
4 /usr/lib/ocf/resource.d/heartbeat/IPaddr stop
如果此操作不成功,很可能是您缺少某个必需变量或者只是输错了参数。
排错ldirectord ocf代理脚本:
export OCF_ROOT=/usr/lib/ocf
根据自己的环境设置修改下面两行:
[root@HA1 ~]# vi /usr/lib/ocf/resource.d/heartbeat/ldirectord
查看源代码
打印帮助
1 LDIRCONF=${OCF_RESKEY_configfile:-/etc/ha.d/ldirectord.cf}
2 LDIRECTORD=${OCF_RESKEY_ldirectord:-/usr/sbin/ldirectord}
[root@HA1 ~]# /usr/lib/ocf/resource.d/heartbeat/ldirectord monitor
[root@HA1 ~]# echo $?
7 # ldirectord未运行返回7,运行正常返回0
[root@HA1 ~]# crm
crm(live)# configure
crm(live)configure# node HA1
crm(live)configure# node HA2
crm(live)configure# primitive ldirectord ocf:heartbeat:ldirectord \
> params configfile="/etc/ha.d/ldirectord.cf" \
> op monitor interval="30s" timeout="20s" \
> meta migration-threshold="10" target-role="Started"
crm(live)configure# primitive vip ocf:heartbeat:IPaddr2 \
> params lvs_support="true" ip="192.168.0.120" cidr_netmask="24" broadcast="192.168.0.255" \
> op monitor interval="1m" timeout="20s" \
> meta migration-threshold="10"
crm(live)configure# primitive nginxd lsb:nginxd \
> op monitor interval="30s" timeout="20s" \
> meta migration-threshold="10" target-role="Started"
crm(live)configure# group load-balancing vip ldirectord
crm(live)configure# clone cl-nginxd nginxd
crm(live)configure# location perfer-ha1 load-balancing \
> rule $id="prefer-ha1-rule" 100: #uname eq HA1
crm(live)configure# property stonith-enabled="false" \
> no-quorum-policy="ignore" \
> start-failure-is-fatal="false" \
> expected-quorum-votes="2"
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# end
crm(live)# status
============
Last updated: Thu Nov 12 01:00:13 2009
Stack: Heartbeat
Current DC: HA2 - partition with quorum
Version: 1.0.6-f709c638237cdff7556cb6ab615f32826c0f8c06
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ HA2 HA1 ]
Clone Set: cl-nginxd
Started: [ HA2 HA1 ]
Resource Group: load-balancing
vip (ocf::heartbeat:IPaddr2): Started HA1
ldirectord (ocf::heartbeat:ldirectord): Started HA1
crm(live)# quit
bye
5. 验证
[root@HA1 ~]# ipvsadm -l
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.0.120:http wlc
-> 192.168.0.69:http Route 1 0 0
-> 192.168.0.66:http Local 1 0 0
用浏览器访问网站看是否正常。
禁用HA1的eth1网卡,在HA2上看故障转移情况。
[root@HA2 ~]# crm
crm(live)# status
============
Last updated: Thu Nov 12 18:40:54 2009
Stack: Heartbeat
Current DC: HA2 - partition WITHOUT quorum
Version: 1.0.6-f709c638237cdff7556cb6ab615f32826c0f8c06
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ HA2 ]
OFFLINE: [ HA1 ]
Clone Set: cl-nginxd
Started: [ HA2 ]
Stopped: [ nginxd:0 ]
Resource Group: load-balancing
vip (ocf::heartbeat:IPaddr2): Started HA2
ldirectord (ocf::heartbeat:ldirectord): Started HA2
启用HA1的eth1网卡,在HA1上看故障转移情况。
[root@HA1 ~]# crm status
============
Last updated: Thu Nov 12 18:42:55 2009
Stack: Heartbeat
Current DC: HA1 - partition with quorum
Version: 1.0.6-f709c638237cdff7556cb6ab615f32826c0f8c06
2 Nodes configured, 2 expected votes
2 Resources configured.
============
Online: [ HA2 HA1 ]
Clone Set: cl-nginxd
Started: [ HA1 HA2 ]
Resource Group: load-balancing
vip (ocf::heartbeat:IPaddr2): Started HA1
ldirectord (ocf::heartbeat:ldirectord): Started HA1
6. 参考:
Load Balanced MySQL Replicated Cluster
Debian Lenny HowTo
相关文章:
* 2009/12/11 -- MySQL-Nginx-Pacemaker-corosync(openais)-drbd active/passive cluster
* 2009/12/09 -- Heartbeat实现Nginx高可用性(style 2.x)
* 2009/12/08 -- Heartbeat实现Nginx高可用性(style 1.x)
* 2009/12/10 -- 用CRM命令行工具配置集群资源
* 2009/12/08 -- HA体系架构及内部处理流程
[ 本帖最后由 toakee 于 2009-12-12 11:05 编辑 ] |
|