- 论坛徽章:
- 0
|
本帖最后由 forestiger 于 2013-06-04 13:45 编辑
CentOS6.2+heartbeat 3.0.4
问题: 释放VIP资源时,到eth0主地址(10.160.36.11或10.160.36.12)的SSH连接会中断(VIP配置在别名接口eth0:0上)。
重新连接SSH可以再次连接上,哪位知道原因?
我的主要配置如下:
#vim /etc/hosts
192.168.0.101 priv01 master
192.168.0.102 priv02 slave
10.160.36.12 slave priv02
10.160.36.11 master priv01
10.160.36.10 vip
#vim /etc/ha.d/ha.cf
logfile /var/log/ha.log
logfacility none
keepalive 2
deadtime 10
warntime 5
initdead 180
udpport 11695
ucast eth1 192.168.0.2 #互相填写对方的心跳IP
#ucast eth1 192.168.0.1 #备机填主机的心跳IP
node master
node slave
auto_failback on
ping 10.160.32.1
respawn hacluster /usr/lib64/heartbeat/ipfail
apiauth ipfail gid=haclient uid=hacluster
#vim /etc/ha.d/authkeys
auth 5
5 sha1 heartbeat
#chmod -v 600 /etc/ha.d/authkeys
#vim /etc/ha.d/haresources
master IPaddr::10.160.36.10/20/eth0:0/10.160.47.255 httpd #主备机资源配置文件必须完全一样
在备机上手动执行了一次切换,主备两端产生的日志如下:
[root@slave ha.d]# /usr/share/heartbeat/hb_takeover
-----备机ha日志
[root@slave ha.d]# cat /var/log/ha.log
harc(default)[3104]: 2013/05/29_21:16:49 info: Running /etc/ha.d//rc.d/hb_takeover hb_takeover
May 29 21:16:49 slave heartbeat: [1525]: info: master wants to go standby [all]
May 29 21:16:50 slave heartbeat: [1525]: info: standby: acquire [all] resources from master
May 29 21:16:50 slave heartbeat: [3120]: info: acquire all HA resources (standby).
ResourceManager(default)[3133]: 2013/05/29_21:16:50 info: Acquiring resource group: master IPaddr::10.160.36.10/20/eth0:0/10.160.47.255 httpd
/usr/lib/ocf/resource.d//heartbeat/IPaddr(IPaddr_10.160.36.10)[3160]: 2013/05/29_21:16:50 INFO: Resource is stopped
ResourceManager(default)[3133]: 2013/05/29_21:16:50 info: Running /etc/ha.d/resource.d/IPaddr 10.160.36.10/20/eth0:0/10.160.47.255 start
IPaddr(IPaddr_10.160.36.10)[3243]: 2013/05/29_21:16:50 INFO: Using calculated netmask for 10.160.36.10: 255.255.240.0
IPaddr(IPaddr_10.160.36.10)[3243]: 2013/05/29_21:16:51 INFO: eval ifconfig eth0:0 10.160.36.10 netmask 255.255.240.0 broadcast 10.160.47.255
/usr/lib/ocf/resource.d//heartbeat/IPaddr(IPaddr_10.160.36.10)[3217]: 2013/05/29_21:16:51 INFO: Success
ResourceManager(default)[3133]: 2013/05/29_21:16:51 info: Running /etc/init.d/httpd start
May 29 21:16:51 slave heartbeat: [3120]: info: all HA resource acquisition completed (standby).
May 29 21:16:51 slave heartbeat: [1525]: info: Standby resource acquisition done [all].
May 29 21:16:51 slave heartbeat: [1525]: info: remote resource transition completed.
[root@slave ha.d]#
-----主机ha日志
[root@master ~]# cat /var/log/ha.log
harc(default)[23959]: 2013/05/29_13:16:50 info: Running /etc/ha.d//rc.d/hb_takeover hb_takeover
hb_standby(default)[23975]: 2013/05/29_13:16:50 Going standby [all].
May 29 13:16:50 master heartbeat: [19064]: info: master wants to go standby [all]
May 29 13:16:51 master heartbeat: [19064]: info: standby: slave can take our all resources
May 29 13:16:51 master heartbeat: [23989]: info: give up all HA resources (standby).
ResourceManager(default)[24002]: 2013/05/29_13:16:51 info: Releasing resource group: master IPaddr::10.160.36.10/20/eth0:0/10.160.47.255 httpd
ResourceManager(default)[24002]: 2013/05/29_13:16:51 info: Running /etc/init.d/httpd stop
ResourceManager(default)[24002]: 2013/05/29_13:16:51 info: Running /etc/ha.d/resource.d/IPaddr 10.160.36.10/20/eth0:0/10.160.47.255 stop
IPaddr(IPaddr_10.160.36.10)[24082]: 2013/05/29_13:16:51 INFO: ifconfig eth0:0 down
/usr/lib/ocf/resource.d//heartbeat/IPaddr(IPaddr_10.160.36.10)[24056]: 2013/05/29_13:16:51 INFO: Success
May 29 13:16:51 master heartbeat: [23989]: info: all HA resource release completed (standby).
May 29 13:16:51 master heartbeat: [19064]: info: Local standby process completed [all].
May 29 13:16:52 master heartbeat: [19064]: WARN: 1 lost packet(s) for [slave] [33266:33268]
May 29 13:16:52 master heartbeat: [19064]: info: remote resource transition completed.
May 29 13:16:52 master heartbeat: [19064]: info: No pkts missing from slave!
May 29 13:16:52 master heartbeat: [19064]: info: Other node completed standby takeover of all resources.
[root@master ~]#
在备机上执行/usr/share/heartbeat/hb_takeover的同时,在主机上用tcpdump抓取SSH客户端与主机之间的数据包(10.160.32.1是SSH客户端经过SNAT后的IP地址):
[root@master ~]# tcpdump -i eth0 host 10.160.32.1 and host 10.160.36.11 and port 22
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes
18:40:58.204653 IP localhost.62513 > master.ssh: Flags [P.], seq 286881302:286881338, ack 2646507850, win 16268, length 36
18:40:58.204697 IP master.ssh > localhost.62513: Flags [.], ack 36, win 279, length 0
18:42:00.060948 IP localhost.62513 > master.ssh: Flags [P.], seq 36:72, ack 1, win 16268, length 36
18:42:00.060999 IP master.ssh > localhost.62513: Flags [.], ack 72, win 279, length 0
18:43:01.916416 IP localhost.62513 > master.ssh: Flags [P.], seq 72:108, ack 1, win 16268, length 36
18:43:01.916482 IP master.ssh > localhost.62513: Flags [.], ack 108, win 279, length 0
18:43:16.146187 IP localhost.62513 > master.ssh: Flags [P.], seq 108:160, ack 1, win 16268, length 52
18:43:16.146225 IP master.ssh > localhost.62513: Flags [.], ack 160, win 279, length 0
18:43:16.148266 IP master.ssh > localhost.62513: Flags [P.], seq 1:69, ack 160, win 279, length 68
18:43:16.322606 IP localhost.62513 > master.ssh: Flags [P.], seq 160:212, ack 69, win 16200, length 52
18:43:16.324234 IP master.ssh > localhost.62513: Flags [P.], seq 69:137, ack 212, win 279, length 68
18:43:16.481077 IP localhost.62513 > master.ssh: Flags [P.], seq 212:264, ack 137, win 16132, length 52
18:43:16.483196 IP master.ssh > localhost.62513: Flags [P.], seq 137:205, ack 264, win 279, length 68
18:43:16.684734 IP localhost.62513 > master.ssh: Flags [.], ack 205, win 16064, length 0
|
|