Title: Clustering problems with RedHat Linux AS3!!!!
Author: rootq    Time: 2005-08-02 11:13
I followed this reference: http://chinaunix.net/jh/4/325828.html
I installed two RedHat Linux AS3 systems in VMs:
linux1 IP: 10.0.0.230
linux2 IP: 10.0.0.231
After installing clumanager and redhat-config-cluster I finished the related configuration.
Then I ran: service clumanager start
A moment later the system rebooted. What is going on?

Author: 好好先生    Time: 2005-08-02 11:30
Take a look at the logs first...

Author: gridpc    Time: 2005-08-02 11:36
pls run "dmesg|tee log" and paste log file, paste the /etc/init.d/clumanager script

Author: rootq    Time: 2005-08-02 21:23
Here is the error log:
-------------------------------------------------------------------------------------
Aug 2 20:57:55 linux1 clumanager: [1345]: <notice> Starting Red Hat Cluster Manager...
Aug 2 20:57:56 linux1 cludb[1362]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:56 linux1 cludb[1363]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:56 linux1 cludb[1364]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:56 linux1 cludb[1365]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:56 linux1 cludb[1366]: <crit> _clu_lock_init: unable to get local member ID
Aug 2 20:57:58 linux1 cluquorumd[1375]: <warning> STONITH: No drivers configured for host '10.0.0.230'!
Aug 2 20:57:58 linux1 cluquorumd[1375]: <warning> STONITH: Data integrity may be compromised!
Aug 2 20:57:58 linux1 cluquorumd[1375]: <warning> STONITH: No drivers configured for host '10.0.0.231'!
Aug 2 20:57:58 linux1 cluquorumd[1375]: <warning> STONITH: Data integrity may be compromised!
Aug 2 20:58:12 linux1 cluquorumd[1375]: <notice> Quorum Formed; Starting Service Manager
-------------------------------------------------------------------------------------
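One thing worth ruling out for the sudden reboot (only a guess, based on the STONITH warnings above and the watchdog handling in the init script below): if cluster.xml contains watchdog="yes", the script loads the softdog module, and softdog reboots the node once the cluster daemons stop feeding /dev/watchdog. A quick check on each node might look like this:
-------------------------------------------------------------------------------------
grep -i watchdog /etc/cluster.xml       # is the software watchdog enabled?
lsmod | grep -e softdog -e wdt          # is a watchdog module currently loaded?
-------------------------------------------------------------------------------------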
And here is the /etc/init.d/clumanager script:
-------------------------------------------------------------------------------------
#!/bin/sh
#
# Copyright (C) 2003 Red Hat, Inc.
#
# This program is Free Software. You may modify and/or redistribute it under
# the terms of the GNU General Public License version 2, or (at your option)
# any later version.
#
# description: Starts and stops Red Hat Cluster Manager
# chkconfig: 2345 99 01
#
# Source function library
. /etc/init.d/functions
# Grab the network config file
. /etc/sysconfig/network
SIMPLE_OPERATION=0
ID="Red Hat Cluster Manager"
CFG_DIR="/etc"
CFG_FILE="$CFG_DIR/cluster.xml"
FW_CHAIN=RH-Firewall-1-INPUT
#
# Only root wants to run this...
#
[ `id -u` = 0 ] || exit 0
#
# If we're not configured, then don't start anything.
#
[ "${NETWORKING}" = "yes" ] || exit 0
[ -f "/etc/cluster.xml" ] || exit 1
#
# Check the cluster configuration file and load the watchdog timer if
# necessary.
# XXX This loads the watchdog timer on ALL machines, even if they do not
# have 'watchdog=yes' specified.
#
watchdog_init()
{
    #
    # Check the cluster configuration for watchdog support
    #
    # XXX This creates and loads the watchdog module regardless of whether
    # the local member uses it.
    #
    grep -q -i "watchdog=\"yes\"" $CFG_FILE
    if [ $? -eq 1 ]; then
        return 0
    fi

    # Check to ensure we have /dev/watchdog
    if ! [ -c /dev/watchdog ]; then
        if [ -f /dev/watchdog ]; then
            action "Removing invalid /dev/watchdog:" \
                rm -f /dev/watchdog
        fi
        action "Creating /dev/watchdog:" /dev/MAKEDEV watchdog
    fi

    # Check /etc/modules.conf for an "alias wdt xxxxxx" line; xxxxxx =
    # the specific driver (see below) we're dealing with.
    # If there is no alias, default to softdog.
    _WDT=`grep "alias wdt" /etc/modules.conf | awk '{print $3}'`
    if [ -z "$_WDT" ]; then
        _PROBE=softdog
        _WDT=softdog
    else
        _PROBE=wdt
    fi

    # Don't try to load the module a second time.
    lsmod | grep -q $_WDT
    if [ $? -ne 0 ]; then
        action "Loading Watchdog Timer ($_WDT): " modprobe $_PROBE
    fi

    unset _WDT _PROBE
    return 0
}
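
# For illustration only (the driver name below is hypothetical): the grep in
# watchdog_init() matches a /etc/modules.conf line of the following form, and
# awk picks the third field as the driver to modprobe:
#
#   alias wdt i810-tco
#
# When no such line exists, the script falls back to loading softdog.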
#
# open a port in our firewall
#
open_port()
{
    declare PROTO=$1
    declare -i PORT=$2

    if [ -z "$1" -o -z "$2" ]; then
        echo "usage: $0 <protocol> <port>"
        return 1
    fi

    /sbin/iptables -I $FW_CHAIN -m state \
        --state NEW -m $PROTO -p $PROTO --dport $PORT -j ACCEPT
}
#
# Close a port in the firewall.
#
close_port()
{
    declare -i PORT=$1

    while [ 0 ]; do
        #
        # Grab the rule number so we can delete it.
        # - List our table w/ line numbers for each rule.
        # - Grab the rule number from column 1 of the first line
        #
        declare rule=`iptables -L $FW_CHAIN -n --line-numbers | grep $PORT | head -1 | awk '{print $1}'`
        [ -z "$rule" ] && break;
        iptables -D $FW_CHAIN $rule
    done
    return 0
}
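
# Usage sketch for the two helpers above; the protocol/port pair is purely
# illustrative (34001 is just the script's default clumembd address port):
#
#   open_port tcp 34001    # insert an ACCEPT rule at the top of $FW_CHAIN
#   close_port 34001       # delete every $FW_CHAIN rule that mentions the port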
#
# Read the cluster configuration and open the default and/or configured
# ports.
#
cluster_firewall()
{
    declare -i clumembd_addr=34001
    declare -i clusvcmgrd_addr=34002
    declare -i cluquorumd_addr=34003
    declare -i clulockd_addr=34004
    declare -i hb_bcast=1228
    declare -i hb_mcast=1229
    declare port

    if ! iptables -L $FW_CHAIN &> /dev/null; then
        return 0
    fi

    port=`cludb -g clumembd%addr`
    if [ "$port" != "not found" ]; then
        clumembd_addr=$port
    fi

    port=`cludb -g clusvcmgrd%addr`
    if [ "$port" != "not found" ]; then
        clusvcmgrd_addr=$port
    fi

    port=`cludb -g cluquorumd%addr`
    if [ "$port" != "not found" ]; then
        cluquorumd_addr=$port
    fi

    port=`cludb -g clulockd%addr`
    if [ "$port" != "not found" ]; then
        clulockd_addr=$port
    fi

    port=`cludb -g clumembd%port`
    if [ "$port" != "not found" ]; then
        hb_bcast=$port
        hb_mcast=$((port+1))
    fi
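
    # (Illustrative:) cludb -g prints the value stored under a key, or the
    # literal string "not found" when the key is absent, which is why each
    # lookup above is compared against "not found". For example:
    #
    #   cludb -g clumembd%port    # -> the configured heartbeat port, or "not found"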
#
# Bring down the cluster on a node.
#
stop_cluster()
{
    #
    # Sometimes, people stop the cluster before the service manager
    # is running - this causes the cluster stop script to hang, since
    # the service manager never actually receives the shutdown signal.
    # In this case, we need to resend the TERM signal to the quorum
    # daemon - so that it can notify everyone properly.
    #
    while [ 0 ]; do
        echo -n $"Initiating shutdown of Quorum Services: "
        killproc $QUORUMD -TERM
        echo

        if [ -n "`pidof $SVCMGRD`" ]; then
            echo -n $"Waiting for User Services to stop: "
            while [ -n "`pidof $SVCMGRD`" ]; do
                sleep 1
            done
            echo_success
            echo
        else
            echo $"User Services are stopped."
        fi

        # Ensure all NFS rmtab daemons are dead.
        killall $RMTABD &> /dev/null

        # Just in case the service manager blew up during shutdown...
        killproc $QUORUMD -TERM &> /dev/null

        if [ -n "`pidof $QUORUMD`" ]; then
            echo -n $"Waiting for Quorum Services to stop: "
            while [ -n "`pidof $QUORUMD`" ]; do
                sleep 1
                if [ -n "`pidof $SVCMGRD`" ]; then
                    echo_failure
                    echo
                    echo "Retrying..."
                    continue 2
                fi
            done
            echo_success
            echo
        else
            echo $"Quorum Services are stopped."
        fi

        if [ -n "`pidof $MEMBD`" ]; then
            echo -n $"Waiting for Membership Services to stop: "
            while [ -n "`pidof $MEMBD`" ]; do
                sleep 1
            done
            echo_success
            echo
        else
            echo $"Membership Services are stopped."
        fi
    if [ $SIMPLE_OPERATION -eq 0 ]; then
        update_status $LOCKD $rv
        rv=$?
        update_status $SVCMGRD $rv
        svcmgr=$?

        #
        # Check for the case where the service manager is dead
        # but everything else is running.
        #
        if [ $rv -ne 5 -a $svcmgr -eq 5 ]; then
            clustat -Q
            # No Quorum + No Service Manager -> OK!
            if [ $? -eq 1 ]; then
                echo "Note: Service manager is not running because this member"
                echo "      is not participating in the cluster quorum."
                exit 0;
            fi
        fi
        rv=$svcmgr
    fi
    [ $rv -eq 5 ] && exit 1;
    exit $rv
    ;;
stop)
    if [ -n "`pidof $QUORUMD`" ]; then
        log_and_print $LOG_NOTICE "Shutting down $ID..."
        stop_cluster
    elif [ -n "`pidof $MEMBD`" ]; then
        log_and_print $LOG_NOTICE "Shutting down $ID..."
        echo $"Stopping Cluster Membership Daemon: "
        killproc $MEMBD -KILL
        echo
    fi