- 论坛徽章:
- 0
|
先贴出我的代码,供大家评点。
- #!/bin/sh
- ###############################################################################
- # Program name: log_monitor.sh
- # Chinese name: 日志监控
- # Return value: 0, 成功; 1,参数检测失败
- # Author :
- # Create date : 20070619
- # Modify date : 20070705
- # 新增日志监控类型域:ERROR或RUNNING,分别表示对日志报错的监控和
- # 对某关键字长久没有写入的监控
- ###############################################################################
################################ Configuration ################################
# Monitor group code; must be 0, 1 or 2 (checked below).
# When changing it, remember to adjust the /usr/local/ORACLE_indb.profile
# reference further down in this script accordingly.
# NOTE(review): the profile sourced later in this script is ORACLE_bill.profile,
# not ORACLE_indb.profile -- confirm which one is intended.
MONITOR_GROUP=1
# Monitor type: 1 = host directory backlog, 2 = process monitoring,
# 3 = database backlog monitoring (see the error message below).
MONITOR_TYPE=2
# Name of the monitored host or database.
MONITOR_MACHINE="##主机"
###############################################################################

# FUNC_IS_ONE_OF value allowed...
#   Return 0 when "value" is a non-negative integer numerically equal to one of
#   the "allowed" integers, 1 otherwise.
#   A plain `[ "$v" -ne 0 -a ... ]` test fails with a usage error on
#   non-numeric input, which an enclosing `if` treats as "false" -- i.e. the
#   invalid value would slip through. Rejecting non-digits first avoids that.
FUNC_IS_ONE_OF() {
  _CHOICE_VALUE=$1
  shift
  case "${_CHOICE_VALUE}" in
    '' | *[!0-9]*) return 1 ;;
  esac
  for _CHOICE_ALLOWED in "$@"; do
    if [ "${_CHOICE_VALUE}" -eq "${_CHOICE_ALLOWED}" ]; then
      return 0
    fi
  done
  return 1
}

# Validate MONITOR_GROUP: mandatory, one of 0/1/2.
if [ -z "${MONITOR_GROUP}" ]; then
  echo "ERROR:监控模块MONITOR_GROUP必须配置"
  exit 1
fi
if ! FUNC_IS_ONE_OF "${MONITOR_GROUP}" 0 1 2; then
  echo "ERROR:监控模块MONITOR_GROUP配置不合法:0, ##;1, ##;2, ###"
  exit 1
fi

# Validate MONITOR_TYPE: mandatory, one of 1/2/3.
if [ -z "${MONITOR_TYPE}" ]; then
  echo "ERROR:监控类型MONITOR_TYPE必须配置"
  exit 1
fi
if ! FUNC_IS_ONE_OF "${MONITOR_TYPE}" 1 2 3; then
  echo "ERROR:监控类型MONITOR_TYPE配置不合法:1, 主机目录积压; 2, 进程监控; 3, 数据库积压监控"
  exit 1
fi

# Validate MONITOR_MACHINE: mandatory, any non-empty string.
if [ -z "${MONITOR_MACHINE}" ]; then
  echo "ERROR:监控主机MONITOR_MACHINE必须配置"
  exit 1
fi
# Timestamp of this monitoring run; embedded in every alarm/error message.
MONITOR_DATE=$(date "+%Y%m%d[%H:%M:%S]")
# Directory the script was started from; "$0" is quoted so that a script path
# containing whitespace is not split into several words.
RUNNING_PATH=$(dirname "$0")
# Log that receives every alarm and error message.
ERROR_MSG_LOG="${RUNNING_PATH}/log_monitor_error_msg.log"
# List of monitored logs, one "|"-separated entry per line.
LOG_CONFIG_FILE="${RUNNING_PATH}/log_monitor.ini"
# Refresh the script's own modification time on every run.
# NOTE(review): presumably used as a "monitor is alive" marker -- confirm.
touch "$0"

# Pull in the database environment variables, if the profile is installed.
if [ -f /usr/local/ORACLE_bill.profile ]; then
  . /usr/local/ORACLE_bill.profile
fi

# Pull in the system runtime environment variables, if the profile is installed.
if [ -f /usr/local/Runtime_32bit.profile ]; then
  . /usr/local/Runtime_32bit.profile
fi
# Alarm function.
# Usage: FUNC_ALARM message
#   Hands "message" to the alarm program together with the monitored machine,
#   monitor type and monitor group, and appends both the alarm program's output
#   and the message itself to $ERROR_MSG_LOG (also echoed to stdout via tee).
# Globals read: EXECPROC_PATH, MONITOR_PATH, MONITOR_MACHINE, MONITOR_TYPE,
#               MONITOR_GROUP, ERROR_MSG_LOG.
#   NOTE(review): EXECPROC_PATH and MONITOR_PATH are not set in this script;
#   presumably they come from the sourced profiles -- confirm.
# The message was historically required to contain no spaces because it was
# expanded unquoted. It is now passed as exactly one argument, and it is no
# longer subject to word splitting or pathname expansion (a "*" in the text
# used to be replaced by file names).
#   NOTE(review): whether the alarm program itself copes with spaces is not
#   visible here, so callers should keep avoiding them.
FUNC_ALARM() {
  ALARM_MSG=$1
  # Legacy alarm call, kept for reference:
  # sh ${RUNNING_PATH}/bf_alarm_p.sh ${MONITOR_MACHINE} ${MONITOR_TYPE} ${MONITOR_GROUP} ${ALARM_MSG} |
  # tee -a $ERROR_MSG_LOG 2>&1
  #
  # # Record the operation log
  # echo "sh ${RUNNING_PATH}/bf_alarm_p.sh ${MONITOR_MACHINE} ${MONITOR_TYPE} ${MONITOR_GROUP} ${ALARM_MSG}" |
  # tee -a $ERROR_MSG_LOG
  "${EXECPROC_PATH}/execproc" "${MONITOR_PATH}/alarm" \
    "${MONITOR_MACHINE}" "${MONITOR_TYPE}" "${MONITOR_GROUP}" "${ALARM_MSG}" |
    tee -a "${ERROR_MSG_LOG}"
  # Record the message in the operation log.
  echo "${ALARM_MSG}" | tee -a "${ERROR_MSG_LOG}"
}
############################# Process monitoring ##############################
# FUNC_ROTATE_LOG file [max_bytes]
#   When "file" exists and is at least max_bytes large (default 10485760, i.e.
#   10M), rename it to "file.<YYYYmmddHHMM>", start a fresh log containing a
#   single empty line, and gzip the renamed copy.
#   Only the renamed copy is compressed: globbing "file*" would also gzip the
#   freshly recreated active log and leave no plain log file behind.
#   A missing file is not an error; there is simply nothing to rotate.
#   Returns 1 (leaving the log untouched) when the rename fails.
FUNC_ROTATE_LOG() {
  ROTATE_FILE=$1
  ROTATE_LIMIT=${2:-10485760}

  if [ ! -f "${ROTATE_FILE}" ]; then
    return 0
  fi

  # awk strips the padding some wc implementations put before the count.
  ROTATE_SIZE=$(wc -c < "${ROTATE_FILE}" | awk '{print $1}')
  if [ "${ROTATE_SIZE}" -ge "${ROTATE_LIMIT}" ]; then
    ROTATE_ARCHIVE="${ROTATE_FILE}.$(date "+%Y%m%d%H%M")"
    # Do not truncate the log if it could not be moved aside.
    mv "${ROTATE_FILE}" "${ROTATE_ARCHIVE}" || return 1
    echo > "${ROTATE_FILE}"
    # gzip may live in /usr/contrib/bin on this platform.
    export PATH=$PATH:/usr/contrib/bin/
    gzip -f "${ROTATE_ARCHIVE}"
  fi
}

# Back up the error log once it has grown beyond 10M.
FUNC_ROTATE_LOG "${ERROR_MSG_LOG}"
# Abort when the list of monitored logs cannot be read.
# The variable is quoted on purpose: with an empty value the unquoted form
# collapses to `[ ! -r ]`, which evaluates to false and skips this check.
if [ ! -r "${LOG_CONFIG_FILE}" ]; then
  echo "ERROR:${MONITOR_DATE}\n\t${LOG_CONFIG_FILE}配置文件不存在" | tee -a "${ERROR_MSG_LOG}"
  exit 1
fi
# FUNC_LOG_FIELD line index
#   Print field number "index" of the "|"-separated configuration line "line".
FUNC_LOG_FIELD() {
  printf '%s\n' "$1" | awk -F"|" -v FIELD_IDX="$2" '{print $FIELD_IDX}'
}

# FUNC_INTERVAL_MIN last_hour last_min now_hour now_min
#   Print the number of minutes elapsed from last_hour:last_min to
#   now_hour:now_min. A negative difference means the last entry was written
#   before midnight, so one day (1440 minutes) is added.
#   awk does the arithmetic so that zero-padded values such as "08" are read
#   as decimal numbers.
FUNC_INTERVAL_MIN() {
  awk -v LAST_H="$1" -v LAST_M="$2" -v NOW_H="$3" -v NOW_M="$4" 'BEGIN {
    DIFF = (NOW_H * 60 + NOW_M) - (LAST_H * 60 + LAST_M)
    if (DIFF < 0) DIFF += 1440
    print DIFF
  }'
}

# FUNC_CHECK_LOG_ENTRY line
#   Validate one configuration line and run the check it describes.
#   Line format: log_name|keyword_spec|description[|type]
#     log_name      directory plus a file-name fragment; every file under that
#                   directory whose name contains the fragment and that changed
#                   within the last day is inspected
#     keyword_spec  type ERROR:   comma-separated keywords, matched
#                                 case-insensitively as an extended regex
#                   type RUNNING: keyword:minutes
#     description   text placed in the alarm message
#     type          ERROR (default when the field is missing) or RUNNING
#   ERROR   alarms when one of the keywords occurs in the last 500 lines.
#   RUNNING alarms when the newest line containing the keyword (within the last
#           5000 lines) is older than "minutes"; that line must begin with
#           HH:MM.
#   Every problem is reported through FUNC_ALARM; the function returns 0.
#   Globals read: MONITOR_DATE. Working variables are global because plain
#   POSIX sh has no `local`.
FUNC_CHECK_LOG_ENTRY() {
  LOG_INFO=$1

  # Parameter checks: number of fields.
  FIELD_NUM=$(printf '%s\n' "${LOG_INFO}" | awk -F"|" '{print NF}')
  if [ "${FIELD_NUM}" -lt 2 ]; then
    FUNC_ALARM "ERROR:${MONITOR_DATE}\n\t${LOG_INFO}\n\t${LOG_INFO}不合法,请至少配置日志名关键字、日志监控识别关键字、日志说明和监控类型"
    return 0
  fi

  # Log name.
  LOG_NAME=$(FUNC_LOG_FIELD "${LOG_INFO}" 1)
  if [ -z "${LOG_NAME}" ]; then
    FUNC_ALARM "ERROR:${MONITOR_DATE}\n\t${LOG_INFO}\n\t日志名称不可为空"
    return 0
  fi
  LOG_PATH=$(dirname "${LOG_NAME}")
  LOG_KEYWORD=$(basename "${LOG_NAME}")

  # At least one matching file must be readable. A loop is used because a
  # single `[ -r glob ]` breaks as soon as the glob matches several files.
  # LOG_KEYWORD stays unquoted so that wildcard characters configured in it
  # keep acting as a pattern, as they do in the find calls below.
  LOG_READABLE=0
  # shellcheck disable=SC2231
  for LOG_CANDIDATE in "${LOG_PATH}"/*${LOG_KEYWORD}*; do
    if [ -r "${LOG_CANDIDATE}" ]; then
      LOG_READABLE=1
      break
    fi
  done
  if [ "${LOG_READABLE}" -ne 1 ]; then
    FUNC_ALARM "ERROR:${MONITOR_DATE}\n\t${LOG_INFO}\n\t所监控的日志不存在"
    return 0
  fi

  DESCRIPT=$(FUNC_LOG_FIELD "${LOG_INFO}" 3)

  # Monitor type; older configuration files have no such field and mean ERROR.
  LOG_TYPE=$(FUNC_LOG_FIELD "${LOG_INFO}" 4)
  if [ -z "${LOG_TYPE}" ]; then
    LOG_TYPE="ERROR"
  fi

  case "${LOG_TYPE}" in
    ERROR)
      # Keywords are comma-separated in the file; grep -E wants "|".
      ERROR_KEYWORD=$(FUNC_LOG_FIELD "${LOG_INFO}" 2 | sed 's/,/|/g')
      if [ -z "${ERROR_KEYWORD}" ]; then
        FUNC_ALARM "ERROR:${MONITOR_DATE}\n\t${LOG_INFO}\n\t配置的异常识别关键字不能为空"
        return 0
      fi
      # First matching line among the last 500; whitespace becomes "|" because
      # alarm messages must not contain blanks. The -name pattern is quoted so
      # that find, not the shell, expands it.
      EXCEPTION_INFO=$(find "${LOG_PATH}" ! -type d -name "*${LOG_KEYWORD}*" -ctime 0 -exec cat {} \; |
        tail -n 500 | grep -iE -e "${ERROR_KEYWORD}" | head -n 1 | sed 's/[[:space:]]/|/g')
      if [ -n "${EXCEPTION_INFO}" ]; then
        FUNC_ALARM "ALARM:[${MONITOR_DATE}]${DESCRIPT}:发现异常${EXCEPTION_INFO};"
      fi
      ;;

    RUNNING)
      # keyword_spec format: RUNNING_KEYWORD:minutes
      RUNNING_SPEC=$(FUNC_LOG_FIELD "${LOG_INFO}" 2)
      RUNNING_KEYWORD=$(printf '%s\n' "${RUNNING_SPEC}" | awk -F: '{print $1}')
      if [ -z "${RUNNING_KEYWORD}" ]; then
        FUNC_ALARM "ERROR:${MONITOR_DATE}\n\t${LOG_INFO}\n\t配置的运行识别关键字不能为空"
        return 0
      fi
      # The threshold is taken from the configuration line (it used to be read
      # from LOG_PATH, which never contains it) and must consist of digits only.
      VALUE=$(printf '%s\n' "${RUNNING_SPEC}" | awk -F: '{print $2}' | sed -n '/^[0-9][0-9]*$/p')
      if [ -z "${VALUE}" ]; then
        FUNC_ALARM "ERROR:${MONITOR_DATE}\n\t${LOG_INFO}\n\t配置的监控阀值${VALUE}必须是自然数"
        return 0
      fi

      # Newest line mentioning the keyword. The time fields are reset for every
      # entry so that a value left over from a previous entry can never hide a
      # silent log, and they are extracted without eval so that log content is
      # never executed as shell code.
      LAST_HOUR=
      LAST_MIN=
      LAST_LINE=$(find "${LOG_PATH}" ! -type d -name "*${LOG_KEYWORD}*" -ctime 0 -exec cat {} \; |
        tail -n 5000 | grep -iE -e "${RUNNING_KEYWORD}" | tail -n 1)
      if [ -n "${LAST_LINE}" ]; then
        LAST_HOUR=$(printf '%s\n' "${LAST_LINE}" | awk -F: '{print $1}')
        LAST_MIN=$(printf '%s\n' "${LAST_LINE}" | awk -F: '{print $2}')
      fi
      if [ -z "${LAST_HOUR}" ] && [ -z "${LAST_MIN}" ]; then
        FUNC_ALARM "ALARM:[${MONITOR_DATE}]${DESCRIPT}:哇塞!居然没有写日志!"
        return 0
      fi

      # The line must start with HH:MM; anything else cannot be evaluated.
      LAST_TIME_OK=1
      case "${LAST_HOUR}" in
        [0-9] | [0-9][0-9]) ;;
        *) LAST_TIME_OK=0 ;;
      esac
      case "${LAST_MIN}" in
        [0-9] | [0-9][0-9]) ;;
        *) LAST_TIME_OK=0 ;;
      esac
      if [ "${LAST_TIME_OK}" -ne 1 ]; then
        FUNC_ALARM "ERROR:${MONITOR_DATE}\n\t${LOG_INFO}\n\t日志行首无法解析出时:分,无法判断运行状态"
        return 0
      fi

      # Current time from a single date call, so hour and minute always belong
      # to the same instant.
      MONITOR_NOW=$(date "+%H:%M")
      MONITOR_HOUR=${MONITOR_NOW%:*}
      MONITOR_MIN=${MONITOR_NOW#*:}

      INTERVAL_MIN=$(FUNC_INTERVAL_MIN "${LAST_HOUR}" "${LAST_MIN}" "${MONITOR_HOUR}" "${MONITOR_MIN}")
      if [ "${INTERVAL_MIN}" -gt "${VALUE}" ]; then
        FUNC_ALARM "ALARM:[${MONITOR_DATE}]${DESCRIPT}:长达${INTERVAL_MIN}分钟没有有效运行,超出设定阀值${VALUE}"
      fi
      ;;

    *)
      FUNC_ALARM "ERROR:${MONITOR_DATE}\n\t${LOG_INFO}\n\t配置的日志监控类型应该为ERROR或者RUNNING"
      ;;
  esac
  return 0
}

# Process every line of the configuration file.
# - read -r keeps backslashes in keyword patterns intact;
# - the extra test handles a last line without trailing newline;
# - the file is read through descriptor 3 so that programs started while an
#   entry is processed cannot swallow the remaining lines from stdin.
while read -r LOG_INFO <&3 || [ -n "${LOG_INFO}" ]; do
  FUNC_CHECK_LOG_ENTRY "${LOG_INFO}"
done 3< "${LOG_CONFIG_FILE}"
- exit 0
复制代码
[ 本帖最后由 Cion 于 2008-8-28 10:06 编辑 ] |
|