- 论坛徽章:
- 0
|
#!/usr/bin/env python
"""Count log entries per day, hour and error type.

The log is read from stdin and contains two kinds of lines:

* date lines such as ``Mon Jun 28 00:00:53 2009``, which set the day for
  every entry that follows them;
* entry lines such as ``09:55:54: ERROR1 /tmp/error/log.3 50 times``,
  which start with a two-digit hour and carry the error type in the
  second whitespace-separated field.

One line per distinct (day, hour, error type) is written to stdout as
``year|month|day|hour|err_type|count``, sorted by that key.
"""
import sys
import re

# An entry line starts with a two-digit hour followed by a colon.
re_hour = re.compile(r'^\d\d:')

# Day recorded for entries that appear before the first date line.
UNKNOWN_DAY = 'n/a|n/a|n/a'


def make_day(d):
    """Return 'year|month|day' built from the fields of a date line.

    d -- the whitespace-split date line, e.g.
         ['Mon', 'Jun', '28', '00:00:53', '2009'].
    """
    return d[4] + '|' + d[1] + '|' + d[2]


def make_key(d, c):
    """Return the counter key 'day|hour|err_type' for one entry line.

    d -- the current day as produced by make_day().
    c -- the whitespace-split entry line; c[0] is the timestamp (only its
         first two characters, the hour, are used) and c[1] the error type.
    """
    return d + '|' + c[0][0:2] + '|' + c[1]


def count_errors(lines):
    """Count entries per 'day|hour|err_type' key.

    lines -- any iterable of log lines (a file object, a list, ...).

    Returns a dict mapping each key to the number of entry lines seen
    for it. Lines that are blank or too short to be parsed are skipped
    instead of raising IndexError.
    """
    cur_day = UNKNOWN_DAY
    log_cnt = {}
    for line in lines:
        # str.split() drops leading/trailing whitespace, so a trailing
        # newline never yields an empty field.
        cols = line.split()
        if re_hour.match(line) is not None:
            # Entry line: needs at least the timestamp and the error type.
            if len(cols) < 2:
                continue
            key = make_key(cur_day, cols)
            log_cnt[key] = log_cnt.get(key, 0) + 1
        elif len(cols) >= 5:
            # Date line: the year is the fifth field.
            cur_day = make_day(cols)
    return log_cnt


def main():
    """Read the log from stdin and print the sorted counts to stdout."""
    log_cnt = count_errors(sys.stdin)
    for key in sorted(log_cnt):
        # sys.stdout.write behaves the same on Python 2 and Python 3.
        sys.stdout.write('%s|%d\n' % (key, log_cnt[key]))


if __name__ == '__main__':
    main()
|
[of@lbaby py]$ cat tuc
09:55:54: ERROR1 /tmp/error/log.3 50 times
Mon Jun 28 00:00:53 2009
09:55:54: ERROR1 /tmp/error/log.3 50 times
09:56:09: ERROR1 /tmp/error/log.14 50 times
10:56:12: ERROR1 /tmp/error/log.14 100 times
10:56:23: ERROR2 /tmp/error/log.5 50 times
11:56:26: ERROR2 /tmp/error/log.1 50 times
11:56:27: ERROR2 /tmp/error/log.5 100 times
Mon Jun 29 00:00:53 2009
15:56:29: ERROR3 /tmp/error/log.1 100 times
15:56:32: ERROR3 /tmp/error/log.1 150 times
16:56:33: ERROR4 /tmp/error/log.6 50 times
16:56:36: ERROR4 /tmp/error/log.6 100 times
16:56:40: ERROR4 /tmp/error/log.12 50 times
Mon Jun 30 00:00:53 2009
[of@lbaby py]$ cat tuc | ./ec.py
2009|Jun|28|09|ERROR1|2
2009|Jun|28|10|ERROR1|1
2009|Jun|28|10|ERROR2|1
2009|Jun|28|11|ERROR2|2
2009|Jun|29|15|ERROR3|2
2009|Jun|29|16|ERROR4|3
n/a|n/a|n/a|09|ERROR1|1
[of@lbaby py]$
看起来效果不错，看来是离不开 re 模块了，可怜啊。
[ 本帖最后由 lbaby 于 2009-7-7 18:47 编辑 ] |
|