- 论坛徽章:
- 0
|
10可用积分
部分系统信息如下:
[root@e6900a etc]# cat redhat-release
Red Hat Enterprise Linux AS release 4 (Nahant Update 4)
[root@e6900a etc]# cat fstab
# This file is edited by fstab-sync - see 'man fstab-sync' for details
LABEL=/ / ext3 defaults 1 1
LABEL=/boot /boot ext3 defaults 1 2
none /dev/pts devpts gid=5,mode=620 0 0
none /dev/shm tmpfs defaults 0 0
none /proc proc defaults 0 0
none /sys sysfs defaults 0 0
LABEL=SWAP-sda2 swap swap defaults 0 0
LABEL=SWAP-sda3 swap swap defaults 0 0
/dev/hda /media/cdrecorder auto pamconsole,exec,noauto,managed 0 0
[root@e6900a etc]# df -h
Filesystem 容量 已用 可用 已用% 挂载点
/dev/sda5 1.6T 395G 1.1T 27% /
/dev/sda1 99M 12M 83M 13% /boot
none 8.0G 0 8.0G 0% /dev/shm
top - 17:20:26 up 8 days, 5:08, 1 user, load average: 0.14, 0.12, 0.09
Tasks: 690 total, 1 running, 689 sleeping, 0 stopped, 0 zombie
Cpu0 : 0.3% us, 0.0% sy, 0.0% ni, 99.7% id, 0.0% wa, 0.0% hi, 0.0% si
Cpu1 : 0.3% us, 0.3% sy, 0.0% ni, 96.0% id, 3.3% wa, 0.0% hi, 0.0% si
Cpu2 : 0.0% us, 0.0% sy, 0.0% ni, 100.0% id, 0.0% wa, 0.0% hi, 0.0% si
Cpu3 : 0.0% us, 0.0% sy, 0.0% ni, 100.0% id, 0.0% wa, 0.0% hi, 0.0% si
Cpu4 : 0.0% us, 0.0% sy, 0.0% ni, 100.0% id, 0.0% wa, 0.0% hi, 0.0% si
Cpu5 : 0.0% us, 0.0% sy, 0.0% ni, 100.0% id, 0.0% wa, 0.0% hi, 0.0% si
Cpu6 : 0.0% us, 0.0% sy, 0.0% ni, 100.0% id, 0.0% wa, 0.0% hi, 0.0% si
Cpu7 : 0.0% us, 0.0% sy, 0.0% ni, 100.0% id, 0.0% wa, 0.0% hi, 0.0% si
Mem: 16621728k total, 11666284k used, 4955444k free, 64412k buffers
Swap: 4096552k total, 875136k used, 3221416k free, 7677404k cached
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
7737 root 16 0 3936 1324 760 R 1 0.0 0:00.10 top
11471 root 17 0 977m 407m 75m S 1 2.5 19:59.27 java
4513 oracle 16 0 795m 99m 97m S 0 0.6 0:12.90 oracle
1 root 16 0 1988 556 472 S 0 0.0 0:16.66 init
2 root RT 0 0 0 0 S 0 0.0 0:00.09 migration/0
3 root 34 19 0 0 0 S 0 0.0 0:00.01 ksoftirqd/0
4 root RT 0 0 0 0 S 0 0.0 0:00.10 migration/1
5 root 34 19 0 0 0 S 0 0.0 0:00.41 ksoftirqd/1
6 root RT 0 0 0 0 S 0 0.0 0:00.08 migration/2
7 root 34 19 0 0 0 S 0 0.0 0:00.00 ksoftirqd/2
8 root RT 0 0 0 0 S 0 0.0 0:00.07 migration/3
9 root 34 19 0 0 0 S 0 0.0 0:00.00 ksoftirqd/3
10 root RT 0 0 0 0 S 0 0.0 0:00.07 migration/4
11 root 34 19 0 0 0 S 0 0.0 0:00.00 ksoftirqd/4
12 root RT 0 0 0 0 S 0 0.0 0:00.04 migration/5
13 root 34 19 0 0 0 S 0 0.0 0:00.00 ksoftirqd/5
14 root RT 0 0 0 0 S 0 0.0 0:00.03 migration/6
15 root 34 19 0 0 0 S 0 0.0 0:00.00 ksoftirqd/6
16 root RT 0 0 0 0 S 0 0.0 0:00.05 migration/7
17 root 34 19 0 0 0 S 0 0.0 0:00.00 ksoftirqd/7
18 root 5 -10 0 0 0 S 0 0.0 0:00.03 events/0
19 root 5 -10 0 0 0 S 0 0.0 0:00.01 events/1
20 root 6 -10 0 0 0 S 0 0.0 0:00.05 events/2
21 root 5 -10 0 0 0 S 0 0.0 0:00.02 events/3
22 root 5 -10 0 0 0 S 0 0.0 0:00.02 events/4
23 root 5 -10 0 0 0 S 0 0.0 0:00.01 events/5
24 root 5 -10 0 0 0 S 0 0.0 0:00.03 events/6
25 root 5 -10 0 0 0 S 0 0.0 0:00.02 events/7
26 root 5 -10 0 0 0 S 0 0.0 0:00.00 khelper
27 root 14 -10 0 0 0 S 0 0.0 0:00.00 kacpid
运行了大概半年时间,最近系统挂了几次,间隔时间一个月左右,请高手帮忙分析。
系统运行有5个Oracle实例,还有30来个其他服务。
宕机后重启服务器,查看日志,宕机前后没有任何异常记录。
下面日志中的红字部分是服务器重启的位置。
挂:停止响应,像Windows蓝屏一样,只是屏幕没有任何显示。【这里是抓图】
Jan 20 23:57:28 www vsftpd(pam_unix)[1697]: check pass; user unknown
Jan 20 23:57:28 www vsftpd(pam_unix)[1697]: authentication failure; logname= uid=0 euid=0 tty= ruser= rhost=208.91.131.2
Jan 20 23:57:31 www vsftpd(pam_unix)[1697]: check pass; user unknown
Jan 20 23:57:31 www vsftpd(pam_unix)[1697]: authentication failure; logname= uid=0 euid=0 tty= ruser= rhost=208.91.131.2
Jan 20 23:57:34 www vsftpd(pam_unix)[1697]: check pass; user unknown
Jan 20 23:57:34 www vsftpd(pam_unix)[1697]: authentication failure; logname= uid=0 euid=0 tty= ruser= rhost=208.91.131.2
Jan 20 23:57:37 www vsftpd(pam_unix)[1697]: check pass; user unknown
Jan 20 23:57:37 www vsftpd(pam_unix)[1697]: authentication failure; logname= uid=0 euid=0 tty= ruser= rhost=208.91.131.2
Jan 20 23:57:40 www vsftpd(pam_unix)[1697]: check pass; user unknown
Jan 20 23:57:40 www vsftpd(pam_unix)[1697]: authentication failure; logname= uid=0 euid=0 tty= ruser= rhost=208.91.131.2
Jan 20 23:57:43 www vsftpd(pam_unix)[1697]: check pass; user unknown
Jan 21 12:48:29 www syslogd 1.4.1: restart.
Jan 21 12:48:29 www syslog: syslogd startup succeeded
Jan 21 12:48:29 www kernel: klogd 1.4.1, log source = /proc/kmsg started.
Jan 21 12:48:29 www kernel: Linux version 2.6.9-42.ELsmp (bhcompile@hs20-bc1-1.build.redhat.com) (gcc version 3.4.6 20060404 (Red Hat 3.4.6-2)) #1 SMP Wed Jul 12 23:27:17 EDT 2006
Jan 21 12:48:29 www kernel: BIOS-provided physical RAM map:
Jan 21 12:48:29 www kernel: ********************************************************
Jan 21 12:48:29 www kernel: * This system has more than 16 Gigabyte of memory. *
Jan 21 12:48:29 www kernel: * It is recommended that you read the release notes *
Jan 21 12:48:29 www kernel: * that accompany your copy of Red Hat Enterprise Linux *
Jan 21 12:48:29 www kernel: * about the recommended kernel for such configurations *
Jan 21 12:48:29 www kernel: ********************************************************
Jan 21 12:48:29 www kernel: BIOS-e820: 0000000000000000 - 000000000009ac00 (usable)
Jan 21 12:48:29 www kernel: BIOS-e820: 000000000009ac00 - 0000000000100000 (reserved)
Jan 21 12:48:29 www kernel: BIOS-e820: 0000000000100000 - 000000009e25e000 (usable)
Jan 21 12:48:29 www syslog: klogd startup succeeded
Jan 21 12:48:29 www kernel: BIOS-e820: 000000009e25e000 - 000000009e320000 (ACPI NVS)
Jan 21 12:48:29 www kernel: BIOS-e820: 000000009e320000 - 000000009fa32000 (usable)
Jan 21 12:48:29 www kernel: BIOS-e820: 000000009fa32000 - 000000009fa9a000 (reserved)
Jan 21 12:48:29 www kernel: BIOS-e820: 000000009fa9a000 - 000000009faac000 (usable)
Jan 21 12:48:29 www kernel: BIOS-e820: 000000009faac000 - 000000009fb1a000 (ACPI NVS)
Jan 21 12:48:29 www kernel: BIOS-e820: 000000009fb1a000 - 000000009fb27000 (usable)
Jan 21 12:48:29 www kernel: BIOS-e820: 000000009fb27000 - 000000009fb3a000 (ACPI data)
Jan 21 12:48:29 www kernel: BIOS-e820: 000000009fb3a000 - 000000009fc00000 (usable)
Jan 21 12:48:29 www kernel: BIOS-e820: 000000009fc00000 - 00000000b0000000 (reserved)
[ 本帖最后由 zhumao 于 2009-2-2 09:27 编辑 ] |
最佳答案
查看完整内容
日志太少了,如果可以就多贴一些。查看一下cpu的温度,很多死机可能与系统本身关系不大。看看日志cpu部分有‘Temperature above threshold’不?
|