- 论坛徽章:
- 0
|
如题,请指教
一台V40z的机器,安装有Solaris 10操作系统,系统发生panic(crash),当时的当前进程为sched,汗
相关信息如下:
root@SUN9 # mdb 6
Loading modules: [ unix krtld genunix specfs dtrace ufs ip sctp usba uhci s1394 fcp fctl emlxs nca lofs md cpc fcip random crypto logindmux ptm sppp nfs ipc ]
> ::status
debugging crash dump vmcore.6 (64-bit) from SUN9
operating system: 5.10 Generic_Patch (i86pc)
panic message:
BAD TRAP: type=e (#pf Page fault) rp=fffffe8001fed970 addr=100000027 occurred in module "genunix" due to an illegal access to a user
address
dump content: kernel pages only
> ::cpuinfo
ID ADDR FLG NRUN BSPL PRI RNRN KRNRN SWITCH THREAD PROC
0 fffffffffbc22ca0 1f 0 0 52 no no t-1 fffffe82f4280a60 dtlogin
1 ffffffff973ea000 1f 0 0 59 no no t-0 ffffffff97b21b20 init
2 fffffffffbc2a160 1b 0 0 60 no no t-0 fffffe8001fedc80 sched
3 ffffffff97ac2000 1f 1 0 -1 no no t-0 fffffe800069ac80 (idle)
4 ffffffff97ab2800 1f 0 0 -1 no no t-0 fffffe80006eec80 (idle)
5 ffffffff97ab2000 1f 0 0 -1 no no t-0 fffffe8000742c80 (idle)
6 ffffffff97a0c800 1f 0 0 0 no no t-2 ffffffff9ae7fc80 CTRR.exe
7 ffffffff97a0c000 1f 0 0 10 no no t-0 fffffe818b37ac60 Xsun
> ::msgbuf
......
......
NOTICE: alloc: /export/home: file system full
NOTICE: alloc: /export/home: file system full
panic[cpu2]/thread=fffffe8001fedc80:
BAD TRAP: type=e (#pf Page fault) rp=fffffe8001fed970 addr=100000027 occurred in module "genunix" due to an illegal access to a user
address
sched:
#pf Page fault
Bad kernel fault at addr=0x100000027
pid=0, pc=0xfffffffffb9d37e8, sp=0xfffffe8001feda50, eflags=0x10206
cr0: 8005003b<pg,wp,ne,et,ts,mp,pe> cr4: 6f8<xmme,fxsr,pge,mce,pae,pse,de>
cr2: 100000027 cr3: df03000 cr8: c
rdi: 1 rsi: fffffe82a9551140 rdx: fffffe8001fedc80
rcx: 0 r8: 0 r9: 0
rax: ffffffff9764d800 rbx: ffffffff rbp: fffffe8001feda70
r10: fb0082c445e001ff r11: fffffffffbcc2de0 r12: fffffe82a9551140
r13: b r14: ffffffff975e7de0 r15: 0
fsb: ffffffff80000000 gsb: ffffffff9764d800 ds: 43
es: 43 fs: 0 gs: 1c3
trp: e err: 0 rip: fffffffffb9d37e8
cs: 28 rfl: 10206 rsp: fffffe8001feda50
ss: 30
fffffe8001fed880 unix:die+da (ffffffff975f2b48, 1fb94701a)
fffffe8001fed960 unix:trap+5ea ()
fffffe8001fed970 unix:_cmntrap+11b ()
fffffe8001feda70 genunix:freemsg+28 ()
fffffe8001fedab0 vuid3ps2:vuidmice_miocdata+108 ()
fffffe8001fedae0 vuid3ps2:vuidmice_wput+b5 ()
fffffe8001fedb40 unix:putnext+1f1 ()
fffffe8001fedb60 consms:consmslwserv+28 ()
fffffe8001fedb80 genunix:runservice+5a ()
fffffe8001fedba0 genunix:queue_service+3e ()
fffffe8001fedbd0 genunix:stream_service+63 ()
fffffe8001fedc60 genunix:taskq_d_thread+1ba ()
syncing file systems...
216
196
......
......
> freemsg::dis
freemsg: pushq %rbp
freemsg+1: movq %rsp,%rbp
freemsg+4: pushq %r14
freemsg+6: pushq %r13
freemsg+8: pushq %r12
freemsg+0xa: pushq %rbx
freemsg+0xb: movq %rdi,%rbx //%rbx应为传递的mp值
freemsg+0xe: movq %gs:0x10,%rax
freemsg+0x17: testb $0x2,0x430(%rax)
freemsg+0x1e: je +0x60 <freemsg+0x7e>
freemsg+0x20: jmp +0x6c <freemsg+0x8c>
freemsg+0x22: movl 0x28dd1c(%rip),%edi
freemsg+0x28: movq 0x28(%rbx),%r12 //dblk_t *dbp = mp->b_datap;
freemsg+0x2c: movq 0x10(%rbx),%r14 //mblk_t *mp_cont = mp->b_cont;
freemsg+0x30: testl %edi,%edi
freemsg+0x32: sete %dl
freemsg+0x35: xorl %eax,%eax
freemsg+0x37: testq %rbx,%rbx
freemsg+0x3a: setne %al
freemsg+0x3d: testl %eax,%edx
freemsg+0x3f: je +0x31 <freemsg+0x70>
freemsg+0x41: movq 0x60(%r12),%r13
freemsg+0x46: testq %r13,%r13
freemsg+0x49: je +0x27 <freemsg+0x70>
freemsg+0x4b: call -0x1ab2db <caller>
freemsg+0x50: movzbl 0x18(%r12),%ecx
freemsg+0x56: movq %rax,%rsi
freemsg+0x59: xorl %r8d,%r8d
freemsg+0x5c: movl $0x8,%edx
freemsg+0x61: movq %r13,%rdi
freemsg+0x64: call +0x4b2c <str_ftevent>
freemsg+0x69: nop
freemsg+0x6d: nop
freemsg+0x70: movq %rbx,%rdi //传递mp
freemsg+0x73: movq %r14,%rbx //mp = mp_cont,此时%rbx的值与%r14的值应该相等
freemsg+0x76: movq %r12,%rsi //传递dbp,此时%rsi与%r12的值相等,事实亦如此
freemsg+0x79: call *0x30(%r12) //(*db_free)(struct msgb *, struct datab *);
freemsg+0x7e: testq %rbx,%rbx
freemsg+0x81: jne -0x5f <freemsg+0x22>
freemsg+0x83: popq %rbx
freemsg+0x84: popq %r12
freemsg+0x86: popq %r13
freemsg+0x88: popq %r14
freemsg+0x8a: leave
freemsg+0x8b: ret
freemsg+0x8c: movq %rdi,%rsi
freemsg+0x8f: movq $0xfffffffffba5873c,%rdi
freemsg+0x96: call -0x67106 <ftrace_1>
freemsg+0x9b: jmp -0x1d <freemsg+0x7e>
>
在opensolaris上查到freemsg的源码如下(stream.c):
462 void
463 freemsg(mblk_t *mp)
464 {
465 FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
466 while (mp) {
467 dblk_t *dbp = mp->b_datap;
468 mblk_t *mp_cont = mp->b_cont;
469
470 ASSERT(dbp->db_ref > 0);
471 ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
472
473 STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
474
475 dbp->db_free(mp, dbp);
476 mp = mp_cont;
477 }
478 }
479
mblk_t与dblk_t在系统中的定义如下:
typedef struct msgb {
struct msgb *b_next;
struct msgb *b_prev;
struct msgb *b_cont;
unsigned char *b_rptr;
unsigned char *b_wptr;
struct datab *b_datap;
unsigned char b_band;
unsigned char b_ftflag; /* flow trace flag */
unsigned short b_flag;
queue_t *b_queue; /* for sync queues */
} mblk_t;
typedef struct datab {
frtn_t *db_frtnp;
unsigned char *db_base;
unsigned char *db_lim;
unsigned char db_ref;
unsigned char db_type;
unsigned char db_flags;
unsigned char db_struioflag;
pid_t db_cpid; /* cached pid, needs verification */
void *db_cache; /* kmem cache descriptor */
struct msgb *db_mblk;
void (*db_free)(struct msgb *, struct datab *);
void (*db_lastfree)(struct msgb *, struct datab *);
intptr_t db_cksumstart;
intptr_t db_cksumend;
intptr_t db_cksumstuff;
union {
double enforce_alignment;
unsigned char data[8];
struct {
union {
uint32_t u32;
uint16_t u16;
} cksum_val; /* used to store calculated cksum */
uint16_t flags;
uint16_t pad;
} cksum;
/*
* Union used for future extensions (pointer to data ?).
*/
} db_struioun;
fthdr_t *db_fthdr;
cred_t *db_credp; /* credential */
} dblk_t;
分析汇编和C的代码,我没有找到%rbx可以被赋予0x00000000ffffffff的地方和理由(出错地址0x100000027正好等于%rbx+0x28,即在freemsg+0x28处读取mp->b_datap时发生缺页)
其它的相关信息罗列如下:
> ::regs
%rax = 0xffffffff9764d800 %r9 = 0x0000000000000000
%rbx = 0x00000000ffffffff %r10 = 0xfb0082c445e001ff
%rcx = 0x0000000000000000 %r11 = 0xfffffffffbcc2de0 apic_cr8pri
%rdx = 0xfffffe8001fedc80 %r12 = 0xfffffe82a9551140
%rsi = 0xfffffe82a9551140 %r13 = 0x000000000000000b
%rdi = 0x0000000000000001 %r14 = 0xffffffff975e7de0
%r8 = 0x0000000000000000 %r15 = 0x0000000000000000
%rip = 0xfffffffffb9d37e8 freemsg+0x28
%rbp = 0xfffffe8001feda70
%rsp = 0xfffffe8001feda50
%rflags = 0x00010206
id=0 vip=0 vif=0 ac=0 vm=0 rf=1 nt=0 iopl=0x0
status=<of,df,IF,tf,sf,zf,af,PF,cf>
%cs = 0x0028 %ds = 0x0043 %es = 0x0043
%trapno = 0xe %fs = 0x0000 fsbase = 0x000000009764d800
%err = 0x0 %gs = 0x01c3 gsbase = 0x0000000000000000
> 0xfffffe82a9551140::print dblk_t //%r12指向的结构体,应该是dblk_t
{
db_frtnp = 0
db_base = 0
db_lim = 0
db_ref = 0x70
db_type = 0x1c
db_flags = 0xb
db_struioflag = 0xaa
db_cpid = 0xffffffff
db_cache = 0xffffffffaa0b1c98
db_mblk = 0xffffffffaa0b1c00
db_free = 0xbaddcafe00000000
db_lastfree = 0
db_cksumstart = 0
db_cksumend = 0
db_cksumstuff = 0
db_struioun = {
enforce_alignment = -NaN
data = [ 0xb0, 0x5, 0x7c, 0xd0, 0x81, 0xfe, 0xff, 0xff ]
cksum = {
cksum_val = {
u32 = 0xd07c05b0
u16 = 0x5b0
}
flags = 0xfe81
pad = 0xffff
}
}
db_fthdr = 0xfffffe81d07c05d4
db_credp = 0xfffffe81d07c0540
}
> 0xffffffff975e7de0::print mblk_t //%r14指向的结构体,应该是mblk_t
{
b_next = vuidmice_winit
b_prev = 0
b_cont = 0
b_rptr = 0xffffffff975f2108
b_wptr = 0
b_datap = 0xffffffff975f6c98
b_band = 0
b_ftflag = 0
b_flag = 0
b_queue = 0xbaddcafe00000822
}
请求帮助
|