do_page_fault函数处理流程 - Chinaunix

//********************************* Page-fault exception handler *******************************************
/*
 * do_page_fault - handle a page-fault exception.
 * @regs:       saved register state of the faulting context; regs->ip and
 *              regs->sp are read below.
 * @error_code: fault error code; tested below against the PF_RSVD, PF_USER,
 *              PF_PROT and PF_WRITE bits.
 *
 * NOTE(review): this is a condensed, annotated listing. The return type and
 * the declarations of tsk, mm, address, vma, write and fault are not shown
 * here, and the value stored in 'fault' is never inspected in this excerpt.
 *
 * Overall flow visible in this listing:
 *   1. fault address in kernel space -> try vmalloc_fault(), otherwise report
 *      the error without taking any lock;
 *   2. atomic context or no mm       -> report the error;
 *   3. take mm->mmap_sem for reading (with a deadlock-avoidance check);
 *   4. look up the vma, possibly growing a VM_GROWSDOWN area;
 *   5. check access rights, then call handle_mm_fault() and drop the lock.
 */
do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
// Take the task currently running on this CPU ('current'),
// then take that task's memory descriptor.
tsk = current;
mm = tsk->mm;
/* Get the faulting address: */
// The faulting address is obtained via read_cr2().
address = read_cr2();
/*
* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
*
* NOTE! We MUST NOT take any locks for this case. We may
* be in an interrupt or a critical region, and should
* only copy the information from the master page table,
* nothing more.
*
* This verifies that the fault happens in kernel space
* (error_code & 4) == 0, and that the fault was not a
* protection error (error_code & 9) == 0.
*/
// The faulting address lies in kernel space.
if (unlikely(fault_in_kernel_space(address))) {
// Only try the vmalloc path when none of PF_RSVD, PF_USER and PF_PROT is
// set, i.e. (per the comment above) a kernel-mode, non-protection fault.
if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
// Presumably an access to the kernel's non-contiguous (vmalloc) area:
// vmalloc_fault() is expected to copy the missing entry from the
// reference kernel page table into the current page table and to return
// a negative value when the reference entry is missing as well (which
// would indicate a kernel bug) -- its body is not visible here; confirm.
if (vmalloc_fault(address) >= 0)
return;
}
// Reached when the fault came from user mode / was a protection or
// reserved-bit fault, or when vmalloc_fault() returned a negative value.
bad_area_nosemaphore(regs, error_code, address);
// Error handling, as described by the original annotation (the helper's
// body is not visible here):
// 1 user-mode fault   --> the process is terminated
// 2 kernel-mode fault
//   bad system-call argument ----> terminate the process / return an error code from the system call
//   kernel bug               ----> kernel panic
return;
}
/*
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
*/
// 1 in interrupt context there is no process context (mm is NULL)
// 2 inside an atomic section
// In both cases the page fault must not be serviced.
if (unlikely(in_atomic() || !mm)) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
/*
* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in
* the kernel and should generate an OOPS. Unfortunately, in the
* case of an erroneous fault occurring in a code path which already
* holds mmap_sem we will deadlock attempting to validate the fault
* against the address space. Luckily the kernel only validly
* references user space from well defined areas of code, which are
* listed in the exceptions table.
*
* As the vast majority of faults will be valid we will only perform
* the source reference check when there is a possibility of a
* deadlock. Attempt to lock the address space, if we cannot we then
* validate the source. If this is invalid we can skip the address
* space check, thus avoiding the deadlock:
*/
// From here on the faulting address is known to be in user space
// (the kernel-space case returned above).
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
// The trylock failed. For a kernel-mode fault (PF_USER clear), consult the
// exception table: a legitimate kernel access to user space must come from
// an instruction address listed there.
// If the address is listed, the fault may still be demand paging or an
// invalid access; that is decided further below.
// If it is not listed, this is a kernel error.
if ((error_code & PF_USER) == 0 &&
!search_exception_tables(regs->ip)) {
// Kernel error path (the original annotation says: kernel panic).
bad_area_nosemaphore(regs, error_code, address);
return;
}
down_read(&mm->mmap_sem);
} else {
/*
* The above down_read_trylock() might have succeeded in
* which case we'll have missed the might_sleep() from
* down_read():
*/
might_sleep();
}
// Look up the vma associated with the faulting address.
vma = find_vma(mm, address);
// No vma returned: the access is invalid.
// NOTE(review): this and the later bad_area()/bad_area_access_error() paths
// return without a visible up_read(); presumably those helpers release
// mmap_sem -- confirm.
if (unlikely(!vma)) {
bad_area(regs, error_code, address);
return;
}
// 1 vma->vm_start <= address: the address is inside the vma, go straight to
//   the "valid area" stage.
// 2 vma->vm_start >  address: the address is below the vma; this may still
//   be a stack push by the user that needs the stack to grow.
if (likely(vma->vm_start <= address))
goto good_area;
// A stack-growth fault is only acceptable if the vma is marked as growing
// downwards (VM_GROWSDOWN).
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
bad_area(regs, error_code, address);
return;
}
// The fault was raised in user mode.
if (error_code & PF_USER) {
/*
* Accessing the stack below %sp is always a bug.
* The large cushion allows instructions like enter
* and pusha to work. ("enter $65535, $31" pushes
* 32 pointers and then decrements %sp by 65535.)
*/
// Check the faulting address against the stack pointer regs->sp,
// allowing the cushion described above.
if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
bad_area(regs, error_code, address);
return;
}
}
// Grow the stack vma so that it covers the address; a non-zero return is
// treated as failure.
if (unlikely(expand_stack(vma, address))) {
bad_area(regs, error_code, address);
return;
}
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
good_area:
write = error_code & PF_WRITE;
// Check the access rights of the vma against the kind of access.
if (unlikely(access_error(error_code, write, vma))) {
bad_area_access_error(regs, error_code, address);
return;
}
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault:
*/
// Service the fault (the original annotation says: allocate a new page
// frame). FAULT_FLAG_WRITE is passed only for write faults. The result is
// stored in 'fault' but not examined in this excerpt.
fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
// Release the read lock on mmap_sem taken above.
up_read(&mm->mmap_sem);
}
//******************************* Access-permission check ********************************************
/*
 * access_error - decide whether a faulting access violates the rights of @vma.
 * @error_code: fault error code; only the PF_PROT bit is examined here.
 * @write:      non-zero when the faulting access was a write.
 * @vma:        memory area covering the faulting address; only vm_flags is read.
 *
 * Returns 1 when the access must be rejected, 0 when it is permitted.
 *
 * The return type is spelled out explicitly: implicit 'int' is not valid
 * since C99.
 */
int access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
{
	if (write) {
		/* write, present and write, not present: the area must be writable. */
		if (unlikely(!(vma->vm_flags & VM_WRITE)))
			return 1;
		return 0;
	}

	/*
	 * read, present: PF_PROT set on a non-write fault means the fault was a
	 * protection fault rather than a missing page, so it is rejected.
	 */
	if (unlikely(error_code & PF_PROT))
		return 1;

	/*
	 * read, not present: the area must grant at least one of
	 * read, execute or write permission.
	 */
	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
		return 1;

	return 0;
}

复制代码