- 论坛徽章:
- 0
|
下面是2.6内核中抢占部分的相关代码,其实抢占主要是实现了中断返回到进程的内核空间时的任务调度。
File:arch/i386/kernel/entry.S
140 # userspace resumption stub bypassing syscall exit tracing
141 ALIGN
142 ret_from_exception:
143 preempt_stop
在抢占打开的情况下,上面的语句只是关闭当前cpu的中断。不仔细看,此处可能和下面cli重复,其实不然,这保证了此处到下面的cli之间进程不被抢占。
144 ret_from_intr:
145 GET_THREAD_INFO(%ebp)
146 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
147 movb CS(%esp), %al
取得当前进程的一些信息,其中包括EFLAGS和CS
148 testl $(VM_MASK | 3), %eax
测试系统进入中断或者是异常的时候是否是用户空间
149 jz resume_kernel
如果不是将跳转到resume_kernel继续执行
150 ENTRY(resume_userspace)
151 cli # make sure we don't miss an interrupt
152 # setting need_resched or sigpending
153 # between sampling and the iret
154 movl TI_flags(%ebp), %ecx
155 andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
156 # int/exception return?
157 jne work_pending
158 jmp restore_all
159
160 #ifdef CONFIG_PREEMPT
161 ENTRY(resume_kernel)
162 cli
关闭中断
163 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
测试当前的抢占计数器是否为零,只有为零的时候才允许调度。
164 jnz restore_nocheck
不为零,不进行调度,这个时候很可能是此进程持有某个锁,处于临界区,或者说是原子上下文中,比如说它返回的是软中断上下文,或者是中断嵌套中的上一级中断。
165 need_resched:
166 movl TI_flags(%ebp), %ecx # need_resched set ?
167 testb $_TIF_NEED_RESCHED, %cl
168 jz restore_all
测试当前进程的需要被重新调度位是否设置,如果没有设置不进行调度。
169 testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
170 jz restore_all
测试当前进程在进入中断之前,中断是否是开着的。有人可能问了,关了中断,它就不能走到这步了。其实不然,我们不要忘记了那些不可屏蔽的中断,或者称它们为异常,它们即使是在关闭中断的条件下,仍然可能跑到这里。但是为什么只有在开着中断的情况下引起的异常才能够引起调度呢?因为,这表明那个程序没有处于原子上下文,所以,还是可以安全地被调度的。
171 call preempt_schedule_irq
运行调度程序
172 jmp need_resched
再次测试是否需要调度
173 #endif
File:kernel/sched.c
3174 /*
3175 * this is is the entry point to schedule() from kernel preemption
3176 * off of irq context.
3177 * Note, that this is called and return with irqs disabled. This will
3178 * protect us against recursive calling from irq.
3179 */
3180 asmlinkage void __sched preempt_schedule_irq(void)
3181 {
3182 struct thread_info *ti = current_thread_info();
3183 #ifdef CONFIG_PREEMPT_BKL
3184 struct task_struct *task = current;
3185 int saved_lock_depth;
3186 #endif
3187 /* Catch callers which need to be fixed*/
3188 BUG_ON(ti->preempt_count || !irqs_disabled());
再次检查此函数是否被正确调用。
3189
3190 need_resched:
3191 add_preempt_count(PREEMPT_ACTIVE);
将抢占计数器加上一个数,禁止抢占再次发生。
3192 /*
3193 * We keep the big kernel semaphore locked, but we
3194 * clear ->lock_depth so that schedule() doesnt
3195 * auto-release the semaphore:
3196 */
3197 #ifdef CONFIG_PREEMPT_BKL
3198 saved_lock_depth = task->lock_depth;
3199 task->lock_depth = -1;
3200 #endif
3201 local_irq_enable();
因为我们是关闭了中断走到这里的,所以,运行调度程序之前,需要先打开中断。
3202 schedule();
等本进程再次被调度执行的时候,仍然需要禁止中断。
3203 local_irq_disable();
3204 #ifdef CONFIG_PREEMPT_BKL
3205 task->lock_depth = saved_lock_depth;
3206 #endif
3207 sub_preempt_count(PREEMPT_ACTIVE);
与前面的抢占计数器的加法相呼应。
3208
3209 /* we could miss a preemption opportunity between schedule and now */
3210 barrier();
3211 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
3212 goto need_resched;
3213 }
如果这个进程再次需要被调度,那么重新调度,不过如果真是这样,那么这个进程真是点背极了,只开了中断那么一小会就被抢占了。按上面来看,只是barrier一个语句;)
3214
3215 #endif /* CONFIG_PREEMPT */
由于引入了抢占,那么在持有某个锁的时候,如果此进程被调度出CPU,是相当危险的,很可能引起死锁,为此在每个加锁函数中都有禁止抢占的操作:
File:kernel/spinlock.c
153 void __lockfunc _spin_lock(spinlock_t *lock)
154 {
155 preempt_disable();
156 _raw_spin_lock(lock);
157 }
当然解锁函数中会有打开抢占的操作与之呼应:
File:kernel/spinlock.c
260 void __lockfunc _spin_unlock(spinlock_t *lock)
261 {
262 _raw_spin_unlock(lock);
263 preempt_enable();
263 264 }
除了上面的“强制性”的抢占,2.6内核还有自愿的抢占:
File:include/linux/kernel.h
63 # define might_resched() cond_resched()
64 #else
65 # define might_resched() do { } while (0)
66 #endif
67
68 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
69 void __might_sleep(char *file, int line);
70 # define might_sleep() \
71 do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
72 #else
73 # define might_sleep() do { might_resched(); } while (0)
74 #endif
File:kernel/sched.c
4086 static inline void __cond_resched(void)
4087 {
4088 /*
4089 * The BKS might be reacquired before we have dropped
4090 * PREEMPT_ACTIVE, which could trigger a second
4091 * cond_resched() call.
4092 */
4093 if (unlikely(preempt_count()))
4094 return;
4095 do {
4096 add_preempt_count(PREEMPT_ACTIVE);
4097 schedule();
4098 sub_preempt_count(PREEMPT_ACTIVE);
4099 } while (need_resched());
4100 }
4101
4102 int __sched cond_resched(void)
4103 {
4104 if (need_resched()) {
4105 __cond_resched();
4106 return 1;
4107 }
4108 return 0;
4109 }
可以看见might_sleep宏就是检查是否需要重新调度,如果是,则进行调度,无论这个时候进程是位于内核空间还是用户空间(哈哈,实际上,这个宏肯定是位于内核空间的,什么原因,自己想吧)。那么这个宏又都是插在哪些地方呢?用cscope简单地查看一下:
1 34 arch/i386/lib/usercopy.c >
might_sleep(); \
2 124 arch/i386/lib/usercopy.c >
might_sleep(); \
3 156 arch/i386/lib/usercopy.c >
might_sleep();
4 198 arch/i386/lib/usercopy.c >
might_sleep();
5 607 arch/i386/lib/usercopy.c >
might_sleep();
6 634 arch/i386/lib/usercopy.c >
might_sleep();
7 6 arch/i386/mm/highmem.c >
might_sleep();
8 2838 block/ll_rw_blk.c >
might_sleep();
9 1943 drivers/char/vt.c >
might_sleep();
10 2871 drivers/char/vt.c >
might_sleep();
11 2939 drivers/char/vt.c >
might_sleep();
12 693 drivers/infiniband/hw/mthca/mthca_cq.c >
might_sleep();
13 824 drivers/infiniband/hw/mthca/mthca_cq.c >
might_sleep();
14 343 drivers/infiniband/hw/mthca/mthca_mr.c >
might_sleep();
15 470 drivers/infiniband/hw/mthca/mthca_mr.c >
might_sleep();
16 498 drivers/infiniband/hw/mthca/mthca_mr.c >
might_sleep();
17 46 drivers/infiniband/hw/mthca/mthca_pd.c >
might_sleep();
18 69 drivers/infiniband/hw/mthca/mthca_pd.c >
might_sleep();
19 1420 drivers/usb/core/hcd.c >
might_sleep ();
20 1468 fs/buffer.c >
might_sleep();
21 1706 fs/buffer.c >
might_sleep();
22 153 fs/dcache.c >
might_sleep();
23 71 fs/ext3/inode.c >
might_sleep();
24 2951 fs/ext3/inode.c >
might_sleep();
25 135 fs/file_table.c >
might_sleep();
26 417 fs/fs-writeback.c >
might_sleep();
27 567 fs/fs-writeback.c >
might_sleep();
28 248 fs/inode.c >
might_sleep();
29 336 fs/jbd/revoke.c >
might_sleep();
30 982 fs/locks.c >
might_sleep ();
31 1460 fs/locks.c >
might_sleep();
32 2545 fs/nfs/nfs4proc.c >
might_sleep();
33 2567 fs/nfs/nfs4proc.c >
might_sleep();
34 751 fs/nfs/nfs4state.c >
might_sleep();
35 589 fs/nfs/write.c >
might_sleep();
36 1944 fs/proc/base.c >
might_sleep();
37 51 include/asm-i386/checksum.h >
might_sleep();
38 184 include/asm-i386/checksum.h >
might_sleep();
39 99 include/asm-i386/semaphore.h >
might_sleep();
40 123 include/asm-i386/semaphore.h >
might_sleep();
41 438 include/asm-i386/uaccess.h >
might_sleep();
42 483 include/asm-i386/uaccess.h >
might_sleep();
43 76 include/linux/kernel.h >
#define might_sleep_if(cond) do { if (unlikely(cond)) might_sleep(); } while (0)
44 202 mm/mmap.c >
might_sleep();
45 85 mm/rmap.c >
might_sleep();
哈哈,抢占点还是蛮多的,不是么?
本文来自ChinaUnix博客,如果查看原文请点:http://blog.chinaunix.net/u/5251/showart_142902.html |
|