论坛徽章:: 0

电梯直达

1楼 [收藏(0)] [报告]

发表于 2008-06-12 22:59 |只看该作者 |倒序浏览

softirq,tasklet和workqueue的分析
creator
sz111@126.com
creatorwu.cublog.cn

今天看了《深入理解Linux内核》的中断和异常一章，对Softirq,tasklet和workqueue做一个简单的总结和分析。

其实 softirq 和 tasklet 都属于软中断，而工作队列与软中断无关：它只是内核中的一个内核线程在等待工作任务，通过工作队列可以向这个线程提交工作任务。不过它们还是有个共同点，就是都有延后执行的作用。
1. 首先看 do_softirq 如何被调用和如何处理。
/*
 * invoke_softirq() - entry used by irq_exit() to start softirq processing.
 *
 * When the architecture defines __ARCH_IRQ_EXIT_IRQS_DISABLED the macro
 * expands directly to __do_softirq(); otherwise it expands to
 * do_softirq().
 */
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif
/*
 * Exit an interrupt context. Process softirqs if needed and possible.
 *
 * Runs on the way out of a hardware interrupt. Pending softirqs are
 * started from here through invoke_softirq(), which expands to either
 * do_softirq() or __do_softirq() (the original notes state that ARM
 * takes the __do_softirq() variant).
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);

	/*
	 * Softirqs are run only when BOTH conditions hold:
	 *  - in_interrupt() is zero. It is based on
	 *      irq_count() = preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)
	 *    so a non-zero value means we are still nested inside a
	 *    hardirq or a softirq is already being processed;
	 *  - local_softirq_pending() is non-zero, i.e. some softirq has
	 *    been raised and is waiting to be handled.
	 */
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	if (!in_interrupt() && idle_cpu(smp_processor_id()) &&
	    !need_resched())
		tick_nohz_stop_sched_tick();
	rcu_irq_exit();
#endif
	preempt_enable_no_resched();
}
/*
 * do_softirq() - process pending softirqs unless we are already in
 * interrupt context.
 *
 * Does nothing when in_interrupt() is non-zero, i.e. when a hardirq is
 * nested or a softirq is already being handled on this CPU; that check
 * keeps the softirq path from being re-entered (ksoftirqd also enters
 * through this function). Otherwise the pending mask is sampled with
 * local interrupts disabled and __do_softirq() does the real work when
 * the mask is non-zero.
 */
asmlinkage void do_softirq(void)
{
	unsigned long irq_flags;
	__u32 todo;

	if (!in_interrupt()) {
		/* Sample and act on the pending mask with local IRQs off. */
		local_irq_save(irq_flags);

		todo = local_softirq_pending();
		if (todo)
			__do_softirq();

		local_irq_restore(irq_flags);
	}
}
/*
*
We restart softirq processing MAX_SOFTIRQ_RESTART times,
*
and we fall back to softirqd after that.
*
*
This number has been established via experimentation.
*
The two things to balance is latency against fairness -
*
we want to handle softirqs as soon as possible, but they
*
should not be able to lock up the box.
*/
//定义最大的软中断调用次数
#define
MAX_SOFTIRQ_RESTART 10
asmlinkage
void __do_softirq(void)
{
struct
softirq_action *h;
__u32
pending;
int
max_restart = MAX_SOFTIRQ_RESTART;
int
cpu;
//判断是否有softirq
pending
pending
= local_softirq_pending();
account_system_vtime(current);
//屏蔽其他软中断，所以软中断仅仅能一个在执行。
//static
inline void __local_bh_disable(unsigned long ip)
//{

//add_preempt_count(SOFTIRQ_OFFSET);这个知道吧，还记得刚才
in_interrupt的判断吧。
//barrier();
//}
__local_bh_disable((unsigned
long)__builtin_return_address(0));
trace_softirq_enter();
cpu
= smp_processor_id();
restart:
/*
Reset the pending bitmask before enabling irqs */
      //
      // 每次循环在允许硬件 ISR 强占前，首先重置软中断
      // 的标志位。
      //
set_softirq_pending(0);
local_irq_enable();//开中断
//static
struct softirq_action softirq_vec[32] 定义在此，为32，其实真正用到的仅仅6个。
//当然，后续Linux版本有新加入
//HI_SOFTIRQ=0,
//TIMER_SOFTIRQ,
//NET_TX_SOFTIRQ,
//NET_RX_SOFTIRQ,
//BLOCK_SOFTIRQ,
//TASKLET_SOFTIRQ,
      //
      // 这里要注意，以下代码运行时可以被硬件中断抢占，但
      // 这个硬件 ISR 执行完成后，它的所注册的软中断无法马上运行，
      // 别忘了，现在虽是开硬件中断执行，但前面的 __local_bh_disable()
      // 函数屏蔽了软中断。所以这种环境下只能被硬件中断抢占，但这
      // 个硬中断注册的软中断回调函数无法运行。要问为什么，那是因为
      // __local_bh_disable() 函数设置了一个标志当作互斥量，而这个
      // 标志正是上面的 irq_exit() 和 do_softirq() 函数中的
      // in_interrupt() 函数判断的条件之一，也就是说 in_interrupt()
      // 函数不仅检测硬中断而且还判断了软中断。所以在这个环境下触发
      // 硬中断时注册的软中断，根本无法重新进入到这个函数中来，只能
      // 是做一个标志，等待下面的重复循环（最大 MAX_SOFTIRQ_RESTART）
      // 才可能处理到这个时候触发的硬件中断所注册的软中断。
      //
      //
      // 得到软中断向量表。
      //
h = softirq_vec;
      //
      // 循环处理所有 softirq 软中断注册函数。
      //
do {
            //
            // 如果对应的软中断设置 pending 标志则表明
            // 需要进一步处理它所注册的函数
if (pending & 1) {
         // 在这里执行了这个软中断所注册的回调函数
h->action(h);
rcu_bh_qsctr_inc(cpu);
}
   //
      // 继续找，直到把软中断向量表中所有 pending 的软
      // 中断处理完成。
H++;
   //
从代码里可以看出按位操作，表明一次循环只
      // 处理 32 个软中断的回调函数
pending >>= 1;
}
while (pending);
      //
      // 关中断执行以下代码。注意：这里又关中断了，下面的
      // 代码执行过程中硬件中断无法抢占。
      //
local_irq_disable();
      //
      // 前面提到过，在刚才开硬件中断执行环境时只能被硬件中断
      // 抢占，在这个时候是无法处理软中断的，因为刚才开中
      // 断执行过程中可能多次被硬件中断抢占，每抢占一次就有可
      // 能注册一个软中断，所以要再重新取一次所有的软中断。
      // 以便下面的代码进行处理后跳回到 restart 处重复执行。
      //
pending =
local_softirq_pending();
      //
      // 如果在上面的开中断执行环境中触发了硬件中断，且每个都
      // 注册了一个软中断的话，这个软中断会设置 pending 位，
      // 但在当前一直屏蔽软中断的环境下无法得到执行，前面提
      // 到过，因为 irq_exit() 和 do_softirq() 根本无法进入到
      // 这个处理过程中来。这个在上面详细的记录过了。那么在
      // 这里又有了一个执行的机会。注意：虽然当前环境一直是
      // 处于屏蔽软中断执行的环境中，但在这里又给出了一个执行
      // 刚才在开中断环境过程中触发硬件中断时所注册的软中断的
      // 机会，其实只要理解了软中断机制就会知道，无非是在一些特
      // 定环境下调用 ISR 注册到软中断向量表里的函数而已。
      //
      //
      // 如果刚才触发的硬件中断注册了软中断，并且重复执行次数
      // 没有到 10 次的话，那么则跳转到 restart 标志处重复以上
      // 所介绍的所有步骤：设置软中断标志位，重新开中断执行...
      // 注意：这里是要两个条件都满足的情况下才可能重复以上步骤。
      //
if
(pending && --max_restart)
goto
restart;
      //
      // 如果以上步骤重复了 10 次后还有 pending 的软中断的话，
      // 那么系统在一定时间内可能达到了一个峰值，为了平衡这点。
      // 系统专门建立了一个 ksoftirqd 线程来处理，这样避免在一
      // 定时间内负荷太大。这个 ksoftirqd 线程本身是一个大循环，
      // 在某些条件下为了不负载过重，它是可以被其他进程抢占的，
      // 但注意，它是显示的调用了 preempt_xxx() 和 schedule()
      // 才会被抢占和切换的。这么做的原因是因为在它一旦调用
      // local_softirq_pending() 函数检测到有 pending 的软中断
      // 需要处理的时候，则会显示的调用 do_softirq() 来处理软中
      // 断。也就是说，下面代码唤醒的 ksoftirqd 线程有可能会回
      // 到这个函数当中来，尤其是在系统需要响应很多软中断的情况
      // 下，它的调用入口是 do_softirq()，这也就是为什么在 do_softirq()
      // 的入口处也会用 in_interrupt()  函数来判断是否有软中断
      // 正在处理的原因了，目的还是为了防止重入。ksoftirqd 实现
      // 看下面对 ksoftirqd() 函数的分析。
      //
if
(pending)
            //
            // 此函数实际是调用 wake_up_process() 来唤醒 ksoftirqd
            //
wakeup_softirqd();
trace_softirq_exit();
account_system_vtime(current);
      //
      // 到最后才开软中断执行环境，允许软中断执行。注意：这里
      // 使用的不是 local_bh_enable()，不会再次触发 do_softirq()
      // 的调用。
      //
_local_bh_enable();
}
//
// ksoftirqd - body of the softirq daemon kernel thread.
//
// __bind_cpu carries the number of the CPU this thread serves, cast
// to a pointer (it is cast back to long for cpu_is_offline()).
// The thread sleeps until softirqs are pending, processes them through
// do_softirq(), and loops until kthread_should_stop() is true.
// Always returns 0.
//
static int ksoftirqd(void * __bind_cpu)
{
      //
      // Explicitly set the nice value of the current thread to 19.
      // Per the original notes the effective priority may still vary
      // with the scheduler policy.
      //
      set_user_nice(current, 19);
      //
      // Flag the current thread with PF_NOFREEZE (per the original
      // notes: the thread must not be suspended/frozen).
      //
      current->flags |= PF_NOFREEZE;
      //
      // Put the current thread into the interruptible sleep state,
      // in which it can still respond to signals and the like.
      //
      set_current_state(TASK_INTERRUPTIBLE);
      //
      // Main loop: keep going until the thread is asked to stop,
      // checking on every iteration whether softirqs are pending
      // and need to be processed.
      //
      while (!kthread_should_stop()) {
            //
            // Disable preemption of the current thread for the
            // duration of the processing below.
            //
            preempt_disable();
            //
            // First check whether there is currently NO pending
            // softirq to handle.
            //
            if (!local_softirq_pending()) {
                     //
                     // Nothing to do. Preemption has been disabled so
                     // far, so re-enable it before giving up the CPU.
                     //
                     preempt_enable_no_resched();
                     //
                     // Explicitly give up the CPU: the current thread goes
                     // to sleep and another task is switched in (scheduler
                     // details are out of scope here).
                     //
                     schedule();
                     //
                     // Note: when a thread that called schedule() is picked
                     // to run again, execution resumes at the statement
                     // after the call, i.e. at the preempt_disable()
                     // below.
                     //
                     //
                     // Running again: disable preemption for the processing
                     // that follows.
                     //
                     preempt_disable();
            }
            //
            // Mark the current thread as running. Preemption is disabled
            // here, and both paths above end up at this point: either
            // softirqs were already pending on loop entry, or nothing was
            // pending and the thread has just been scheduled back onto
            // the CPU.
            //
            __set_current_state(TASK_RUNNING);
            //
            // Loop while softirqs are pending and call do_softirq() to
            // handle them. This is another entry point into do_softirq():
            // when __do_softirq() still has pending softirqs after its
            // 10 passes it wakes this thread, which may then call into
            // __do_softirq() again. As noted for __do_softirq(), failing
            // to drain the queue in 10 passes suggests a busy system.
            // Under very heavy load this thread and do_softirq() would
            // alternate, and the thread's CPU usage could become high;
            // cond_resched() below lets the thread be scheduled out after
            // each round, which reduces but does not eliminate that.
            //
            while (local_softirq_pending()) {
                     /* Preempt disable stops cpu going offline.
                        If already offline, we'll be on wrong CPU:
                        don't process */
                     if (cpu_is_offline((long)__bind_cpu))
                              //
                              // The bound CPU is offline and cannot process
                              // softirqs: jump to wait_to_die and wait there
                              // until the thread is stopped.
                              //
                              goto wait_to_die;
                     //
                     // Run do_softirq() to execute the softirq handlers.
                     // If a softirq is already being processed it returns
                     // immediately (see its in_interrupt() check).
                     //
                     do_softirq();
                     //
                     // Allow the current thread to be preempted again.
                     //
                     preempt_enable_no_resched();

                     //
                     // Per the original notes, cond_resched() may indirectly
                     // call schedule() and switch away from this thread, and
                     // preemption has just been re-enabled. So after each
                     // round of softirq handlers another task may run. The
                     // author's reading: this keeps the thread from hogging
                     // the CPU under heavy load, and keeps other tasks
                     // responsive when many softirqs are queued.
                     //
                     cond_resched();
                     //
                     // Disable preemption of the current thread again.
                     //
                     preempt_disable();
                     //
                     // All softirqs handled? If not, go around again.
                     //
            }
            //
            // Everything has been processed: re-enable preemption, set
            // the thread state back to interruptible and repeat the loop.
            //
            preempt_enable();
            set_current_state(TASK_INTERRUPTIBLE);
      }

      //
      // The thread was asked to stop: mark it as running and return.
      //
      __set_current_state(TASK_RUNNING);
      return 0;
//
// Wait here until the current thread is stopped.
//
wait_to_die:
      //
      // Allow the current thread to be preempted.
      //
      preempt_enable();
      /* Wait for kthread_stop */
      //
      // Put the current thread into the interruptible sleep state,
      // in which it can still respond to signals and the like.
      //
      set_current_state(TASK_INTERRUPTIBLE);
      //
      // As long as the thread has not been asked to stop, give up
      // the CPU and set the state to interruptible again. In other
      // words, this loop ends only once the thread is about to be
      // stopped.
      //
      while (!kthread_should_stop()) {
            schedule();
            set_current_state(TASK_INTERRUPTIBLE);
      }
      //
      // The thread was asked to stop: mark it as running and return.
      //
      __set_current_state(TASK_RUNNING);
      return 0;
}
2. 再来看 tasklet 如何被调用和如何处理。

tasklet 主要是 I/O 驱动程序中实现可延迟函数的首选方法。tasklet 建立在 HI_SOFTIRQ 和 TASKLET_SOFTIRQ 这两个软中断之上。几个 tasklet 可以同时与一个软中断关联，每个 tasklet 执行自己的函数。

普通 tasklet 和高优先级的 tasklet 分别存放在 tasklet_vec 和 tasklet_hi_vec 数组中。
struct
tasklet_struct
[color="#000000"]{
struct
tasklet_struct *next;
unsigned
long state;
atomic_t
count;
void
(*func)(unsigned long);
unsigned
long data;
[color="#000000"]};
static
void tasklet_action(struct softirq_action *a)
[color="#000000"]{
struct
tasklet_struct *list;
[color="#000000"] //[color="#000000"]禁止本地中断
[color="#000000"] local_irq_disable();

//[color="#000000"]把tasklet_vec[n][color="#000000"]（n[color="#000000"]为cpu[color="#000000"]号）指向的链表的地址存入局部变量list
list
= __get_cpu_var(tasklet_vec).list;

//[color="#000000"]把tasklet_vec[n][color="#000000"]（n[color="#000000"]为cpu[color="#000000"]号）的值设定为NULL[color="#000000"]，因此已经调度的tasklet[color="#000000"]描述符的链表被清空。
__get_cpu_var(tasklet_vec).list
= NULL;
[color="#000000"] //[color="#000000"]打开中断
[color="#000000"] local_irq_enable();
while
(list) {
struct
tasklet_struct *t = list;
list
= list->next;
[color="#000000"]//[color="#000000"]查看count[color="#000000"]字段，检查tasklet是否被禁止，如果是，就清
  TASKLET_STATE_SCHED
[color="#000000"]//[color="#000000"]同时执行tasklet[color="#000000"]函数
if
(tasklet_trylock(t)) {
if
(!atomic_read(&t->count)) {
if
(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
[color="#000000"] BUG();
[color="#000000"] t->func(t->data);
[color="#000000"] tasklet_unlock(t);
[color="#000000"] continue;
[color="#000000"] }
[color="#000000"] tasklet_unlock(t);
[color="#000000"] }
[color="#000000"] local_irq_disable();
t->next
= __get_cpu_var(tasklet_vec).list;
__get_cpu_var(tasklet_vec).list
= t;
[color="#000000"] __raise_softirq_irqoff(TASKLET_SOFTIRQ);
[color="#000000"] local_irq_enable();
[color="#000000"] }
[color="#000000"]}
3. 最后来看 workqueue 的处理。

工作队列其实比较单纯：它并不在中断上下文中进行处理，而是在进程上下文中进行处理，这点非常重要。因为中断上下文中不可以做进程切换，所以很多工作需要放到进程上下文中去完成。工作队列里面的函数是通过内核线程来执行的。
主要的数据结构是 struct workqueue_struct，它包含了 cpu_workqueue_struct，如下：
[color="#000000"]/*
*
The per-CPU workqueue (if single thread, we always use the first
*
possible cpu).
[color="#000000"] */
struct
cpu_workqueue_struct {
spinlock_t
lock;//[color="#000000"]保护数据的lock
struct
list_head worklist;//[color="#000000"]挂起链表的头结点，集中了工作队列中所有挂起函数。
wait_queue_head_t
more_work;//[color="#000000"]等待队列，其中的工作者线程因为等待工作而处于休眠
struct
work_struct *current_work;//[color="#000000"]等待队列，其中的进程由于等待工作队列被刷新而处于休眠。
struct
workqueue_struct *wq;//指向workqueue_struct
[color="#000000"]结构的指针。
struct
task_struct *thread;//[color="#000000"]指向工作者线程的描述符。
int
run_depth; /* Detect run_workqueue() recursion depth */
}
____cacheline_aligned;
struct
work_struct {
atomic_long_t
data;  //[color="#000000"]传给挂起函数的参数，是个指针。
#define
WORK_STRUCT_PENDING 0 /* T if work item pending execution */
#define
WORK_STRUCT_FLAG_MASK (3UL)
#define
WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
struct
list_head entry; //[color="#000000"]如果函数已经在工作队列链表中，它为1[color="#000000"]，否则为0
work_func_t
func;    //[color="#000000"]挂起函数的指针
#ifdef
CONFIG_LOCKDEP
struct
lockdep_map lockdep_map;
[color="#000000"]#endif
[color="#000000"]};
创建一个工作队列：create_workqueue。

把函数插入工作队列：queue_work，它同时会唤醒内核线程（worker_thread）。
[color="#000000"]//[color="#000000"]内核线程是一直在执行的。
static
int worker_thread(void *__cwq)
[color="#000000"]{
struct
cpu_workqueue_struct *cwq = __cwq;
[color="#000000"] DEFINE_WAIT(wait);
if
(cwq->wq->freezeable)
[color="#000000"] set_freezable();
set_user_nice(current,
-5);
for
(;;) {
prepare_to_wait(&cwq->more_work,
&wait, TASK_INTERRUPTIBLE);
if
(!freezing(current) &&

!kthread_should_stop() &&

list_empty(&cwq->worklist))
[color="#000000"] schedule();
finish_wait(&cwq->more_work,
&wait);
[color="#000000"] try_to_freeze();
if
(kthread_should_stop())
[color="#000000"] break;
[color="#000000"] run_workqueue(cwq);
[color="#000000"] }
return
0;
[color="#000000"]}

工作队列的应用：我在分析 block 层的时候，就发现它使用了一个名为 kblockd 的工作队列（static struct workqueue_struct *kblockd_workqueue;）。
注：本文部分参考和引用了《Linux 的内核软中断（softirq）执行分析》，作者：skid
http://linux.ccidnet.com/art/741/20070612/1110075_1.html

本文来自ChinaUnix博客，如果查看原文请点：http://blog.chinaunix.net/u1/49088/showart_973671.html

文库|博客

返回列表

Chinaunix › 论坛 › 操作系统 › Linux新手园地 › Linux文档专区 › softirq,tasklet和workqueue的分析

softirq,tasklet和workqueue的分析 [复制链接]