浅析Linux的软中断的实现

独孤九贱 发表于 2011-06-15 11:12

作者：独孤九贱
平台：2.6.31.13 + x86 32位
仅供讨论学习之用，不对错误之处负责，转载请注明出处。

1、软中断

软中断的原理就略过了，讲内核的书上都有，此处省略1500字。。。。。。

1.1 注册
还是以我最熟悉的两个老朋友做为开篇：

open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
open_softirq向内核注册一个软中断，其实质是设置软中断向量表相应槽位，注册其处理函数：
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
softirq_vec[nr].action = action;
}

softirq_vec是整个软中断的向量表:
struct softirq_action
{
void (*action)(struct softirq_action *);
};

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

NR_SOFTIRQS是最大软中断向量数，内核支持的所有软中断如下：
enum
{
HI_SOFTIRQ=0,
TIMER_SOFTIRQ,
NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ,
BLOCK_SOFTIRQ,
TASKLET_SOFTIRQ,
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ,
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */

NR_SOFTIRQS
};

好像后来为RPS新增了一个，不过我的内核版本偏低。

1.2 激活

当需要调用软中断时，需要调用raise_softirq函数激活软中断，这里使用术语“激活”而非“调用”，
是因为在很多情况下不能直接调用软中断。所以只能快速地将其标志为“可执行”，等待未来某一时刻调用。
为什么“在很多情况下不能直接调用软中断”?试想一下下半部引入的理念，就是为了让上半部更快地执行。
如果在中断程序代码中直接调用软中断函数，那么就失去了上半部与下半部的区别，也就是失去了其存在的意义。

内核使用一个名为__softirq_pending的位图来描述软中断，每一个位对应一个软中断,位图包含在结构irq_stat中：typedef struct {
unsigned int __softirq_pending;
……
} ____cacheline_aligned irq_cpustat_t;

DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);

宏or_softirq_pending用于设置相应的位（位或操作）：
#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))

local_softirq_pending用于取得整个位图（而非某一位）：
#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)

宏__raise_softirq_irqoff是or_softirq_pending的包裹：
#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0)

raise_softirq_irqoff通过调用__raise_softirq_irqoff实现激活软中断，它的参数nr即为软中断对应的位图槽位：
/*
* This function must run with irqs disabled!
*/
inline void raise_softirq_irqoff(unsigned int nr)
{
//置位图，即标记为可执行状态
__raise_softirq_irqoff(nr);

/*
* If we're in an interrupt or softirq, we're done
* (this also catches softirq-disabled code). We will
* actually run the softirq once we return from
* the irq or softirq.
*
* Otherwise we wake up ksoftirqd to make sure we
* schedule the softirq soon.
*/
//设置了位图后，判断当前是否处于中断上下文中；如果不在，则是一个尽快调度软中断的好时机。
//in_interrupt另一个作用是判断软中断是否被禁用。
//wakeup_softirqd唤醒软中断的守护进程ksoftirqd。
if (!in_interrupt())
wakeup_softirqd();
}现在可以来看"激活"软中断的所有含义了，raise_softirq函数完成这一操作：
void raise_softirq(unsigned int nr)
{
unsigned long flags;

//所有操作，应该关闭中断，避免嵌套调用
local_irq_save(flags);
raise_softirq_irqoff(nr);
local_irq_restore(flags);
}可见，激活的操作，主要是两点：
<1>、最重要的，就是置相应的位图，等待将来被处理；
<2>、如果此时已经没有在中断上下文中，则立即调用(其实是内核线程的唤醒操作)，现在就是将来；

2、调度时机
是的，除了raise_softirq可能会（嗯，重要的是“可能”）通过wakeup_softirqd唤醒ksoftirqd外，还得明白软中断的其它调用时机。

A、当do_IRQ完成了I/O中断时调用irq_exit:
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq() __do_softirq()
#else
# define invoke_softirq() do_softirq()
#endif

void irq_exit(void)
{
account_system_vtime(current);
trace_hardirq_exit();
sub_preempt_count(IRQ_EXIT_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq(); //调用软中断
……
}

B、如果系统使用I/O APIC，在处理完本地时钟中断时：
void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
{
……
irq_exit();
……
}C、local_bh_enable

local_bh_enable就是打开下半部，当然重中之重就是软中断了：
void local_bh_enable(void)
{
_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}

static inline void _local_bh_enable_ip(unsigned long ip)
{
……

if (unlikely(!in_interrupt() && local_softirq_pending()))
do_softirq();

……
}

D、在SMP中，当CPU处理完被CALL_FUNCTION_VECTOR处理器间中断所触发的函数时：
唔，对多核中CPU之间的通信不熟，不太清楚这个机制……:em06:

3、do_softirq

不论是哪种调用方式，最终都会触发到软中断的核心处理函数do_softirq，它处理当前CPU上的所有软中断。
内核将软中断设计尽量与平台无关，但是在某些情况下，它们还是会有差异，先来看一个x86 32位的do_softirq版本：asmlinkage void do_softirq(void)
{
unsigned long flags;
struct thread_info *curctx;
union irq_ctx *irqctx;
u32 *isp;

//软中断不能在中断上下文内嵌套调用。中断处理程序或下半部采用的是"激活"方式。
if (in_interrupt())
return;

//禁止中断，保存中断标志
local_irq_save(flags);
//内核为每个CPU各维护一个软中断位图，因此几个软中断可以同时在不同的CPU上运行，包括相同的软中断。例如，
//NET_RX_SOFTIRQ可以同时跑在多个处理器上。
//local_softirq_pending用于确定当前CPU的位图中是否有位被设置，即是否有软中断等待处理。
//回想一下经常发生的网卡接收数据处理：当网卡中断落在哪一个CPU上时，与之相应的软中断函数就会在其上执行。
//从这里来看，实质就是网卡中断落在哪个CPU上，该CPU就置其软中断位图，这里做相应的检测（这里local_softirq_pending只
//是一个总的判断，后面还有按位的判断），检测到有相应的位，执行之
if (local_softirq_pending()) {
//取得线程描述符
curctx = current_thread_info();
//构造中断上下文结构，softirq_ctx是每个CPU的软中断上下文
//static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
//这里先取得当前CPU的软中断上下文，然后为其赋初始值——保存当前进程和栈指针
irqctx = __get_cpu_var(softirq_ctx);
irqctx->tinfo.task = curctx->task;
irqctx->tinfo.previous_esp = current_stack_pointer;

/* build the stack frame on the softirq stack */
//构造中断栈帧
isp = (u32 *) ((char *)irqctx + sizeof(*irqctx));

//call_on_stack切换内核栈，并在中断上下文上执行函数__do_softirq
call_on_stack(__do_softirq, isp);
/*
* Shouldnt happen, we returned above if in_interrupt():
*/
WARN_ON_ONCE(softirq_count());
}

//恢复之
local_irq_restore(flags);
}

当配置了CONFIG_4KSTACKS，每个进程的thread_union只有4K，而非8K。此时发生中断时，内核不再使用进程的内核栈，而是使用每个CPU独立的中断请求栈；
软中断函数同样在每个CPU独立的软中断栈上执行，而非进程的内核栈：
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n" //交换栈指针，中断栈帧的指针stack做为传入参数（%ebx），交换后esp是irq_ctx的栈顶，ebx是进程内核栈的栈指针
      "call *%%edi \n" //调用软中断函数
      "movl %%ebx,%%esp \n" //恢复之，直接使用movl，而非xchgl是因为函数执行完毕，中断的栈帧指针已经没有用处了
      : "=b" (stack)
      : "0" (stack),
      "D"(func)
      : "memory", "cc", "edx", "ecx", "eax");
}PS：所有的这些执行，应该都是在定义4K栈的基础上的：#ifdef CONFIG_4KSTACKS
/*
* per-CPU IRQ handling contexts (thread information and stack)
*/
union irq_ctx {
struct thread_info    tinfo;
u32                   stack[THREAD_SIZE/sizeof(u32)];
} __attribute__((aligned(PAGE_SIZE)));

static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
……

static void call_on_stack(void *func, void *stack)
……是的，这个版本相对复杂，但是如果看了复杂的，再来看简单的，就容易多了,当平台没有定义do_softirq函数时(__ARCH_HAS_DO_SOFTIRQ)，
内核提供了一个通用的：#ifndef __ARCH_HAS_DO_SOFTIRQ

/*
 * Generic do_softirq(), used when the architecture does not define
 * __ARCH_HAS_DO_SOFTIRQ (see the enclosing #ifndef).
 *
 * Runs __do_softirq() if local_softirq_pending() reports any pending bits;
 * does nothing when called from interrupt context.
 */
asmlinkage void do_softirq(void)
{
__u32 pending;
unsigned long flags;

/* Never run nested inside interrupt context; bail out. */
if (in_interrupt())
return;

/* Sample the pending bitmap with local interrupts off (state saved in flags). */
local_irq_save(flags);

pending = local_softirq_pending();

if (pending)
__do_softirq();

/* Put the interrupt state back as it was on entry. */
local_irq_restore(flags);
}

#endif

无需更多的解释，它非常的简洁。

不论是哪个版本，都将调用__do_softirq函数：asmlinkage void __do_softirq(void)
{
struct softirq_action *h;
__u32 pending;
int max_restart = MAX_SOFTIRQ_RESTART;
int cpu;

//保存位图
pending = local_softirq_pending();
//进程记帐
account_system_vtime(current);

//关闭本地CPU下半部。为了保证同一个CPU上的软中断以串行方式执行。
__local_bh_disable((unsigned long)__builtin_return_address(0));
lockdep_softirq_enter();

//获取本地CPU
cpu = smp_processor_id();
restart:
/* Reset the pending bitmask before enabling irqs */
//清除位图
set_softirq_pending(0);

//锁中断，只是为了保持位图的互斥，位图处理完毕。后面的代码可以直接使用保存的pending，
//而中断处理程序在激活的时候，也可以放心地使用irq_stat.__softirq_pending。
//所以，可以开中断了
local_irq_enable();

//取得软中断向量
h = softirq_vec;

//循环处理所有的软中断
do {
//逐步取位图的每一位，判断该位上是否有软中断被设置。若有，处理之
if (pending & 1) {
//保存抢占计数器
int prev_count = preempt_count();
kstat_incr_softirqs_this_cpu(h - softirq_vec);

trace_softirq_entry(h, softirq_vec);
//调用软中断
h->action(h);
trace_softirq_exit(h, softirq_vec);
//判断软中断处理函数执行前后抢占计数是否一致（不一致说明处理函数内部的抢占/锁操作没有配对），如果不一致，则输出一段错误信息，并恢复原计数
if (unlikely(prev_count != preempt_count())) {
printk(KERN_ERR "huh, entered softirq %td %s %p"
      "with preempt_count %08x,"
      " exited with %08x?\n", h - softirq_vec,
      softirq_to_name[h - softirq_vec],
      h->action, prev_count, preempt_count());
preempt_count() = prev_count;
}
//？？qsctr,这个是啥东东
rcu_bh_qsctr_inc(cpu);
}
//指向下一个软中断槽位
h++;
//移位，取下一个软中断位
pending >>= 1;
} while (pending);

//当软中断处理完毕后，因为前面已经开了中断了，所以有可能新的软中断已经又被设置，
//软中断调度程序会尝试重新处理软中断，其最大重启次数由max_restart决定。
//所以，这里必须再次关闭中断，再来一次……
local_irq_disable();

//取位图
pending = local_softirq_pending();
//有软中断被设置，且没有超过最大重启次数，再来一次先
if (pending && --max_restart)
goto restart;

//超过最大重启次数，还有软中断待处理，调用wakeup_softirqd。其任务是唤醒软中断守护进程ksoftirqd。
if (pending)
wakeup_softirqd();

lockdep_softirq_exit();

account_system_vtime(current);
//恢复下半部
_local_bh_enable();
}中断跟踪
如果中断跟踪CONFIG_TRACE_IRQFLAGS被定义，lockdep_softirq_enter/lockdep_softirq_exit用于递增/递减当前进程的软中断上下文计数器softirq_context：# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)trace_softirq_entry与trace_softirq_exit配合使用，可以用于判断软中断的延迟。

好像软中断不太难，没有更多的内容了。欢迎大家回贴补充。

phoxia 发表于 2011-06-15 11:52

精力旺盛啊～～

tuibo 发表于 2011-06-15 12:52

恩，才几百行代码

Godbach 发表于 2011-06-15 13:40

九贱兄的大作，一定要认真拜读。

VIP_fuck 发表于 2011-06-15 14:00

九贱兄又发新帖拜读

呵呵本来也想发个学习贴来着不过九贱兄牛贴已在俺就不敢发啦

期待新作

Godbach 发表于 2011-06-15 16:57

回复 5# VIP_fuck
呵呵，非常欢迎 LZ 分享。
再说，不同的人，侧重问题的重点也不一样。不用担心雷同，重在交流。

hidensu 发表于 2011-06-15 17:24

回复 1# 独孤九贱

好文章再次拜读，景仰一下

linuxxtz 发表于 2011-06-15 21:11

九贱大侠的一定要看

goter 发表于 2011-06-16 12:25

九贱兄，请问软中断和后半段的关系是怎样呢？

独孤九贱 发表于 2011-06-16 18:12

九贱兄，请问软中断和后半段的关系是怎样呢？
goter 发表于 2011-06-16 12:25 http://bbs1.chinaunix.net/images/common/back.gif

前者是后者的一种实现方式。前者是一种技术实现，是实体，后者是一种技术定义，是虚的。

页: [1] 2 3 4

Chinaunix's Archiver

浅析Linux的软中断的实现