linux -- 中断管理 -- softirq机制

softirq的起始

do_IRQ();
	--> irq_enter();	//HARDIRQ部分的开始  更新系统中的一些统计量  标识出HARDIRQ上下文
	--> generic_handle_irq();	
	--> irq_exit();		//softirq部分的起始

irq_exit

/*
 * Exit an interrupt context. Process softirqs if needed and possible.
 *
 * Counterpart of irq_enter(): removes the HARDIRQ marking from
 * preempt_count and, if no interrupt context remains active and this CPU
 * has softirqs pending, hands over to invoke_softirq().
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	/*
	 * For reference:
	 *   # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
	 *   #define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)
	 * NOTE(review): subtracting HARDIRQ_OFFSET-1 instead of HARDIRQ_OFFSET
	 * presumably leaves a count of 1 behind, which the
	 * preempt_enable_no_resched() at the end of this function drops --
	 * confirm against the IRQ_EXIT_OFFSET definition in use.
	 */
	sub_preempt_count(IRQ_EXIT_OFFSET);
	/*
	 * Only run softirqs when we are not nested inside another
	 * hardirq/softirq/NMI section and something is actually pending.
	 */
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	rcu_irq_exit();
#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
		tick_nohz_stop_sched_tick(0);
#endif
	preempt_enable_no_resched();
}

/*
 * Decrementing preempt_count goes through several layers:
 * sub_preempt_count(val) subtracts val from preempt_count(), which is an
 * lvalue macro for the preempt_count field of the thread_info returned by
 * current_thread_info().
 *
 * sub_preempt_count() must take the amount as a macro parameter; its body
 * refers to (val).
 */
#define sub_preempt_count(val)	do { preempt_count() -= (val); } while (0)
#define preempt_count()	(current_thread_info()->preempt_count)
/*
 * Locate the thread_info of the currently running context.
 *
 * The stack pointer is rounded down to a THREAD_SIZE boundary and the
 * result is returned as a struct thread_info pointer.
 * NOTE(review): the mask only works if THREAD_SIZE is a power of two and
 * the stack area is THREAD_SIZE-aligned -- confirm for the target config.
 */
static inline struct thread_info *current_thread_info(void) __attribute_const__;
static inline struct thread_info *current_thread_info(void)
{
	register unsigned long stack_ptr asm ("sp");
	unsigned long stack_base = stack_ptr & ~(THREAD_SIZE - 1);

	return (struct thread_info *)stack_base;
}

/*
 * low level task data that entry.S needs immediate access to.
 * __switch_to() assumes cpu_context follows immediately after cpu_domain.
 *
 * current_thread_info() finds this structure by masking the stack pointer
 * with ~(THREAD_SIZE - 1). The member order is significant (see the
 * __switch_to() note above), so do not reorder fields.
 */
struct thread_info {
	unsigned long		flags;		/* low level flags */
	int			preempt_count;	/* 0 => preemptable, <0 => bug */
	mm_segment_t		addr_limit;	/* address limit */
	struct task_struct	*task;		/* main task structure */
	struct exec_domain	*exec_domain;	/* execution domain */
	__u32			cpu;		/* cpu */
	__u32			cpu_domain;	/* cpu domain */
	struct cpu_context_save	cpu_context;	/* cpu context */
	__u32			syscall;	/* syscall number */
	__u8			used_cp[16];	/* thread used copro */
	unsigned long		tp_value;	/* presumably the thread (TLS) pointer value -- confirm */
	struct crunch_state	crunchstate;	/* presumably Crunch coprocessor state -- confirm */
	union fp_state		fpstate __attribute__((aligned(8)));	/* FP state, forced to 8-byte alignment */
	union vfp_state		vfpstate;	/* presumably VFP state -- confirm */
#ifdef CONFIG_ARM_THUMBEE
	unsigned long		thumbee_state;	/* ThumbEE Handler Base register */
#endif
	struct restart_block	restart_block;	/* presumably syscall restart data -- confirm */
};

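获取当前进程thread_info的过程：thread_info放在内核栈所在内存区域的起始（低地址）处，而该区域按THREAD_SIZE对齐，因此把栈指针sp的低位清零（sp & ~(THREAD_SIZE - 1)）即可得到thread_info的地址，再通过其task成员就能找到进程描述符task_struct。详见《Linux内核设计与实现》：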
在这里插入图片描述
为什么要做这一步？
减去IRQ_EXIT_OFFSET是用于标识一个HARDIRQ中断上下文的结束，这一步动作与do_IRQ中的irq_enter遥相呼应。

invoke_softirq是真正处理SOFTIRQ部分的函数，但进入调用必须满足两个条件：

处于非中断上下文(!in_interrupt())
local_softirq_pending

in_interrupt宏根据preempt_count变量来判断当前是否处在一个中断上下文执行：

/*
 * irq_count() extracts the hardirq, softirq and NMI fields of
 * preempt_count; the result is non-zero while any of them is set.
 */
#define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))

/*
 * in_interrupt() is true (non-zero) whenever irq_count() is non-zero,
 * i.e. while executing in hardirq, softirq or NMI context.
 */
#define in_interrupt()		(irq_count())

Linux内核中，preempt_count里由HARDIRQ_MASK、SOFTIRQ_MASK、NMI_MASK覆盖的位只要有任意一位非0，就表示当前处于中断上下文。（NMI即Non-Maskable Interrupt，不可屏蔽中断；这是一个通用的体系结构概念，并非x86的APIC所特有。）

既然减去IRQ_EXIT_OFFSET可以表示HARDIRQ中断上下文结束，那么这个值应该是在进入HARDIRQ时设置的，以表示当前处于HARDIRQ上下文，不出所料：

/*
 * Enter hardirq context.
 *
 * The add_preempt_count(HARDIRQ_OFFSET) step is what marks the HARDIRQ
 * context in preempt_count; irq_exit() removes the marking again with
 * sub_preempt_count(IRQ_EXIT_OFFSET).
 *
 * Nothing may follow a line-continuation backslash inside the macro, so
 * annotations belong in this comment rather than at the end of a line.
 */
#define __irq_enter()					\
	do {						\
		account_system_vtime(current);		\
		add_preempt_count(HARDIRQ_OFFSET);	\
		trace_hardirq_enter();			\
	} while (0)

linux内核对preempt_count的使用：
在这里插入图片描述

第二条件是local_softirq_pending
这个宏用于判断当前CPU的__softirq_pending中是否有等待处理的softirq：

/*
 * Value of the __softirq_pending member of irq_stat, read through
 * percpu_read(); non-zero means at least one softirq is waiting.
 * NOTE(review): this form treats irq_stat as a single per-CPU object, while
 * the irq_stat definition shown in this file is an NR_CPUS-sized array --
 * the two snippets look like they come from different architectures;
 * confirm which one applies.
 */
#define local_softirq_pending()	percpu_read(irq_stat.__softirq_pending)

/*
 * Expands to percpu_from_op() with a "mov" operation and var passed as an
 * "m" (memory) operand. Presumably an x86 per-CPU load -- confirm against
 * the percpu_from_op() definition.
 */
#define percpu_read(var)		percpu_from_op("mov", var, "m" (var))

/*
 * Interrupt statistics table with NR_CPUS entries of irq_cpustat_t
 * (presumably indexed by CPU number -- confirm), cache-line aligned.
 */
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);


/*
 * Interrupt bookkeeping record (the element type of irq_stat); each
 * instance is cache-line aligned.
 */
typedef struct {
	unsigned int __softirq_pending;	/* softirqs waiting to be processed; presumably one bit per softirq type -- confirm */
#ifdef CONFIG_LOCAL_TIMERS
	unsigned int local_timer_irqs;	/* presumably a count of local timer interrupts -- confirm */
#endif
#ifdef CONFIG_SMP
	unsigned int ipi_irqs[NR_IPI];	/* presumably per-type IPI counters -- confirm */
#endif
} ____cacheline_aligned irq_cpustat_t;

irq_stat是一个per-CPU变量，系统中每个CPU都拥有各自的副本。
unsigned int __softirq_pending;成员用于标识当前正在等待被处理的softirq，每一种softirq都在__softirq_pending中占据一个bit。每个CPU都有自己的__softirq_pending变量。

总结一下这两个条件：
当前不在interrupt上下文中，且__softirq_pending中有等待的softirq。“不在interrupt上下文”这一条件保证了：如果softirq部分正在执行时发生了一个外部中断，那么该中断的处理流程在结束HARDIRQ部分（irq_exit）时，由于preempt_count中标识SOFTIRQ上下文的位仍然置位，in_interrupt()为真，所以不会再次进入softirq处理，而是直接返回；这样，此前被中断的SOFTIRQ部分将继续被执行。

满足此二者，即可调用invoke_softirq：

/*
 * Start softirq processing on the way out of a hardirq.
 *
 * With force_irqthreads set, the work is deferred by waking the softirq
 * daemon. Otherwise softirqs run right here: via __do_softirq() when
 * __ARCH_IRQ_EXIT_IRQS_DISABLED is defined, via do_softirq() when it is
 * not.
 */
static inline void invoke_softirq(void)
{
	if (force_irqthreads) {
		wakeup_softirqd();
		return;
	}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
	__do_softirq();
#else
	do_softirq();
#endif
}

__ARCH_IRQ_EXIT_IRQS_DISABLED 宏是体系结构相关的，用来决定在HARDIRQ部分结束时有没有关闭处理器响应外部中断的能力。有些体系结构，可以在HARDIRQ结束时，进入SOFTIRQ之前，就能保证外部中断是被屏蔽的状态，这就可以直接调用__do_softirq() 否则就调用do_softirq();

do_softirq要比__do_softirq()多做一些事情，主要就是中断屏蔽，以确保开始执行时中断是关闭的。

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance is latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

/*
 * Run the softirq handlers that are pending on this CPU.
 *
 * The pending bitmask is snapshotted and cleared, then scanned from bit 0
 * upwards; for every set bit the matching softirq_vec[] handler is called.
 * Interrupts are enabled while handlers run and disabled again before the
 * bitmask is re-read. If new softirqs were raised in the meantime the scan
 * is repeated, for at most MAX_SOFTIRQ_RESTART passes in total; whatever is
 * still pending after that is handed to the softirq daemon.
 *
 * The function switches interrupts on and off unconditionally and leaves
 * them disabled on return, so callers are expected to enter with interrupts
 * disabled (do_softirq() does this with local_irq_save()).
 */
asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();
	account_system_vtime(current);

	/* Account SOFTIRQ_OFFSET until the matching __local_bh_enable() below. */
	__local_bh_disable((unsigned long)__builtin_return_address(0),
				SOFTIRQ_OFFSET);
	lockdep_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	/* Walk the snapshot from the lowest bit; h tracks the matching vector. */
	do {
		if (pending & 1) {
			unsigned int vec_nr = h - softirq_vec;
			int prev_count = preempt_count();

			kstat_incr_softirqs_this_cpu(vec_nr);

			trace_softirq_entry(vec_nr);
			h->action(h);
			trace_softirq_exit(vec_nr);
			/*
			 * A handler must leave preempt_count as it found it;
			 * report the offender and repair the count.
			 * The format needs a space after %p so the pointer is
			 * separated from the following word.
			 */
			if (unlikely(prev_count != preempt_count())) {
				printk(KERN_ERR "huh, entered softirq %u %s %p "
				       "with preempt_count %08x,"
				       " exited with %08x?\n", vec_nr,
				       softirq_to_name[vec_nr], h->action,
				       prev_count, preempt_count());
				preempt_count() = prev_count;
			}

			rcu_bh_qs(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	/* Softirqs raised while handlers ran show up here. */
	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	/* Restart budget exhausted: let the softirq daemon take over. */
	if (pending)
		wakeup_softirqd();

	lockdep_softirq_exit();

	account_system_vtime(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

/*
 * Generic entry point for softirq processing.
 *
 * Returns immediately when called from interrupt context. Otherwise it
 * saves the interrupt state and disables interrupts, runs __do_softirq()
 * if this CPU has anything pending, and restores the saved state.
 */
asmlinkage void do_softirq(void)
{
	unsigned long irq_state;

	if (in_interrupt())
		return;

	local_irq_save(irq_state);

	if (local_softirq_pending())
		__do_softirq();

	local_irq_restore(irq_state);
}

#endif

软中断类型

之前说过每个softirq枚举类型都在__softirq_pending变量上占据一个bit，softirq类型都有哪些？

/*
 * Softirq numbers. Each value is the index of the softirq's handler in
 * softirq_vec[] and the bit position it occupies in __softirq_pending;
 * __do_softirq() scans from bit 0 upwards, so lower numbers run first.
 */
enum
{
	HI_SOFTIRQ		= 0,	/* tasklets (see also TASKLET_SOFTIRQ) */
	TIMER_SOFTIRQ		= 1,	/* timers */
	NET_TX_SOFTIRQ		= 2,	/* network transmit */
	NET_RX_SOFTIRQ		= 3,	/* network receive */
	BLOCK_SOFTIRQ		= 4,	/* block devices */
	BLOCK_IOPOLL_SOFTIRQ	= 5,	/* block devices */
	TASKLET_SOFTIRQ		= 6,	/* tasklets (see also HI_SOFTIRQ) */
	SCHED_SOFTIRQ		= 7,	/* scheduler */
	HRTIMER_SOFTIRQ		= 8,	/* timers */
	RCU_SOFTIRQ		= 9,	/* Preferable RCU should always be the last softirq */

	NR_SOFTIRQS		= RCU_SOFTIRQ + 1
};

其中，
HI_SOFTIRQ和TASKLET_SOFTIRQ用于实现tasklet
TIMER_SOFTIRQ和HRTIMER_SOFTIRQ用于实现定时器
NET_TX_SOFTIRQ和NET_RX_SOFTIRQ用于网络设备的发送和接收操作。
BLOCK_SOFTIRQ和BLOCK_IOPOLL_SOFTIRQ用于块设备的操作。
SCHED_SOFTIRQ用于调度器

内核中还有一个数组，保存了每一个软中断的handler

/*
 * Handler table with one slot per softirq number; __do_softirq() derives
 * the softirq number from an entry's offset in this array.
 */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

/* One softirq handler slot. */
struct softirq_action
{
	void	(*action)(struct softirq_action *);	/* handler; invoked as h->action(h), i.e. with its own slot */
};

/*
 * Printable name of each softirq, indexed by softirq number; the entries
 * are listed in the same order as the softirq enum (HI_SOFTIRQ first,
 * RCU_SOFTIRQ last). Used by the diagnostic message in __do_softirq().
 */
char *softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
	"TASKLET", "SCHED", "HRTIMER",	"RCU"
};

铺垫到这里，再看__do_softirq函数就轻松了：其中的do {} while()循环由低到高遍历本地pending的每一位，遇到为1的bit，就到softirq_vec中找到对应的handler去执行。

注意：

__local_bh_disable((unsigned long)__builtin_return_address(0), SOFTIRQ_OFFSET);里面做了一次preempt_count() += cnt;（这里的cnt就是传入的SOFTIRQ_OFFSET），用于标识当前进入了SOFTIRQ上下文。__local_bh_enable(SOFTIRQ_OFFSET)则相反，从preempt_count中减去同样的值，标识SOFTIRQ上下文结束。
local_irq_save(flags)和local_irq_restore(flags)。local_irq_save把当前的中断状态（开或关）保存到flags中，然后禁止当前处理器响应中断；local_irq_restore再按flags恢复原来的状态。注意，flags是直接传递的，而不是通过指针传递，这是因为local_irq_save被实现为宏。local_irq_disable则不保存状态，直接关闭本地处理器的中断响应；只有在确知中断并未在其他地方被禁用的情况下，才能使用这个版本。另外，__do_softirq在执行各个softirq的action之前调用了local_irq_enable()，因此SOFTIRQ处理期间是可以响应外部中断的。
__softirq_pending 低位先被scan到，所以低位对应的action会先被执行
do while循环之后再次检测__softirq_pending 是否为0，因为softirq可能会被外设中断打断，驱动在实现该中断处理函数时可能使用了一个softirq，do while循环后要再次检查是否有新的softirq加入。这里有个max_restart变量值的判断，其初始化int max_restart = MAX_SOFTIRQ_RESTART; 待到max_restart为0 或者 pending值读出来为0时，才停止RESTART操作。
如果从上面的restart逻辑走出来后，pending值仍不为0，说明重复处理的次数已经达到上限，需要唤醒ksoftirqd来处理剩下的softirq了。不能在softirq中耗费太久，否则会导致一个中断处理流程迟迟无法结束，意味着被中断前的任务无法得到执行。ksoftirqd的诞生就是为了解决这个问题：在linux内核初始化期间，会为每个CPU创建一个名为ksoftirqd的内核线程，该线程运行时的主要任务就是调用do_softirq来执行等待中的softirq；如果没有softirq要处理，它就进入睡眠。通过wakeup_softirqd()唤醒ksoftirqd之后，它会在调度器的控制下执行，从而减轻当前中断在softirq部分的工作负载。