softirq的起始
do_IRQ();
--> irq_enter(); //HARDIRQ部分的开始 更新系统中的一些统计量 标识出HARDIRQ上下文
--> generic_irq_handler();
--> irq_exit(); //softirq部分的起始
irq_exit
/*
 * Exit an interrupt context. Process softirqs if needed and possible.
 *
 * Called at the end of hard interrupt handling; this is where softirq
 * processing is kicked off.
 */
void irq_exit(void)
{
account_system_vtime(current);
trace_hardirq_exit();
/*
 * For reference:
 *   # define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
 *   #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
 * With that definition the subtraction below removes the hardirq mark
 * but leaves the count raised by 1 until the end of this function.
 */
sub_preempt_count(IRQ_EXIT_OFFSET);
/*
 * Only run softirqs when we are no longer inside any hardirq, softirq
 * or NMI context and at least one softirq is pending on this CPU.
 */
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
rcu_irq_exit();
#ifdef CONFIG_NO_HZ
/* Make sure that timer wheel updates are propagated */
if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
tick_nohz_stop_sched_tick(0);
#endif
/*
 * NOTE(review): presumably drops the remaining 1 left over from
 * subtracting HARDIRQ_OFFSET-1 above — confirm against the definition
 * of IRQ_EXIT_OFFSET for the kernel configuration in use.
 */
preempt_enable_no_resched();
}
/*
 * Subtract val from the current context's preempt_count.
 *
 * Getting to the counter takes a chain of definitions:
 * sub_preempt_count() -> preempt_count() -> current_thread_info().
 *
 * The macro must be function-like (take "val"); without the parameter
 * list the "(val)" in the body refers to an undeclared identifier.
 */
#define sub_preempt_count(val) do { preempt_count() -= (val); } while (0)
/*
 * Expands to the preempt_count field of the current thread_info.
 * The expansion is an lvalue, so callers can both read and assign it.
 */
#define preempt_count() (current_thread_info()->preempt_count)
/*
 * Return a pointer to the thread_info of the currently running context.
 *
 * The address is computed by clearing the low bits of the stack pointer
 * (sp & ~(THREAD_SIZE - 1)). This presumes thread_info sits at the
 * THREAD_SIZE-aligned base of the stack — confirm against the
 * architecture's stack layout.
 */
static inline struct thread_info *current_thread_info(void) __attribute_const__;
static inline struct thread_info *current_thread_info(void)
{
/* Bind the local variable to the "sp" register to read the stack pointer. */
register unsigned long sp asm ("sp");
return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
/*
 * low level task data that entry.S needs immediate access to.
 * __switch_to() assumes cpu_context follows immediately after cpu_domain.
 *
 * current_thread_info() locates this structure by masking the stack
 * pointer with ~(THREAD_SIZE - 1). Do not reorder fields: the layout is
 * relied upon by assembly code, as stated above.
 */
struct thread_info {
unsigned long flags; /* low level flags */
/*
 * Besides the preemption depth, this word also holds the hardirq,
 * softirq and NMI fields that irq_count()/in_interrupt() test.
 */
int preempt_count; /* 0 => preemptable, <0 => bug */
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
struct exec_domain *exec_domain; /* execution domain */
__u32 cpu; /* cpu */
__u32 cpu_domain; /* cpu domain */
struct cpu_context_save cpu_context; /* cpu context */
__u32 syscall; /* syscall number */
__u8 used_cp[16]; /* thread used copro */
unsigned long tp_value;
struct crunch_state crunchstate;
union fp_state fpstate __attribute__((aligned(8))); /* forced to 8-byte alignment */
union vfp_state vfpstate;
#ifdef CONFIG_ARM_THUMBEE
unsigned long thumbee_state; /* ThumbEE Handler Base register */
#endif
struct restart_block restart_block;
};
获取当前进程thread_info(进而得到进程描述符)的过程,详见《Linux内核设计与实现》。
为什么要做这一步?
减去IRQ_EXIT_OFFSET是用于标识一个HARDIRQ中断上下文的结束,这一步动作与do_IRQ中的irq_enter遥相呼应。
invoke_softirq是真正处理SOFTIRQ部分的函数,但进入调用必须满足两个条件:
- 处于非中断上下文(!in_interrupt())
- local_softirq_pending
in_interrupt宏根据preempt_count变量来判断当前是否处在一个中断上下文执行:
/*
 * Non-zero when running in interrupt context, i.e. when any hardirq,
 * softirq or NMI bit is set in preempt_count.
 */
#define in_interrupt() (irq_count())
/* Extract the hardirq, softirq and NMI fields of preempt_count. */
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))
Linux内核中,preempt_count里HARDIRQ_MASK、SOFTIRQ_MASK、NMI_MASK三个位域中任意一个不为0,即表示当前处于中断上下文。(NMI即Non-Maskable Interrupt,不可屏蔽中断,并非x86特有的概念。)
既然减去IRQ_EXIT_OFFSET可以表示HARDIRQ中断上下文结束,那么这个值应该是在进入HARDIRQ时设置的,以表示当前处于HARDIRQ上下文,不出所料:
/*
 * Enter hardirq context: account system time, raise preempt_count by
 * HARDIRQ_OFFSET and emit the hardirq-enter trace hook.
 *
 * Every continued line must end with the backslash itself; annotations
 * therefore go in a block comment placed before the backslash.
 */
#define __irq_enter()						\
	do {							\
		account_system_vtime(current);			\
		add_preempt_count(HARDIRQ_OFFSET); /* here: marks hardirq context */ \
		trace_hardirq_enter();				\
	} while (0)
linux内核对preempt_count的使用:
第二个条件是local_softirq_pending
这个宏用于判断__softirq_pending中是否有等待处理的softirq:
/* Read the __softirq_pending bitmask out of the per-CPU irq_stat. */
#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
/*
 * Per-CPU read built on percpu_from_op() with a "mov" instruction.
 * NOTE(review): percpu_from_op is not shown here; this looks like the
 * x86 flavour of the macro — confirm which architecture is intended.
 */
#define percpu_read(var) percpu_from_op("mov", var, "m" (var))
/* Interrupt statistics, one cacheline-aligned irq_cpustat_t slot for each of NR_CPUS. */
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
/*
 * Per-CPU interrupt state. The element type of irq_stat[].
 */
typedef struct {
/* Bitmask of softirqs waiting to run; one bit per softirq type. */
unsigned int __softirq_pending;
#ifdef CONFIG_LOCAL_TIMERS
/* Only present with CONFIG_LOCAL_TIMERS. */
unsigned int local_timer_irqs;
#endif
#ifdef CONFIG_SMP
/* Only present with CONFIG_SMP; one counter per IPI type (NR_IPI entries). */
unsigned int ipi_irqs[NR_IPI];
#endif
} ____cacheline_aligned irq_cpustat_t;
irq_stat是一个per-CPU变量,系统中每个CPU都拥有各自的副本。
unsigned int __softirq_pending;
成员用于标识当前正在等待被处理的softirq,每一种softirq都在__softirq_pending中占据一个bit。每个CPU都有自己的__softirq_pending变量。
总结一下这两个条件:
当前不在interrupt上下文中,且__softirq_pending中有等待的softirq。要求当前不在interrupt上下文,保证了这样一点:如果softirq部分正在执行时发生了一个外部中断,那么该中断的处理函数在结束HARDIRQ部分时不会再去处理softirq,而是直接返回,这样,此前被中断的SOFTIRQ部分将继续被执行。
满足此二者,即可调用invoke_softirq:
/*
 * Run pending softirqs on the way out of a hard interrupt.
 *
 * When force_irqthreads is set, processing is handed to the softirq
 * daemon via wakeup_softirqd(). Otherwise softirqs run right here:
 * through __do_softirq() when __ARCH_IRQ_EXIT_IRQS_DISABLED is defined,
 * and through do_softirq() when it is not.
 */
static inline void invoke_softirq(void)
{
	if (force_irqthreads) {
		wakeup_softirqd();
		return;
	}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
	__do_softirq();
#else
	do_softirq();
#endif
}
__ARCH_IRQ_EXIT_IRQS_DISABLED 宏是体系结构相关的,用来决定在HARDIRQ部分结束时有没有关闭处理器响应外部中断的能力。有些体系结构,可以在HARDIRQ结束时,进入SOFTIRQ之前,就能保证外部中断是被屏蔽的状态,这就可以直接调用__do_softirq()
否则就调用do_softirq();
do_softirq要比__do_softirq()多做一些事情,主要就是中断屏蔽,以确保开始执行时中断是关闭的。
/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance is latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10
/*
 * Core softirq dispatcher: run the handler of every softirq whose bit is
 * set in this CPU's pending mask, lowest bit first.
 *
 * Interrupts are enabled while handlers run and disabled again before
 * the pending mask is re-read, so the function returns with interrupts
 * disabled. Callers shown in this file enter with interrupts disabled
 * (do_softirq() uses local_irq_save() first).
 */
asmlinkage void __do_softirq(void)
{
struct softirq_action *h;
__u32 pending;
int max_restart = MAX_SOFTIRQ_RESTART;
int cpu;
/* Snapshot the pending mask; the loop below consumes this local copy. */
pending = local_softirq_pending();
account_system_vtime(current);
/* Enter softirq context (counterpart: __local_bh_enable() at the end). */
__local_bh_disable((unsigned long)__builtin_return_address(0),
SOFTIRQ_OFFSET);
lockdep_softirq_enter();
cpu = smp_processor_id();
restart:
/* Reset the pending bitmask before enabling irqs */
set_softirq_pending(0);
local_irq_enable();
/* Walk softirq_vec in step with the bits of pending: bit i <-> softirq_vec[i]. */
h = softirq_vec;
do {
if (pending & 1) {
unsigned int vec_nr = h - softirq_vec;
/* Remember preempt_count so a handler that leaks a count can be detected. */
int prev_count = preempt_count();
kstat_incr_softirqs_this_cpu(vec_nr);
trace_softirq_entry(vec_nr);
h->action(h);
trace_softirq_exit(vec_nr);
if (unlikely(prev_count != preempt_count())) {
printk(KERN_ERR "huh, entered softirq %u %s %p"
"with preempt_count %08x,"
" exited with %08x?\n", vec_nr,
softirq_to_name[vec_nr], h->action,
prev_count, preempt_count());
/* Repair the count so the imbalance does not propagate. */
preempt_count() = prev_count;
}
rcu_bh_qs(cpu);
}
h++;
pending >>= 1;
} while (pending);
local_irq_disable();
/* Pick up softirqs that were raised while the handlers above were running. */
pending = local_softirq_pending();
if (pending && --max_restart)
goto restart;
/* Restart budget exhausted but work remains: defer it to the softirq daemon. */
if (pending)
wakeup_softirqd();
lockdep_softirq_exit();
account_system_vtime(current);
__local_bh_enable(SOFTIRQ_OFFSET);
}
#ifndef __ARCH_HAS_DO_SOFTIRQ
/*
 * Generic entry point for softirq processing.
 *
 * Does nothing when called from interrupt context. Otherwise saves the
 * interrupt state in flags and disables local interrupts, runs
 * __do_softirq() if any softirq is pending, then restores the saved
 * interrupt state.
 */
asmlinkage void do_softirq(void)
{
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	if (local_softirq_pending())
		__do_softirq();

	local_irq_restore(flags);
}
#endif
软中断类型
之前说过每个softirq枚举类型都在__softirq_pending变量上占据一个bit,softirq类型都有哪些?
/*
 * Softirq types. Each value is the bit position of that softirq in
 * __softirq_pending and its index in softirq_vec[]; lower values are
 * scanned, and therefore run, first.
 */
enum
{
HI_SOFTIRQ=0, /* tasklets (together with TASKLET_SOFTIRQ) */
TIMER_SOFTIRQ, /* timers */
NET_TX_SOFTIRQ, /* network device transmit */
NET_RX_SOFTIRQ, /* network device receive */
BLOCK_SOFTIRQ, /* block devices */
BLOCK_IOPOLL_SOFTIRQ, /* block devices */
TASKLET_SOFTIRQ, /* tasklets (together with HI_SOFTIRQ) */
SCHED_SOFTIRQ, /* scheduler */
HRTIMER_SOFTIRQ, /* timers (together with TIMER_SOFTIRQ) */
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
NR_SOFTIRQS /* number of softirq types; sizes softirq_vec[] and softirq_to_name[] */
};
其中,
HI_SOFTIRQ和TASKLET_SOFTIRQ用于实现tasklet
TIMER_SOFTIRQ和HRTIMER_SOFTIRQ用于实现定时器
NET_TX_SOFTIRQ和NET_RX_SOFTIRQ用于网络设备的发送和接收操作。
BLOCK_SOFTIRQ和BLOCK_IOPOLL_SOFTIRQ用于块设备的操作。
SCHED_SOFTIRQ用于调度器
内核中还有一个数组,保存了每一个软中断的handler
/*
 * Handler table with one entry per softirq type, indexed by softirq
 * number. __do_softirq() walks it in step with the pending bitmask.
 */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
/* One softirq handler slot. */
struct softirq_action
{
/* Handler; __do_softirq() calls it as h->action(h), passing this entry. */
void (*action)(struct softirq_action *);
};
/*
 * Printable names of the softirq types, in enum order. Used by the
 * preempt_count mismatch diagnostic in __do_softirq().
 */
char *softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
"TASKLET", "SCHED", "HRTIMER", "RCU"
};
铺垫到这里,再看__do_softirq函数就轻松了:do{} while()由低到高遍历本地pending的每一位,看哪一bit为1,再到softirq_vec中找到对应的handler去执行。
注意:
- __local_bh_disable((unsigned long)__builtin_return_address(0), SOFTIRQ_OFFSET);里面做了一次preempt_count() += cnt;(这里cnt即SOFTIRQ_OFFSET),用于标识进入SOFTIRQ上下文。__local_bh_enable则相反,做对应的减法。
- local_irq_save(flags)和local_irq_restore(flags)。local_irq_save把当前的中断状态(开或关)保存到flags中,然后禁用当前处理器上的中断。注意,flags是被直接传递的,而不是通过指针来传递,这是因为local_irq_save被实现为宏。local_irq_disable不保存状态,直接关闭本地处理器的中断响应;只有在确知中断并未在其他地方被禁用的情况下,才能使用这个版本。__do_softirq在执行各个action之前调用了local_irq_enable(),因此SOFTIRQ处理期间可以响应外部中断。
- __softirq_pending 低位先被scan到,所以低位对应的action会先被执行
- do while循环之后再次检测__softirq_pending是否为0:softirq执行期间可能被外设中断打断,而驱动在该中断处理函数中可能又触发了一个softirq,所以循环结束后要再次检查是否有新的softirq加入。这里有一个对max_restart变量的判断,其初始化为
int max_restart = MAX_SOFTIRQ_RESTART;
直到max_restart减为0或者读出的pending值为0时,才停止RESTART操作。
- 如果从上一条的RESTART逻辑走出来后pending值仍不为0,说明重启次数已达上限,需要唤醒ksoftirqd来处理了。不能在softirq中耗费太久,否则会导致一个中断处理流程迟迟无法结束,意味着被中断前的任务无法得到执行。ksoftirqd的诞生就是为了解决这个问题:在Linux内核初始化期间,会创建名为ksoftirqd的内核线程,它运行时的主要任务就是调用do_softirq来执行等待中的softirq;如果没有softirq要处理,就进入睡眠。通过wakeup_softirqd()唤醒ksoftirqd后,它会在调度器的控制下执行,从而减轻当前中断在softirq部分的工作负载。