背景

内核中有很多判断条件在正常情况下的结果都是固定的，只有在极其罕见的场景下才会改变。通常单个这种判断的代价很低，可以忽略；但是如果这种判断数量巨大且被频繁执行，就会带来性能损失。内核的static-key机制就是为了优化这种场景，其优化的结果是：对于大多数情况，对应的判断被优化为一个NOP指令；只有在非常罕见的场景下才会变成jump XXX一类的指令，使得对应的代码段得到执行。
在这里插入图片描述

1. static-key的使用方法

1.1. static-key定义

static_key 结构体的定义如下：

#ifdef CONFIG_JUMP_LABEL

/*
 * Static key as built when CONFIG_JUMP_LABEL is defined: an atomic
 * 'enabled' field followed by an anonymous union that overlays an
 * unsigned long ('type') with two pointer members ('entries', 'next').
 * The low bits of that shared word carry the flags listed in the note
 * inside the structure.
 */
struct static_key {
	/* Atomic enable state/count of the key. */
	atomic_t enabled;
/*
 * Note:
 *   To make anonymous unions work with old compilers, the static
 *   initialization of them requires brackets. This creates a dependency
 *   on the order of the struct with the initializers. If any fields
 *   are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
 *   to be modified.
 *
 * bit 0 => 1 if key is initially true
 *	    0 if initially false
 * bit 1 => 1 if points to struct static_key_mod
 *	    0 if points to struct jump_entry
 */
	union {
		/* Same storage viewed as an integer, for reading the flag bits. */
		unsigned long type;
		/* Same storage viewed as a pointer to struct jump_entry. */
		struct jump_entry *entries;
		/* Same storage viewed as a pointer to struct static_key_mod. */
		struct static_key_mod *next;
	};
};

#else
/*
 * Variant used when CONFIG_JUMP_LABEL is not defined: the key is
 * reduced to the atomic 'enabled' field alone.
 */
struct static_key {
	atomic_t enabled;
};
#endif	/* CONFIG_JUMP_LABEL */

如果没有定义CONFIG_JUMP_LABEL，则static_key 退化成atomic变量。

1.2 初始化

/*
 * DEFINE_STATIC_KEY_TRUE(name): define a variable 'name' of type
 * struct static_key_true, initialized with STATIC_KEY_TRUE_INIT.
 */
#define DEFINE_STATIC_KEY_TRUE(name)	\
	struct static_key_true name = STATIC_KEY_TRUE_INIT
/*
 * DEFINE_STATIC_KEY_FALSE(name): define a variable 'name' of type
 * struct static_key_false, initialized with STATIC_KEY_FALSE_INIT.
 */
#define DEFINE_STATIC_KEY_FALSE(name)	\
	struct static_key_false name = STATIC_KEY_FALSE_INIT

/*
 * Compound-literal initializers for the typed wrappers: each one sets
 * the wrapper's embedded '.key' member to the matching
 * STATIC_KEY_INIT_TRUE / STATIC_KEY_INIT_FALSE initializer.
 */
#define STATIC_KEY_TRUE_INIT  (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE,  }
#define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, }

/*
 * Initializer for a key that starts out true: 'enabled' is set to 1 and
 * the union word ('entries') is seeded with JUMP_TYPE_TRUE.
 * NOTE(review): JUMP_TYPE_TRUE is not defined in this excerpt; it
 * presumably sets the "initially true" flag bit - confirm.
 */
#define STATIC_KEY_INIT_TRUE                    \
    { .enabled = { 1 },                    \
      .entries = (void *)JUMP_TYPE_TRUE }
/*
 * Initializer for a key that starts out false: 'enabled' is set to 0
 * and the union word ('entries') is seeded with JUMP_TYPE_FALSE.
 */
#define STATIC_KEY_INIT_FALSE                    \
    { .enabled = { 0 },                    \
      .entries = (void *)JUMP_TYPE_FALSE }

false和true的区别有两点：enabled的初始值是0还是1，以及entries低位中编码的初始类型是JUMP_TYPE_FALSE还是JUMP_TYPE_TRUE。

1.3 条件判断

#ifdef CONFIG_JUMP_LABEL

/*
 * Combine the right initial value (type) with the right branch order
 * to generate the desired result.
 *
 *
 * type\branch|	likely (1)	      |	unlikely (0)
 * -----------+-----------------------+------------------
 *            |                       |
 *  true (1)  |	   ...		      |	   ...
 *            |    NOP		      |	   JMP L
 *            |    <br-stmts>	      |	1: ...
 *            |	L: ...		      |
 *            |			      |
 *            |			      |	L: <br-stmts>
 *            |			      |	   jmp 1b
 *            |                       |
 * -----------+-----------------------+------------------
 *            |                       |
 *  false (0) |	   ...		      |	   ...
 *            |    JMP L	      |	   NOP
 *            |    <br-stmts>	      |	1: ...
 *            |	L: ...		      |
 *            |			      |
 *            |			      |	L: <br-stmts>
 *            |			      |	   jmp 1b
 *            |                       |
 * -----------+-----------------------+------------------
 *
 * The initial value is encoded in the LSB of static_key::entries,
 * type: 0 = false, 1 = true.
 *
 * The branch type is encoded in the LSB of jump_entry::key,
 * branch: 0 = unlikely, 1 = likely.
 *
 * This gives the following logic table:
 *
 *	enabled	type	branch	  instruction
 * -----------------------------+-----------
 *	0	0	0	| NOP
 *	0	0	1	| JMP
 *	0	1	0	| NOP
 *	0	1	1	| JMP
 *
 *	1	0	0	| JMP
 *	1	0	1	| NOP
 *	1	1	0	| JMP
 *	1	1	1	| NOP
 *
 * Which gives the following functions:
 *
 *   dynamic: instruction = enabled ^ branch
 *   static:  instruction = type ^ branch
 *
 * See jump_label_type() / jump_label_init_type().
 */

/*
 * static_branch_likely(x) - test a static key, hinting the result as likely.
 *
 * 'x' is a pointer to a struct static_key_true or struct static_key_false.
 * The helper is selected at compile time from the pointee type:
 *   - struct static_key_true:  branch = !arch_static_branch(&(x)->key, true)
 *   - struct static_key_false: branch = !arch_static_branch_jump(&(x)->key, true)
 *   - any other type:          branch = ____wrong_branch_error()
 *     (NOTE(review): presumably a deliberately unresolvable symbol that
 *     turns a wrong argument type into a build failure - confirm.)
 *
 * The statement expression evaluates to likely(branch).
 */
#define static_branch_likely(x)							\
({										\
	bool branch;								\
	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
		branch = !arch_static_branch(&(x)->key, true);			\
	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
		branch = !arch_static_branch_jump(&(x)->key, true);		\
	else									\
		branch = ____wrong_branch_error();				\
	likely(branch);								\
})

/*
 * static_branch_unlikely(x) - test a static key, hinting the result as
 * unlikely.
 *
 * 'x' is a pointer to a struct static_key_true or struct static_key_false.
 * The helper is selected at compile time from the pointee type:
 *   - struct static_key_true:  branch = arch_static_branch_jump(&(x)->key, false)
 *   - struct static_key_false: branch = arch_static_branch(&(x)->key, false)
 *   - any other type:          branch = ____wrong_branch_error()
 *     (NOTE(review): presumably a deliberately unresolvable symbol that
 *     turns a wrong argument type into a build failure - confirm.)
 *
 * Unlike static_branch_likely(), the helper result is used without
 * negation. The statement expression evaluates to unlikely(branch).
 */
#define static_branch_unlikely(x)						\
({										\
	bool branch;								\
	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
		branch = arch_static_branch_jump(&(x)->key, false);		\
	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
		branch = arch_static_branch(&(x)->key, false);			\
	else									\
		branch = ____wrong_branch_error();				\
	unlikely(branch);							\
})

#else /* !CONFIG_JUMP_LABEL */

/*
 * Fallbacks for builds without CONFIG_JUMP_LABEL: both macros become an
 * ordinary run-time test of static_key_enabled(&(x)->key), wrapped in
 * the likely()/unlikely() hint.
 */
#define static_branch_likely(x)		likely(static_key_enabled(&(x)->key))
#define static_branch_unlikely(x)	unlikely(static_key_enabled(&(x)->key))

#endif /* CONFIG_JUMP_LABEL */

可见同样依赖CONFIG_JUMP_LABEL。如果没有定义的话，直接退化成likely和unlikely。
static_branch_unlikely 和 static_branch_likely 只是填充指令的方式不同（可以参考上面的代码注释），当static_key为false时，都会进入else逻辑语句中。

if (static_branch_unlikely(&static_key))
    do unlikely work;
else
    do likely work;

1.4 修改判断条件

使用static_branch_enable 和 static_branch_disable可以改变static_key 状态

/*
 * Typed-wrapper helpers: forward the embedded '.key' member of 'x' to
 * static_key_enable() / static_key_disable().
 */
#define static_branch_enable(x)        static_key_enable(&(x)->key)
#define static_branch_disable(x)    static_key_disable(&(x)->key)

底层是调用static_key_slow_inc、static_key_slow_dec来改变key->enabled计数。

/*
 * Switch @key on.
 *
 * Reads the current count with static_key_count(), warns once if that
 * count is outside the range 0..1, and calls static_key_slow_inc() only
 * when the count is currently zero. An already-enabled key is left
 * untouched.
 */
static inline void static_key_enable(struct static_key *key)
{
    const int refs = static_key_count(key);

    WARN_ON_ONCE(refs < 0 || refs > 1);

    /* Nothing to do when the key is already on. */
    if (refs != 0)
        return;

    static_key_slow_inc(key);
}
/*
 * Switch @key off.
 *
 * Reads the current count with static_key_count(), warns once if that
 * count is outside the range 0..1, and calls static_key_slow_dec() only
 * when the count is currently non-zero. An already-disabled key is left
 * untouched.
 */
static inline void static_key_disable(struct static_key *key)
{
    const int refs = static_key_count(key);

    WARN_ON_ONCE(refs < 0 || refs > 1);

    /* Nothing to do when the key is already off. */
    if (refs == 0)
        return;

    static_key_slow_dec(key);
}

/*
 * Atomically increment key->enabled.
 *
 * STATIC_KEY_CHECK_USE() runs first.
 * NOTE(review): that macro is not shown in this excerpt; presumably a
 * sanity check on the key's use - confirm.
 */
static inline void static_key_slow_inc(struct static_key *key)
{
	STATIC_KEY_CHECK_USE(key);
	atomic_inc(&key->enabled);
}

/*
 * Atomically decrement key->enabled.
 *
 * STATIC_KEY_CHECK_USE() runs first.
 * NOTE(review): that macro is not shown in this excerpt; presumably a
 * sanity check on the key's use - confirm.
 */
static inline void static_key_slow_dec(struct static_key *key)
{
	STATIC_KEY_CHECK_USE(key);
	atomic_dec(&key->enabled);
}

2、示例代码

下面我们用一段代码来分析static-key对程序分支跳转硬编码的影响。

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/static_key.h>

/* Static key tested by func(); defined with the initially-false initializer. */
DEFINE_STATIC_KEY_FALSE(key);

/*
 * Print one of two messages depending on the state of the static key
 * 'key', tested through static_branch_unlikely().
 *
 * The parameter 'a' is not used in this variant.
 */
void func(int a){
    /* Taken only when the key tests true. */
    if (static_branch_unlikely(&key)) {  
        printk("my_module: Feature is enabled\n");
    } else {
        printk("my_module: Feature is disabled\n");
    }
}

/*
 * Module entry point.
 *
 * Logs the load message, then calls func() once before and once after
 * static_branch_enable(&key), so func() is exercised with the key in
 * both states. Always returns 0.
 */
static int __init my_module_init(void) {
    /*
     * Declared ahead of the first statement: kernel builds warn on a
     * declaration that follows code (-Wdeclaration-after-statement).
     */
    int a = 1;

    pr_info("my_module: Module loaded\n");
    func(a);
    static_branch_enable(&key);
    func(a);
    return 0;
}

/* Module exit point: only logs that the module was unloaded. */
static void __exit my_module_exit(void) {
    pr_info("my_module: Module unloaded\n");
}

/* Register the load and unload entry points defined above. */
module_init(my_module_init);
module_exit(my_module_exit);

/* Module metadata: license, author and description strings. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name");
MODULE_DESCRIPTION("Sample Kernel Module with Static Key");

func汇编代码如下：

0000000000000000 <func>:
   0:   a9bf7bfd        stp     x29, x30, [sp, #-16]!
   4:   910003fd        mov     x29, sp
   8:   d503201f        nop
   c:   90000000        adrp    x0, 0 <func>
  10:   91000000        add     x0, x0, #0x0
  14:   94000000        bl      0 <printk>
  18:   a8c17bfd        ldp     x29, x30, [sp], #16
  1c:   d65f03c0        ret
  20:   90000000        adrp    x0, 0 <func>
  24:   91000000        add     x0, x0, #0x0
  28:   94000000        bl      0 <printk>
  2c:   17fffffb        b       18 <func+0x18>

func中不使用static-key时，汇编代码如下：

void func(int a){
    if (a) {  
        printk("my_module: Feature is enabled\n");
    } else {
        printk("my_module: Feature is disabled\n");
    }
}

0000000000000000 <func>:
   0:   a9bf7bfd        stp     x29, x30, [sp, #-16]!
   4:   910003fd        mov     x29, sp
   8:   340000a0        cbz     w0, 1c <func+0x1c>
   c:   90000000        adrp    x0, 0 <func>
  10:   91000000        add     x0, x0, #0x0
  14:   94000000        bl      0 <printk>
  18:   14000004        b       28 <func+0x28>
  1c:   90000000        adrp    x0, 0 <func>
  20:   91000000        add     x0, x0, #0x0
  24:   94000000        bl      0 <printk>
  28:   a8c17bfd        ldp     x29, x30, [sp], #16
  2c:   d65f03c0        ret