Redis中的整数集合（IntSet）

news2026/2/14 13:42:18

Redis节省内存的两个优秀设计思想：一个是使用连续的内存空间，避免内存碎片开销；另一个是针对不同长度的数据，采用不同大小的元数据，以避免因使用统一大小的元数据而造成内存空间的浪费。IntSet便具备以上两个设计思想。IntSet是Redis中set集合的一种实现方式，基于整数数组来实现，并且具备长度可变、有序等特征。

IntSet的结构

来看看Redis 6.2.4对其结构的定义

/* Header plus trailing element storage of an integer set. */
typedef struct intset {
    uint32_t encoding; /* Encoding in use; 16-, 32- and 64-bit integers are supported */
    uint32_t length;   /* Number of elements */
    int8_t contents[]; /* Integer array holding the set's data */
} intset;

其中encoding包含三种模式，表示存储的整数大小不同

/* Note that these encodings are ordered, so:
 * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
#define INTSET_ENC_INT16 (sizeof(int16_t)) /* two bytes */
#define INTSET_ENC_INT32 (sizeof(int32_t)) /* four bytes */
#define INTSET_ENC_INT64 (sizeof(int64_t)) /* eight bytes */

为了方便查找，Redis会将其按照升序进行排列，保存在contents[]数组中。例如下图
在这里插入图片描述

1byte = 8bit，int16 也就是2个字节。所以上图所示的set集合总字节大小为 encoding 4 + length 4 + 3 * 2byte = 14byte。当这个集合的每个元素的大小固定之后，我们寻址就非常方便了。

在这里插入图片描述

我们可以观察出一个寻址公式：address[i] = startPtr + encoding * i，其中startPtr是contents[]的起始地址，encoding是每个元素占用的字节数，i是元素下标。

IntSet添加元素

文字描述

在集合中利用二分查找找要插入的元素位置pos

找到——说明元素已存在（set集合不允许重复），不做任何修改，将success置为0后直接return

未找到得到元素应当插入的位置

判断元素的位数是否超过了curEncoding

未超过，将pos之后的元素全部后移一位，插入新的元素

超过，更新intset的编码方式并扩容（intsetResize），倒序依次将数组中的元素按新编码拷贝到扩容后的正确位置；新元素比0大放队尾，比0小放队首

简单的流程图

在这里插入图片描述

源代码

看下面的Redis源代码

/* Binary-search the intset for "value".
 *
 * Returns 1 and stores the index of the match in "*pos" when the value is
 * present. Returns 0 and stores in "*pos" the index at which "value" could
 * be inserted when it is absent. "pos" may be NULL, in which case no index
 * is reported. */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
    uint32_t count = intrev32ifbe(is->length);
    int lo = 0, hi = count-1, probe = -1;
    int64_t seen = -1;

    /* Nothing can be found in an empty set; insertion goes at index 0. */
    if (count == 0) {
        if (pos) *pos = 0;
        return 0;
    }

    /* Shortcuts: a value beyond either end cannot be present, but its
     * insert position is already known. */
    if (value > _intsetGet(is,hi)) {
        if (pos) *pos = count;
        return 0;
    }
    if (value < _intsetGet(is,0)) {
        if (pos) *pos = 0;
        return 0;
    }

    while (lo <= hi) {
        /* Unsigned addition before halving, as in the midpoint idiom that
         * avoids signed overflow. */
        probe = ((unsigned int)lo + (unsigned int)hi) >> 1;
        seen = _intsetGet(is,probe);
        if (value == seen) break;
        if (value > seen)
            lo = probe+1;
        else
            hi = probe-1;
    }

    if (value != seen) {
        /* Not present: "lo" is where the value would be inserted. */
        if (pos) *pos = lo;
        return 0;
    }
    if (pos) *pos = probe;
    return 1;
}

/* Insert an integer in the intset.
 *
 * When "success" is non-NULL it is set to 1 if the value was added and to 0
 * if the value was already present (the set holds no duplicates). The
 * returned pointer must be used in place of "is", since it is reassigned
 * from intsetResize() / intsetUpgradeAndAdd(). */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    uint8_t required = _intsetValueEncoding(value); /* encoding needed by value */
    uint32_t slot;                                  /* insert position */

    if (success) *success = 1;

    /* The value needs a wider encoding than the set currently uses:
     * hand over to the upgrade path, which also performs the insertion. */
    if (required > intrev32ifbe(is->encoding))
        return intsetUpgradeAndAdd(is,value);

    /* Already present: report that via "success" and leave the set as is. */
    if (intsetSearch(is,value,&slot)) {
        if (success) *success = 0;
        return is;
    }

    /* Make room for one more element, then shift everything from "slot"
     * onwards one position to the right (nothing to shift when appending). */
    is = intsetResize(is,intrev32ifbe(is->length)+1);
    if (slot < intrev32ifbe(is->length))
        intsetMoveTail(is,slot,slot+1);

    /* Store the new element and bump the element count. */
    _intsetSet(is,slot,value);
    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}

注意：升级编码时不需要再通过二分查找确定插入位置，因为新元素的编码一旦超过当前编码，它必然比集合中所有元素都大（正数）或都小（负数），只需放在队尾或队首；不过已有的所有元素都要按新的编码方式重新写入。

/* Upgrades the intset to a larger encoding and inserts the given integer.
 *
 * The new value is placed at index 0 when it is negative and after the last
 * existing element otherwise. Returns the intset pointer obtained from
 * intsetResize(), which callers must use in place of "is". */
static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
    uint8_t oldenc = intrev32ifbe(is->encoding);    /* encoding before upgrade */
    uint8_t widerenc = _intsetValueEncoding(value); /* encoding needed by value */
    int remaining = intrev32ifbe(is->length);       /* elements left to move */
    int shift = (value < 0);                        /* 1: leave index 0 free */

    /* First set new encoding and resize */
    is->encoding = intrev32ifbe(widerenc);
    is = intsetResize(is,intrev32ifbe(is->length)+1);

    /* Upgrade back-to-front so we don't overwrite values: each element is
     * read with the old encoding and written back with the new one. "shift"
     * keeps an empty slot at either the beginning or the end. */
    for (; remaining != 0; remaining--) {
        int idx = remaining-1;
        _intsetSet(is,idx+shift,_intsetGetEncoded(is,idx,oldenc));
    }

    /* Set the value at the beginning or the end. */
    uint32_t target = shift ? 0 : intrev32ifbe(is->length);
    _intsetSet(is,target,value);

    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}