Redis 源码分析-内部数据结构 dict
在上一篇 Redis 数据库源码分析 提到了 Redis 其实用了全局的 hash 表来存储所有的键值对,即下方图示的 dict,dict 中有两个数组,其中 ht[1] 只在 rehash 时候才真正用到,平时都是指向 null,hash 的底层又是使用数组来实现的,那就引出了接下来的几个问题:
- redis 启动初始化这个 dict 时数组容量是多少
- 如果发生了 hash 冲突怎么办
- 扩容机制(扩容的时机,扩容的实现)
dict 的创建
/* Create a new hash table */
dict *dictCreate(dictType *type, void *privDataPtr)
{
dict *d = zmalloc(sizeof(*d));
_dictInit(d,type,privDataPtr);
return d;
}
/* Initialize the hash table */
int _dictInit(dict *d, dictType *type, void *privDataPtr)
{
_dictReset(&d->ht[0]);
_dictReset(&d->ht[1]);
d->type = type;
d->privdata = privDataPtr;
d->rehashidx = -1;
d->iterators = 0;
return DICT_OK;
}
/* Reset a hash table already initialized with ht_init().
* NOTE: This function should only be called by ht_destroy(). */
static void _dictReset(dictht *ht)
{
ht->table = NULL;
ht->size = 0; // table 的长度, 一定是2的倍数
ht->sizemask = 0; // size - 1,具体原因参见上篇
ht->used = 0; // 已经存的元素总数
}
代码分析:
- 2行,创建变量时的
dictType *type
,包含很多函数指针,无论是添加或查找 key 时 hash 函数还是删除 dictEntry 时的析构函数 - 25行,
ht->table = NULL
,也就是说 dict 在被创建时 table 并没有分配内存空间,意味着第一个数据插入时才会真正分配空间。
dict 的扩容
/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
{
/* Incremental rehashing already in progress. Return. */
if (dictIsRehashing(d)) return DICT_OK;
/* If the hash table is empty expand it to the initial size. */
if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
/* If we reached the 1:1 ratio, and we are allowed to resize the hash
* table (global setting) or we should avoid it but the ratio between
* elements/buckets is over the "safe" threshold, we resize doubling
* the number of buckets. */
if (d->ht[0].used >= d->ht[0].size &&
(dict_can_resize ||
d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
{
return dictExpand(d, d->ht[0].used*2);
}
return DICT_OK;
}
/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
/* the size is invalid if it is smaller than the number of
* elements already inside the hash table */
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;
dictht n; /* the new hash table */
unsigned long realsize = _dictNextPower(size); // _dictNextPower 根据传入的 size 获得最终分配的 size
/* Rehashing to the same table size is not useful. */
if (realsize == d->ht[0].size) return DICT_ERR;
/* Allocate the new hash table and initialize all pointers to NULL */
n.size = realsize;
n.sizemask = realsize-1;
n.table = zcalloc(realsize*sizeof(dictEntry*));
n.used = 0;
/* Is this the first initialization? If so it's not really a rehashing
* we just set the first hash table so that it can accept keys. */
if (d->ht[0].table == NULL) {
d->ht[0] = n;
return DICT_OK;
}
/* Prepare a second hash table for incremental rehashing */
d->ht[1] = n;
d->rehashidx = 0;
return DICT_OK;
}
/* Our hash table capability is a power of two */
static unsigned long _dictNextPower(unsigned long size)
{
unsigned long i = DICT_HT_INITIAL_SIZE;
if (size >= LONG_MAX) return LONG_MAX + 1LU;
while(1) {
if (i >= size)
return i;
i *= 2;
}
}
代码分析:
- 8行,如果是第一次插入(ht[0].size == 0),用初始化大小(DICT_HT_INITIAL_SIZE)分配,这个值默认是 4
- 14行,扩容的判断条件,负载因子(d->ht[0].used / d->ht[0].size),即 ht 中元素总数与 bucket 个数的比值
- 负载因子 >= 1,且 dict_can_resize,dict_can_resize 的值受此时 redis 状态的影响,例如 redis 在进行 aof 重写或 rdb生成时,不想造成太多的内存拷贝,就会关闭
- 负载因子 >= 5,即 dict_force_resize_ratio 的默认值,此时 hash 冲突已经很严重了,不管此时处于什么状态,都会进行扩容
dict 的销毁
/* Clear & Release the hash table */
void dictRelease(dict *d)
{
_dictClear(d,&d->ht[0],NULL);
_dictClear(d,&d->ht[1],NULL);
zfree(d);
}
/* Destroy an entire dictionary */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
unsigned long i;
/* Free all the elements */
for (i = 0; i < ht->size && ht->used > 0; i++) {
dictEntry *he, *nextHe;
if (callback && (i & 65535) == 0) callback(d->privdata);
if ((he = ht->table[i]) == NULL) continue;
while(he) {
nextHe = he->next;
dictFreeKey(d, he);
dictFreeVal(d, he);
zfree(he);
ht->used--;
he = nextHe;
}
}
/* Free the table and the allocated cache structure */
zfree(ht->table);
/* Re-initialize the table */
_dictReset(ht);
return DICT_OK; /* never fails */
}
#define dictFreeKey(d, entry) \
if ((d)->type->keyDestructor) \
(d)->type->keyDestructor((d)->privdata, (entry)->key)
#define dictFreeVal(d, entry) \
if ((d)->type->valDestructor) \
(d)->type->valDestructor((d)->privdata, (entry)->v.val)
代码分析:
- 20-27行:删除每个 dictEntry 时会调用 key 和 value 的析构函数(keyDestructor 和 valDestructor),用于释放键和值所占的内存。
dict 添加元素
主要涉及两个函数,dictAdd 和 dictReplace
dictAdd
该函数向 hash table 中添加数据,如果 key 已经存在会返回失败
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
dictEntry *entry = dictAddRaw(d,key,NULL);
if (!entry) return DICT_ERR;
dictSetVal(d, entry, val);
return DICT_OK;
}
/* Low level add or find:
* This function adds the entry but instead of setting a value returns the
* dictEntry structure to the user, that will make sure to fill the value
* field as he wishes.
*
* This function is also directly exposed to the user API to be called
* mainly in order to store non-pointers inside the hash value, example:
*
* entry = dictAddRaw(dict,mykey,NULL);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
* If key already exists NULL is returned, and "*existing" is populated
* with the existing entry if existing is not NULL.
*
* If key was added, the hash entry is returned to be manipulated by the caller.
*/
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
long index;
dictEntry *entry;
dictht *ht;
if (dictIsRehashing(d)) _dictRehashStep(d);
/* Get the index of the new element, or -1 if
* the element already exists. */
if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
return NULL;
/* Allocate the memory and store the new entry.
* Insert the element in top, with the assumption that in a database
* system it is more likely that recently added entries are accessed
* more frequently. */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;
/* Set the hash entry fields. */
dictSetKey(d, entry, key);
return entry;
}
代码分析:
- 34行判断此时是否正在 rehash,如果在,则将 rehash 向前推进一步
- 38行,
_dictKeyIndex
函数根据传入的 key 寻找插入的位置,如果该 key 已经存在,会返回-1 - 45行,如果此时正在 rehash,就把数据插入到 ht[1],否则插入到 ht[0]
- 47行-49行,使用
头插法插入数据
上面有两个关键函数,一个是_dictKeyIndex
,另一个是_dictRehashStep
_dictKeyIndex
/* Returns the index of a free slot that can be populated with
* a hash entry for the given 'key'.
* If the key already exists, -1 is returned
* and the optional output parameter may be filled.
*
* Note that if we are in the process of rehashing the hash table, the
* index is always returned in the context of the second (new) hash table. */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
unsigned long idx, table;
dictEntry *he;
if (existing) *existing = NULL;
/* Expand the hash table if needed */
if (_dictExpandIfNeeded(d) == DICT_ERR)
return -1;
for (table = 0; table <= 1; table++) {
idx = hash & d->ht[table].sizemask;
/* Search if this slot does not already contain the given key */
he = d->ht[table].table[idx];
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key)) {
if (existing) *existing = he;
return -1;
}
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
return idx;
}
代码分析:
- 15行,会判断此时是否需要扩容
- 28行,查找时会先从 ht[0] 查找,如果 ht[0] 中没有数据且此时不在进行 rehash,就直接返回
_dictKeyIndex 如果 key 已经存在会返回 -1,否则返回该 key 需要插入的桶的索引。
_dictRehashStep
/* This function performs just a step of rehashing, and only if there are
* no safe iterators bound to our hash table. When we have iterators in the
* middle of a rehashing we can't mess with the two hash tables otherwise
* some element can be missed or duplicated.
*
* This function is called by common lookup or update operations in the
* dictionary so that the hash table automatically migrates from H1 to H2
* while it is actively used. */
static void _dictRehashStep(dict *d) {
if (d->iterators == 0) dictRehash(d,1);
}
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
*
* Note that a rehashing step consists in moving a bucket (that may have more
* than one key as we use chaining) from the old to the new hash table, however
* since part of the hash table may be composed of empty spaces, it is not
* guaranteed that this function will rehash even a single bucket, since it
* will visit at max N*10 empty buckets in total, otherwise the amount of
* work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;
while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(d->ht[0].size > (unsigned long)d->rehashidx);
while(d->ht[0].table[d->rehashidx] == NULL) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
uint64_t h;
nextde = de->next;
/* Get the index in the new hash table */
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
d->ht[0].used--;
d->ht[1].used++;
de = nextde;
}
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}
/* Check if we already rehashed the whole table... */
if (d->ht[0].used == 0) {
zfree(d->ht[0].table);
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
d->rehashidx = -1;
return 0;
}
/* More to rehash... */
return 1;
}
代码分析:
-
32-35行,如果 rehashidx 指向的 bucket 里一个 dictEntry 也没有,那么它就没有可迁移的数据。这时它尝试在 ht[0].table 数组中不断向后遍历,直到找到下一个存有数据的 bucket 位置。如果一直找不到,则最多走 n*10 步(empty_visits)。避免在该操作上一直向后遍历,影响响应时间。
-
43行,原 bucket 里的元素重新 hash 计算后对新的 sizemask 进行余操作,确定在新 table 上的位置。
-
55-61行,如果 ht[0] 上的数据都迁移到 ht[1] 上了(即d->ht[0].used == 0),那么整个重哈希结束,ht[0] 变成 ht[1] 的内容,而 ht[1] 重置为空。
参考下面这张图:哈希桶即 bucket,entry 即 dictEntry
dictReplace
该函数向 hash table 中添加数据,如果 key 已经存在会替换
/* Add or Overwrite:
* Add an element, discarding the old value if the key already exists.
* Return 1 if the key was added from scratch, 0 if there was already an
* element with such key and dictReplace() just performed a value update
* operation. */
int dictReplace(dict *d, void *key, void *val)
{
dictEntry *entry, *existing, auxentry;
/* Try to add the element. If the key
* does not exists dictAdd will succeed. */
entry = dictAddRaw(d,key,&existing);
if (entry) {
dictSetVal(d, entry, val);
return 1;
}
/* Set the new value and free the old one. Note that it is important
* to do that in this order, as the value may just be exactly the same
* as the previous one. In this context, think to reference counting,
* you want to increment (set), and then decrement (free), and not the
* reverse. */
auxentry = *existing;
dictSetVal(d, existing, val);
dictFreeVal(d, &auxentry);
return 0;
}
代码分析:
- 12行,如果插入成功(key不存在)entry 就是插入的对象,如果 key 已经存在,则 existing 变量就是对应的 dictEntry
- 23-26行,注意顺序是先创建并设置新值,再释放旧值。
dict 删除元素
/* Remove an element, returning DICT_OK on success or DICT_ERR if the
* element was not found. */
int dictDelete(dict *ht, const void *key) {
return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}
/* Search and remove an element. This is an helper function for
* dictDelete() and dictUnlink(), please check the top comment
* of those functions. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
uint64_t h, idx;
dictEntry *he, *prevHe;
int table;
if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
prevHe = NULL;
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key)) {
/* Unlink the element from the list */
if (prevHe)
prevHe->next = he->next;
else
d->ht[table].table[idx] = he->next;
if (!nofree) {
dictFreeKey(d, he);
dictFreeVal(d, he);
zfree(he);
}
d->ht[table].used--;
return he;
}
prevHe = he;
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
return NULL; /* not found */
}
代码分析:
- 17行,如果在 rehash,也推进一步
- 25-38行,其实就是首先根据 hash 找到对应的 bucket,在链表中删除该元素,找不到则返回 null,根据是否在 rehash 决定只查找 ht[0] 还是 ht[1]
- 32-33行,删除后会调用 key 和 value 的析构函数(keyDestructor 和 valDestructor)
dict 查找元素
dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *he;
uint64_t h, idx, table;
if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key))
return he;
he = he->next;
}
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}
代码分析:
- 7行,如果此时在 rehash,它也会推进一步
- 10行,hash 值与 sizemask 取余 = hash % size,这样写的原因是 & 一步到位,而 % 是一直除下去
- 17行,如果 ht[0] 中没有,且当前不在 rehash,直接返回 null,否则去 ht[1] 中查找
参考资料
【1】Redis(5.0.8)源码
【2】哔哩哔哩-Redis底层设计与源码分析
【3】极客时间-Redis核心技术剖析与实战