c++ - unordered_set与unordered

文章目录

前言
- 一、unordered_set模拟实现
- 二、unordered_map模拟实现

前言

1、unordered_set与unordered_map的介绍与接口使用可参考：unordered_set 、 unordered_map。
2、unordered_set和 unordered_map 的底层实现都是基于哈希表的。哈希表是一种通过哈希函数组织数据，以支持快速插入和搜索的数据结构。在 C++ STL（Standard Template Library）中，这两种容器利用哈希表的高效特性来提供快速的查找、插入和删除操作。
3、哈希表

namespace OpenHash
{
	//前置声明 -->给迭代器向上查找
	template<class K, class T, class KeyOfValue, class HF >
	class HashBucket;

	//哈希节点
	template<class T>
	struct HashBucketNode
	{
		HashBucketNode(const T& val)
			: _pNext(nullptr), _val(val)
		{}
		HashBucketNode<T>* _pNext;	
		T _val;	//数据
	};

	//迭代器
	template<class K,class T,class  KeyOfValue,class HF,class Ptr,class Ref>
	struct HsIterator
	{
		typedef HashBucketNode<T> Node;
		typedef HashBucket<K, T, KeyOfValue, HF> Hash;
		typedef HsIterator<K, T, KeyOfValue, HF,Ptr,Ref> Self;

		Node* _cur;
		const Hash* _hash;
		HsIterator(const  Hash* hash,  Node* cur ):_cur(cur),_hash(hash){}

		Self& operator++()
		{	
			KeyOfValue func;
			HF hf;
			if (_cur->_pNext != nullptr)
			{
				_cur = _cur->_pNext;
			}
			else
			{
				int hashi = hf(func(_cur->_val)) % _hash->_table.size();
				Node* cur = _hash->_table[++hashi];
				while (hashi < _hash->_table.size() && cur == nullptr)
				{
					cur = _hash->_table[hashi];
					hashi++;	
				}

				_cur = cur;
			}
			return *this;
		}

		bool operator==(const Self& it)
		{
			return _cur == it._cur;
		}

		bool operator!= (const Self& it)
		{
			return _cur != it._cur;
		}

		Ref operator*()
		{
			return _cur->_val;
		}

		Ptr operator->()
		{
			return &(_cur->_val);
		}
	}; 

	template<class K,class T, class KeyOfValue, class HF >
	class HashBucket
	{
		typedef HashBucketNode<T> Node;	//节点重命名
		typedef HashBucket<K,T, KeyOfValue, HF> Self;	//自身重命名
	public:
		typedef  HsIterator<K, T, KeyOfValue, HF, T*, T&> Iterator;	//迭代器重命名
		typedef HsIterator<K, T, KeyOfValue, HF,const T*,const T&> ConstIterator;	//const迭代器重命名

		//迭代器作为有友元
		template<class K, class T, class  KeyOfValue, class HF, class Ptr, class Ref>
		friend struct HsIterator;

		//构造函数 哈希桶初始化大小为10
		HashBucket(size_t capacity = 10)
			: _table(10)
			, _size(0)
		{}

		//拷贝构造
		HashBucket(const Self& self)
		{
			_table.resize(10);
			//通过遍历Self和插入接口即可
			for (int i = 0; i < self._table.size(); i++)
			{
				Node* cur = self._table[i];
				while (cur)
				{
					Insert(cur->_val);
					cur = cur->_pNext;
				}
			}
		}

		//赋值重载
		Self& operator=(Self self)
		{
			Clear();
			Swap(self);
			return *this;
		}

		~HashBucket()
		{
			Clear();
		}

		Iterator Begin()
		{
			//找到第一个不为空的桶
			int i = 0;
			while (_table[i] == nullptr)
			{
				i++;
			}

			return Iterator(this, _table[i]);
		}

		Iterator End()
		{
			//用nullptr代表最后一个元素的后一个位置
			return Iterator(this, nullptr);
		}

		ConstIterator Begin() const
		{
			int i = 0;
			while (_table[i] == nullptr)
			{
				i++;
			}

			return ConstIterator(this, _table[i]);
		}

		ConstIterator End() const
		{
			return ConstIterator(this, nullptr);
		}

		//返回值：为了unordered_map的重载[]能够使用
		pair<bool, Iterator>Insert(const T & val)
		{
			//将数据转化为键值
			KeyOfValue func;

			//不允许重复键值
			Iterator it = Find(func(val));
			if ( it != End())
			{
				return make_pair(false,it);
			}

			//将键值转化为整形（哈希地址）
			HF ik;
			int hashi = ik(func(val)) % _table.size();

			//超过负载因子
			if (_size == _table.size())
			{
				//库容2倍
				vector<Node*> tmp(_table.size() * 2);

				//将原来的数据转移到tmp
				for (int i = 0; i < _table.size(); i++)
				{
					if (_table[i] != nullptr)
					{
						Node* cur = _table[i];
						while (cur)
						{
							//重新获取哈希地址
							int k = ik(func(cur->_val)) % tmp.size();
							//头插入tmp
							Node* next = cur->_pNext;
							cur->_pNext = tmp[k];
							tmp[k] = cur;
							cur = next;
						}
					}
				}
				//将容器进行交换
				_table.swap(tmp);
			}

			//插入
			Node* cur = new Node(val);
			cur->_pNext = _table[hashi];
			_table[hashi] = cur;
			_size++;

			return make_pair(true, Iterator(this,cur));
		}

		// 删除哈希桶中为data的元素(data不会重复)
		bool Erase(const K& key)
		{
			//将数据转化为键值
			KeyOfValue func;

			//将数据转化为键值
			HF ik;
			int hashi = ik(key) % _table.size();

			//获取头节点
			Node* cur = _table[hashi];
			//前驱指针
			Node* prev = nullptr;
			while (cur)
			{
				//找到，重新连接
				if (func(cur->_val) == key)
				{
					if (prev == nullptr)
					{
						_table[hashi] = cur->_pNext;
					}
					else
					{
						prev->_pNext = cur->_pNext;
					}
					_size--;
					delete cur;
					return true;
				}
				prev = cur;
				cur = cur->_pNext;
			}
			
			return false;
		}

		Iterator Find(const K& key)
		{
			//将数据转化为键值
			KeyOfValue func;

			//将数据转化为键值
			HF ik;
			int hashi = ik(key) % _table.size();

			//获取头节点
			Node* cur = _table[hashi];

			//查找
			while (cur)
			{
				if (func(cur->_val) == key)
				{
					return Iterator(this,cur);
				}
				cur = cur->_pNext;
			}

			return Iterator(this, nullptr);
		}

		//大小
		size_t Size()const
		{
			return _size;
		}

		//是否为空
		bool Empty()const
		{
			return 0 == _size;
		}

		//清空
		void Clear()
		{
			//遍历容器清空每个节点
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_pNext;
					delete cur;
					cur = next;
				}
				//最后置空
				_table[i] = nullptr;
			}
			//大小为0
			_size = 0;
		}

		//交换
		void Swap(Self& ht)
		{
			//交换内部容器即可
			_table.swap(ht._table);
			swap(_size, ht._size);
		}

	private:
		vector<Node*> _table;	//储存哈希桶头节点指针
		size_t _size = 0;      // 哈希表中有效元素的个数
	};
};

一、unordered_set模拟实现

1、底层实现

unordered_set 的底层是一个哈希表。但是，它只存储元素本身，而不是键值对。因此，每个元素的值同时也是其唯一的键。

2、特点

无序性：元素的顺序不保证，且可能会随着插入和删除操作而改变。
元素的唯一性：unordered_set 中的每个元素都是唯一的。
快速查找：同样地，平均情况下，查找、插入和删除操作的时间复杂度都是 O(1)。

3、代码实现
（1）基础框架

//哈希函数
template<class	K>
class HashFunc
{
public:
	size_t operator()(const K& val)
	{
		return val;
	}
};
//特化版本的哈希函数
template<>
class HashFunc<string>
{
public:
	size_t operator()(const string& s)
	{
		const char* str = s.c_str();
		unsigned int seed = 131; // 31 131 1313 13131 131313
		unsigned int hash = 0;
		while (*str)
		{
			hash = hash * seed + (*str++);
		}

		return hash;
	}
};

//数据转化为键值的函数
template<class K>
struct KeyOfValue
{
	const K& operator()(const K& data)
	{
		return data;
	}
};

//unordered_set
template<class K, class HF = HashFunc<K>>
class unordered_set
{
	//哈希表重命名
	typedef OpenHash::HashBucket<K, const K, KeyOfValue<K>, HF> HT;
public:
	//对哈希表迭代器进行重命名
	typename typedef  HT::Iterator iterator;
	typename typedef  HT::ConstIterator const_iterator;

private:
	//哈希表
	HT _ht;
};

unordered_set模板参数
K：即充当键值也充当数据
HF：哈希函数，用于计算哈希值

unordered_set成员变量
_ht: 哈希表

给哈希表传模板参数
K : 代表键值类型，该类型变量用于查找删除等接口
const K:代表数据类型，因为不能被修改所以加上const修饰，该类型变量用于插入等
KeyOfValue<K>：将数据转化为键值（在unordered_set中不明显，因为键值类型与数据类型一致，主要在unordered_map中体现）
HF：哈希函数，用于计算哈希值

（2）接口实现
这里的接口都是直接复用哈希表的接口，包括插入、删除、查找等，析构不用我们自己实现，因为结束时析构自动调用哈希表的析构函数进行清理。

//构造函数
	unordered_set() : _ht()
	{}

	//拷贝构造函数
	unordered_set(const unordered_set& p) :_ht(p._ht)
	{}

	//赋值函数
	unordered_set& operator=(const unordered_set& p)
	{
		_ht = p._ht;

		return *this;
	}

	//迭代器
	iterator begin()
	{
		return _ht.Begin();
	}
	iterator end()
	{
		return _ht.End();
	}
	const_iterator begin() const
	{
		return _ht.Begin();
	}
	const_iterator end()const
	{
		return _ht.End();
	}

	//元素个数
	size_t size()const { return _ht.Size(); }

	//是否为空
	bool empty()const { return _ht.Empty(); }

	//查找
	iterator find(const K& key) { return _ht.Find(key); }

	//插入
	pair<bool, iterator> insert(const K& valye)
	{
		return _ht.Insert(valye);
	}

	//删除
	bool  erase(const K& position)
	{
		return _ht.Erase(position);
	}

（3）测试

void test_set()
{
	//默认构造
	unordered_set<int> s;
	//插入
	s.insert(1);
	s.insert(2);
	s.insert(3);
	s.insert(4);
	
	cout << "s1:";
	//拷贝构造
	unordered_set<int> s1(s);
	//迭代器遍历
	unordered_set<int>::iterator it1 = s1.begin();
	while (it1 != s1.end())
	{
		cout << *it1 << " ";
		++it1;
	}

	cout << endl;
	cout << "s2:";
	//赋值
	unordered_set<int> s2;
	s2 = s;
	//删除
	s2.erase(1);
	s2.erase(2);
	//迭代器遍历
	unordered_set<int>::iterator it2 = s2.begin();
	while (it2 != s2.end())
	{
		cout << *it2 << " ";
		++it2;
	}
}

在这里插入图片描述

二、unordered_map模拟实现

1、底层实现

unordered_map 的底层是一个哈希表，每个元素都是一个键值对（key-value pair）。通过哈希函数，键（key）被映射到一个特定的位置（也称为桶或槽位），这个位置存储了相应的值（value）。如果多个键经过哈希函数计算后得到相同的位置，就发生了哈希冲突，此时通常通过链表或红黑树等数据结构来解决冲突。

2、特点

无序性：元素的顺序不保证，且可能会随着插入和删除操作而改变。
快速查找：平均情况下，查找、插入和删除操作的时间复杂度都是 O(1)。
键的唯一性：每个键在 unordered_map 中必须是唯一的。

3、代码实现
（1）基础框架

//哈希函数
template<class	K>
class HashFunc
{
public:
	size_t operator()(const K& val)
	{
		return val;
	}
};
//特化string
template<>
class HashFunc<string>
{
public:
	size_t operator()(const string& s)
	{
		const char* str = s.c_str();
		unsigned int seed = 131; 
		unsigned int hash = 0;
		while (*str)
		{
			hash = hash * seed + (*str++);
		}

		return hash;
	}
};

//数据转化为键值
template<class K, class V>
struct KeyOfValue
{
	const K& operator()(const pair<K, V>& data)
	{
		return data.first;
	}
};

template<class K, class V, class HF = HashFunc<K>>
class unordered_map
{
	//对哈希表重命名
	typedef OpenHash::HashBucket<K, pair<const K, V>, KeyOfValue<K, V>, HF>  HT;
	
public:
	//迭代器重命名
	typename typedef  HT::Iterator iterator;
	typename typedef  HT::ConstIterator const_iterator;

private:
	//哈希表
	HT _ht;
};

unordered_map模板参数
K：键值类型
V：数据类型
HF：哈希函数

unordered_map成员变量
_ht:哈希表

给哈希表传模板参数
K：键值，用于删除、查找等接口
KeyOfValue<K, V>：将数据转化为键值
pair<const K, V>：作为数据，其中const K作为键值类型（因为键值不能改需要通过const修饰），V作为映射的数据类型，通过KeyOfValue<K, V> 函数返回键值（如在插入时只传入pair<const K, V>类型数据，无法通过数据进行比较，所以需要通过该函数转化）。
HF：哈希函数

（2）接口实现
A . 重载[ ] : 该接口的特点是如果该键值的不存在，则进行插入（键值映射的数据用默认构造初始化），如果存在就返回键值映射的数据，所以需要与插入接口的配合，通过插入接口的返回值来配合。

//重载[]
V& operator[](const K& key)
{
	pair < bool, iterator > ret = _ht.Insert(make_pair(key, V()));
	return ret.second->second;
}

B . 其他接口都是直接复用哈希表的接口，包括插入、删除、查找等，析构不用我们自己实现，因为结束时析构自动调用哈希表的析构函数进行清理。

//构造函数
unordered_map() : _ht()
{}

//拷贝构造
unordered_map(const unordered_map& p) :_ht(p._ht)
{}

//赋值
unordered_map & operator=(const unordered_map& p)
{
	_ht = p._ht;

	return *this;
}

//迭代器
iterator begin()
{
	return _ht.Begin();
}
iterator end()
{
	return _ht.End();
}
const_iterator begin() const
{
	return _ht.Begin();
}
const_iterator end()const
{
	return _ht.End();
}

//元素个数
size_t size()const
{ 
	return _ht.Size();
}
//是否为空
bool empty()const 
{ 
	return _ht.Empty();
}

//查找
iterator find(const K& key) 
{ 
	return _ht.Find(key);
}
//
 插入
pair<bool,iterator> insert(const pair<K, V>& valye)
{
	return _ht.Insert(valye);
}

//删除
bool erase(const K&  key)
{
	return _ht.Erase(key);
}

（3）测试

 void test_map()
{
	//默认构造
	unordered_map<int, int> m;
	//插入
	m.insert({ 1,1 });
	m.insert({ 2,2 });
	m.insert({ 3,3 });
	m.insert({ 4,4 });

	cout << "m1:";
	//拷贝构造
	unordered_map<int, int> m1 = m;
	//迭代器遍历
	unordered_map<int, int>::iterator it1 = m1.begin();
	while (it1 != m1.end())
	{
		cout << it1->first << ":" << it1->second << "   ";
		++it1;
	}
	cout << endl;
	cout << "m2:";
	//赋值
	unordered_map<int, int> m2;
	m2 = m;
	//删除
	m2.erase(1);
	m2.erase(2);
	//重载[]
	m2[3] = 100;
	m2[5] = 100;
	//迭代器遍历
	unordered_map<int, int>::iterator it2 = m2.begin();
	while (it2 != m2.end())
	{
		cout << it2->first << ":" << it2->second << "   ";
		++it2;
	}
}