由[哈希/散列]模拟实现[unordered_map/unordered

文章目录

1.迭代器分析
2.细节处理
3.完整代码
- 3.1HashTable.h
- 3.2unordered_set.h
- 3.3unordered_map.h
- 3.4Test.cpp

1.迭代器分析

在这里插入图片描述

2.细节处理

以下两篇文章均为笔者的呕心沥血
想要搞懂本篇文章的uu请自行查阅

哈希/散列的细节实现

哈希/散列–哈希表[思想到结构][==修订版==]

手撕迭代器的细节处理

模拟实现map/set[改编红黑树实现map/set容器底层]

3.完整代码

3.1HashTable.h

#pragma once
#define _CRT_SECURE_NO_WARNINGS 
#include <iostream>
#include <list>
#include <vector>
#include <algorithm>
#include <array>
#include <time.h>
#include <queue>
#include <stack>
#include <string>
#include <set>
#include <map>
#include <unordered_set>
#include <unordered_map>
#include <functional>
#include <assert.h>
using namespace std;

//转换函数
template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return key;
	}
};
template<>
struct HashFunc<string>
{
	size_t operator()(const string& s)
	{
		size_t value = 0;
		for (auto ch : s)
		{
			value = value * 131 + ch;
		}
		return value;
	}
};

//开散列/链地址法
namespace ChainAddressing
{
	//STL库中unordered_map/unordered_set的定义
	/* template
	   <
		class Key,
		class T,
		class Hash = hash<Key>,
		class Pred = equal_to<Key>,
		class Alloc = allocator< pair<const Key,T> >
	   >
		class unordered_map;
		class unordered_set;
	*/

	//结点类
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_next(nullptr)
			, _data(data)
		{

		}
	};

///  迭代器  

	//哈希表前置声明
	template<class K, class T, class GetKey, class Hash>
	class HashTable;
	//迭代器类
	template<class K, class T, class Ref, class Ptr, class GetKey, class Hash>
	struct HashIterator
	{
		typedef HashNode<T> Node;
		typedef HashTable<K, T, GetKey, Hash> HT;

		typedef HashIterator<K, T, Ref, Ptr, GetKey, Hash> Self;
		typedef HashIterator<K, T, T&, T*, GetKey, Hash> Iterator;

		//成员属性
		Node* _node;
		const HT* _ht;

		//构造函数
		HashIterator(Node* node, const HT* ht)
			:_node(node)
			, _ht(ht)
		{
		
		}

		//特殊构造函数
		HashIterator(const Iterator& it)
			:_node(it._node)
			, _ht(it._ht)
		{
		
		}

		//解引用运算符
		Ref operator*()
		{
			return _node->_data;
		}

		//成员访问运算符
		Ptr operator->()
		{
			return &_node->_data;
		}

		//关系运算符
		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}

		//前置++
		Self& operator++()
		{
			//下个结点不空 向下++即可
			if (_node->_next != nullptr)
			{
				_node = _node->_next;
			}
			//下个结点为空
			else
			{
				GetKey get;
				Hash hash;
				//当前数据的哈希下标
				size_t hashi = hash(get(_node->_data)) % _ht->_tables.size();
				//下个结点为空 说明当前桶已空 找下一个非空桶
				++hashi;

				//找非空桶时 肯定不能越过哈希表
				while (hashi < _ht->_tables.size())
				{
					//_ht->_tables[hashi] : 
					// 取出的是表中的ptr 指向桶中首结点指针[如果存在的话]
					if (_ht->_tables[hashi])
					{
						_node = _ht->_tables[hashi];
						break;
					}
					else
						++hashi;
				}

				//while循环结束情况:
				//1.不满足循环条件 hashi == _ht->_tables.size() 
				// 即找完整个表 都没有找到非空桶 即当前桶之后的桶全为空
				//2._ht->_tables[hashi]不为空 找到了 break出来了 
				//面对上述情况 只需对第一种情况操作 即桶均空 ++失败 表中已无数据 返回空指针
				if (hashi == _ht->_tables.size())
					_node = nullptr;
			}
			return *this;
		}
	};

///

	//哈希表类
	template< class K, class T, class GetKey, class Hash >
	class HashTable
	{
		template<class K, class T, class Ref, class Ptr, class GetKey, class Hash>
		friend struct HashIterator;

		typedef HashNode<T> Node;
	public:
		typedef HashIterator<K, T, T&, T*, GetKey, Hash> iterator;
		typedef HashIterator<K, T, const T&, const T*, GetKey, Hash> const_iterator;

		//迭代器
		iterator begin()
		{
			Node* ptr = nullptr;
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				ptr = _tables[i];
				if (ptr)
					break;
			}

			return iterator(ptr, this);
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}

		const_iterator begin() const
		{
			Node* ptr = nullptr;
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				ptr = _tables[i];
				if (ptr)
					break;
			}

			return const_iterator(ptr, this);
		}

		const_iterator end() const
		{
			return const_iterator(nullptr, this);
		}

		//析构函数
		~HashTable()
		{
			for (auto& ptr : _tables)
			{
				while (ptr)
				{
					//记录下一个结点
					Node* next = ptr->_next;
					//释放当前结点
					delete ptr;
					//更新ptr
					ptr = next;
				}

				ptr = nullptr;
			}
		}

		//查找函数
		iterator Find(const K& key)
		{
			if (_tables.size() == 0)
				return end();

			GetKey get;
			Hash hash;

			size_t hashi = hash(key) % _tables.size();
			Node* ptr = _tables[hashi];
			while (ptr)
			{
				if (get(ptr->_data) == key)
					return iterator(ptr, this);
				ptr = ptr->_next;
			}
			return end();
		}

		//删除函数
		bool Erase(const K& key)
		{
			Hash hash;
			GetKey get;

			size_t hashi = hash(key) % _tables.size();
			Node* prv = nullptr;
			Node* ptr = _tables[hashi];
			while (ptr)
			{
				if (get(ptr->_data) == key)
				{
					if (prv == nullptr)
						_tables[hashi] = ptr->_next;
					else
						prv->_next = ptr->_next;

					delete ptr;
					return true;
				}
				else
				{
					prv = ptr;
					ptr = ptr->_next;
				}
			}
			return false;
		}

		//C++SGI版本STL库:获得下一个素数
		//在SGI下 设定哈希表的容量为素数
		//[可能效率更高 有兴趣的可以自行查阅]
		/*
		size_t GetNextPrime(size_t index)
		{
			static const int num = 28;
			static const unsigned long prime[num] =
			{
				53, 97, 193, 389, 769,
				1543, 3079, 6151, 12289, 24593,
				49157, 98317, 196613, 393241, 786433,
				1572869, 3145739, 6291469, 12582917, 25165843,
				50331653, 100663319, 201326611, 402653189, 805306457,
				1610612741, 3221225473, 4294967291
			};

			size_t i = 0;
			for (i = 0; i < num; ++i)
			{
				if (prime[i] > index)
					return prime[i];
			}

			return prime[i];
		}
		*/

		//插入函数
		pair<iterator, bool> Insert(const T& data)
		{
			GetKey get;
			iterator it = Find(get(data));
			if (it != end())
				return make_pair(it, false);

			Hash hash;
			//负载因子/荷载系数 -- Load_Factor = _n / _tables.size();
			//负载因子 == 1时扩容
			if (_n == _tables.size())
			{
				///  高级代码1.0  /
				/*
				size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				HashTable<K, V> newht;
				newht.resize(newsize);
				for (auto& ptr : _tables)
				{
					while (ptr)
					{
						newht.Insert(ptr->_pair);
						ptr = ptr->_next;
					}
				}

				_tables.swap(newht._tables);
				*/


				//初代扩容
				size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;

				//引进stl库素数思想
				//size_t newsize = GetNextPrime(_tables.size());

				vector<Node*> newtables(newsize, nullptr);
				//遍历旧表 取出旧表里每一个指针
				for (auto& ptr : _tables)
				{
					//ptr是旧表里存储的每一个指针
					//它指向当前哈希桶的首结点 即他存储的是首结点的地址
					while (ptr)
					{
						//算出 当前结点根据新哈希函数计算的新下标
						size_t hashi = hash(get(ptr->_data)) % newtables.size();

						//ptr是首结点的地址 ptr->_next为第二个结点的地址
						Node* next = ptr->_next;

						// 头插到新表
						ptr->_next = newtables[hashi];
						newtables[hashi] = ptr;

						//更新ptr 即向下遍历
						ptr = next;
					}
				}

				_tables.swap(newtables);
			}

			//哈希函数计算出的下标
			size_t hashi = hash(get(data)) % _tables.size();
			//链表头插
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;

			return make_pair(iterator(newnode, this), false);
		}

		//最大桶数据个数
		size_t MaxBucketSize()
		{
			size_t max = 0;
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				auto ptr = _tables[i];
				size_t size = 0;
				while (ptr)
				{
					++size;
					ptr = ptr->_next;
				}

				//每个桶数据个数
				//printf("[%d] -> %d\n", i, size);

				if (size > max)
					max = size;
			}
			return max;
		}
	private:
		vector<Node*> _tables; // 指针数组
		size_t _n = 0;         // 存储有效数据个数
	};
}

3.2unordered_set.h

#pragma once
#include "HashTable.h"

namespace ape
{
	template<class K, class Hash = HashFunc<K>>
	class unordered_set
	{
	public:
		//获得key
		struct SetGetKey
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		typedef typename ChainAddressing::HashTable<K, K, SetGetKey, Hash>::const_iterator iterator;
		typedef typename ChainAddressing::HashTable<K, K, SetGetKey, Hash>::const_iterator const_iterator;

		//迭代器
		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		const_iterator begin() const
		{
			return _ht.begin();
		}

		const_iterator end() const
		{
			return _ht.end();
		}

		//插入
		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}
		//查找
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
		//删除
		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}

	private:
		ChainAddressing::HashTable<K, K, SetGetKey, Hash> _ht;
	};

	//打印函数
	void print(const unordered_set<int>& s)
	{
		unordered_set<int>::const_iterator it = s.begin();
		while (it != s.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;
	}

	//测试函数
	void test_unordered_set()
	{
		int a[] = { 3, 33, 2, 13, 5, 12, 1002 };
		unordered_set<int> s;
		for (auto e : a)
			s.insert(e);

		s.insert(54);
		s.insert(107);

		unordered_set<int>::iterator it = s.begin();
		while (it != s.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;

		for (auto e : s)
			cout << e << " ";

		cout << endl;
		print(s);
	}
}

3.3unordered_map.h

#pragma once
#include "HashTable.h"

namespace ape
{
	template<class K, class V, class Hash = HashFunc<K>>
	class unordered_map
	{
	public:
		//获得key
		struct MapGetKey
		{
			const K& operator()(const pair<K, V>& pair)
			{
				return pair.first;
			}
		};
	public:
		typedef typename ChainAddressing::HashTable<K, pair<const K, V>, MapGetKey, Hash>::iterator iterator;
		typedef typename ChainAddressing::HashTable<K, pair<const K, V>, MapGetKey, Hash>::const_iterator const_iterator;

		//迭代器
		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		const_iterator begin() const
		{
			return _ht.begin();
		}

		const_iterator end() const
		{
			return _ht.end();
		}

		//插入
		pair<iterator, bool> insert(const pair<K, V>& pair)
		{
			return _ht.Insert(pair);
		}

		//下标运算符
		V& operator[](const K& key)
		{
			pair<iterator, bool> pair = insert(make_pair(key, V()));
			return pair.first->second;
		}

		//查找
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}

		//删除
		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}

	private:
		ChainAddressing::HashTable<K, pair<const K, V>, MapGetKey, Hash> _ht;
	};

	void test_unordered_map1()
	{
		unordered_map<int, int> m;
		m.insert(make_pair(1, 1));
		m.insert(make_pair(3, 3));
		m.insert(make_pair(2, 2));

		unordered_map<int, int>::iterator it = m.begin();
		while (it != m.end())
		{
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;
	}

	void test_unordered_map2()
	{
		string s[] = { "西瓜", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉", "梨" };
		unordered_map<string, int> um;
		for (auto& e : s)
		{
			um[e]++;
		}

		for (auto& pair : um)
		{
			cout << pair.first << ":" << pair.second << endl;
		}
	}

	class Date
	{
		friend struct HashDate;
		friend ostream& operator<<(ostream& _cout, const Date& d);

	public:
		Date(int year = 1900, int month = 1, int day = 1)
			: _year(year)
			, _month(month)
			, _day(day)
		{
		
		}

		bool operator<(const Date& d) const
		{
			return (_year < d._year) 
				|| (_year == d._year && _month < d._month)
				|| (_year == d._year && _month == d._month && _day < d._day);
		}

		bool operator>(const Date& d) const
		{
			return (_year > d._year)
				|| (_year == d._year && _month > d._month) 
				|| (_year == d._year && _month == d._month && _day > d._day);
		}

		bool operator==(const Date& d) const
		{
			return _year == d._year
				&& _month == d._month
				&& _day == d._day;
		}

	private:
		int _year;
		int _month;
		int _day;
	};
	ostream& operator<<(ostream& _cout, const Date& d)
	{
		_cout << d._year << "-" << d._month << "-" << d._day;
		return _cout;
	}

	struct HashDate
	{
		size_t operator()(const Date& d)
		{
			size_t value = 0;

			value += d._year;
			value *= 131;

			value += d._month;
			value *= 131;

			value += d._day;
			value *= 131;
			 
			return value;
		}
	};

	void test_unordered_map4()
	{
		Date d1(2023, 10, 1);
		Date d2(2023, 10, 5);
		Date d3(2023, 10, 2);
		Date d4(2023, 10, 2);
		Date d5(2023, 10, 2);
		Date d6(2023, 10, 1);

		Date a[] = { d1, d2, d3, d4, d5, d6 };

		unordered_map<Date, int, HashDate> um;
		for (auto e : a)
		{
			um[e]++;
		}

		for (auto& pair : um)
		{
			cout << pair.first << ":" << pair.second << endl;
		}
	}
}

3.4Test.cpp

#define _CRT_SECURE_NO_WARNINGS 
#include <iostream>
#include <list>
#include <vector>
#include <algorithm>
#include <array>
#include <time.h>
#include <queue>
#include <stack>
#include <string>
#include <set>
#include <map>
#include <unordered_set>
#include <unordered_map>
#include <functional>
#include <assert.h>
using namespace std;


#include "HashTable.h"
#include "Unordered_Map.h"
#include "Unordered_Set.h"


int main()
{
	//ape::test_unordered_set();
	ape::test_unordered_map4();
	
	return 0;
}