当前位置: 首页 > news >正文

完整教程:【C++】22. 封装哈希表实现unordered_set和unordered_map

完整教程:【C++】22. 封装哈希表实现unordered_set和unordered_map

一、源码及框架分析

SGI-STL30版本源代码中没有unordered_map和unordered_set,SGI-STL30版本是C++11之前的STL版本,这两个容器是C++11之后才更新的。但是SGI-STL30实现了哈希表,容器的名字是hash_map和hash_set,它们是作为非标准的容器出现的,非标准是指非C++标准规定必须实现的,源代码在hash_map/hash_set/stl_hash_map.h/stl_hash_set.h/stl_hashtable.h中。

hash_map和hash_set的实现结构框架核心部分截取出来如下:

// Abridged excerpt of the SGI-STL v3.0 sources (quoted for reference only; it
// depends on SGI-internal headers and is not meant to be compiled on its own).

// hash_set.h
#include <stl_hashtable.h>
#include <stl_hash_set.h>

// hash_map.h
#include <stl_hashtable.h>
#include <stl_hash_map.h>

// stl_hash_set.h
// Singly linked bucket node: one stored value plus the link to the next node.
template <class Value>
struct __hashtable_node
{
    __hashtable_node* next;
    Value val;
};

// hash_set stores Value both as the element type and as the key type, and
// passes identity<Value> as the key extractor.
template <class Value, class HashFcn = hash<Value>,
          class EqualKey = equal_to<Value>,
          class Alloc = alloc>
class hash_set
{
private:
    typedef hashtable<Value, Value, HashFcn, identity<Value>,
                      EqualKey, Alloc> ht;
    ht rep;

public:
    typedef typename ht::key_type key_type;
    typedef typename ht::value_type value_type;
    typedef typename ht::hasher hasher;
    typedef typename ht::key_equal key_equal;

    // Both iterator typedefs map to the table's const_iterator.
    typedef typename ht::const_iterator iterator;
    typedef typename ht::const_iterator const_iterator;
};

// stl_hash_map.h
// hash_map stores pair<const Key, T> and passes select1st<...> as the key
// extractor.
template <class Key, class T, class HashFcn = hash<Key>,
          class EqualKey = equal_to<Key>,
          class Alloc = alloc>
class hash_map
{
private:
    typedef hashtable<pair<const Key, T>, Key, HashFcn,
                      select1st<pair<const Key, T>>, EqualKey, Alloc> ht;
    ht rep;

public:
    typedef typename ht::key_type key_type;
    typedef T data_type;
    typedef T mapped_type;
    typedef typename ht::value_type value_type;
    typedef typename ht::hasher hasher;
    typedef typename ht::key_equal key_equal;
    typedef typename ht::iterator iterator;
    typedef typename ht::const_iterator const_iterator;
};

// stl_hashtable.h
// The shared table: Value is what a node stores, Key is what is hashed and
// compared, ExtractKey obtains the Key from a Value.
template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey,
          class Alloc>
class hashtable
{
public:
    typedef Key key_type;
    typedef Value value_type;
    typedef HashFcn hasher;
    typedef EqualKey key_equal;

    hasher hash_funct() const { return hash; }
    key_equal key_eq() const { return equals; }

private:
    hasher hash;
    key_equal equals;
    ExtractKey get_key;

    typedef __hashtable_node<Value> node;

    vector<node*, Alloc> buckets;   // one chain head per bucket
    size_type num_elements;

public:
    typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
                                 Alloc> iterator;

    // Declarations only; the bodies are omitted in this excerpt.
    pair<iterator, bool> insert_unique(const value_type& obj);
    const_iterator find(const key_type& key) const;
};

在这里插入图片描述

  • 通过画图分析可以看到,结构上hash_map和hash_set跟map和set完全类似,复用同一个hashtable实现key和key/value结构,hash_set传给hashtable的是key,hash_map传给hashtable的是pair<const key, value>。

二、模拟实现unordered_map和unordered_set

1、实现出复用哈希表的框架,并支持insert

// The three listings below are separate headers. HashTable.h is shown first
// because UnorderedSet.h and UnorderedMap.h both depend on it.

// HashTable.h
#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Default hash functor: converts a key to an unsigned integer with a cast.
template<class K>
struct HashFunc
{
    size_t operator()(const K& key) const
    {
        return (size_t)key;
    }
};

// Specialization for std::string: multiplicative (BKDR-style, factor 131) hash
// over the characters.
template<>
struct HashFunc<std::string>
{
    size_t operator()(const std::string& s) const
    {
        size_t hash = 0;
        for (auto ch : s)
        {
            hash += ch;
            hash *= 131;
        }
        return hash;
    }
};

// Prime table used for the initial bucket count and for growth.
// Returns the smallest listed prime that is >= n, or the largest listed prime
// when n exceeds every entry.
inline unsigned long _stl_next_prime(unsigned long n)
{
    static const int _stl_num_primes = 28;
    static const unsigned long _stl_prime_list[_stl_num_primes] = {
        53, 97, 193, 389, 769,
        1543, 3079, 6151, 12289, 24593,
        49157, 98317, 196613, 393241, 786433,
        1572869, 3145739, 6291469, 12582917, 25165843,
        50331653, 100663319, 201326611, 402653189, 805306457,
        1610612741, 3221225473ul, 4294967291ul
    };
    const unsigned long* first = _stl_prime_list;
    const unsigned long* last = _stl_prime_list + _stl_num_primes;
    const unsigned long* pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}

namespace hash_bucket
{
    // Node of a bucket's singly linked chain.
    template<class T>
    struct HashNode
    {
        T _data;
        HashNode<T>* _next;

        HashNode(const T& data)
            : _data(data), _next(nullptr)
        {}
    };

    // Implementation roadmap:
    // 1. implement the hash table
    // 2. wrap it as unordered_map / unordered_set (KeyOfT extracts the key)
    // 3. iterator
    // 4. const_iterator
    // 5. make the key read-only
    // 6. operator[]
    //
    // K: key type; T: stored element; KeyOfT: functor mapping T -> key;
    // Hash: functor mapping key -> size_t.
    template<class K, class T, class KeyOfT, class Hash>
    class HashTable
    {
        typedef HashNode<T> Node;

    public:
        HashTable()
            : _tables(_stl_next_prime(0), nullptr), _n(0)
        {}

        // Deep copy: every chain is duplicated node by node, keeping its order.
        HashTable(const HashTable& ht)
            : _tables(ht._tables.size(), nullptr), _n(ht._n)
        {
            for (size_t i = 0; i < ht._tables.size(); ++i)
            {
                Node* newHead = nullptr;
                Node* tail = nullptr;
                for (Node* cur = ht._tables[i]; cur; cur = cur->_next)
                {
                    Node* newnode = new Node(cur->_data);
                    if (newHead == nullptr)
                        newHead = newnode;
                    else
                        tail->_next = newnode;
                    tail = newnode;
                }
                _tables[i] = newHead;
            }
        }

        void Swap(HashTable& ht)
        {
            _tables.swap(ht._tables);
            std::swap(_n, ht._n);
        }

        // Copy-and-swap: the by-value parameter is the copy.
        HashTable& operator=(HashTable ht)
        {
            Swap(ht);
            return *this;
        }

        ~HashTable()
        {
            for (size_t i = 0; i < _tables.size(); ++i)
            {
                Node* cur = _tables[i];
                while (cur)
                {
                    Node* next = cur->_next;
                    delete cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
        }

        // Inserts `data` unless an element with the same key already exists.
        // Returns true when a new node was added, false for a duplicate key.
        bool Insert(const T& data)
        {
            KeyOfT kot;
            Hash hash;

            // Reject duplicates. Iterators and Find() are only introduced in
            // the next step, so the bucket chain is scanned directly here.
            for (Node* cur = _tables[hash(kot(data)) % _tables.size()]; cur; cur = cur->_next)
            {
                if (kot(cur->_data) == kot(data))
                    return false;
            }

            // Grow once the load factor reaches 1.
            if (_n == _tables.size())
            {
                std::vector<Node*> newTable(_stl_next_prime(_tables.size() + 1), nullptr);
                for (size_t i = 0; i < _tables.size(); ++i)
                {
                    Node* cur = _tables[i];
                    while (cur)
                    {
                        Node* next = cur->_next;
                        // Relink the existing node at the head of its new bucket
                        // (no node is reallocated).
                        size_t hashi = hash(kot(cur->_data)) % newTable.size();
                        cur->_next = newTable[hashi];
                        newTable[hashi] = cur;
                        cur = next;
                    }
                    _tables[i] = nullptr;
                }
                _tables.swap(newTable);
            }

            // Head insertion into the target bucket.
            size_t hashi = hash(kot(data)) % _tables.size();
            Node* newnode = new Node(data);
            newnode->_next = _tables[hashi];
            _tables[hashi] = newnode;
            ++_n;
            return true;
        }

    private:
        std::vector<Node*> _tables;   // bucket array: one chain head per slot
        size_t _n = 0;                // number of stored elements
    };
}

// UnorderedSet.h
namespace zsy
{
    template<class K, class Hash = HashFunc<K>>
    class unordered_set
    {
        // For a set the stored element is the key itself.
        struct SetKeyOfT
        {
            const K& operator()(const K& key) const
            {
                return key;
            }
        };

    public:
        bool insert(const K& key)
        {
            return _ht.Insert(key);
        }

    private:
        hash_bucket::HashTable<K, K, SetKeyOfT, Hash> _ht;
    };
}

// UnorderedMap.h
namespace zsy
{
    template<class K, class V, class Hash = HashFunc<K>>
    class unordered_map
    {
        // For a map the key is the first member of the stored pair.
        struct MapKeyOfT
        {
            const K& operator()(const std::pair<K, V>& kv) const
            {
                return kv.first;
            }
        };

    public:
        bool insert(const std::pair<K, V>& kv)
        {
            return _ht.Insert(kv);
        }

    private:
        hash_bucket::HashTable<K, std::pair<K, V>, MapKeyOfT, Hash> _ht;
    };
}

2、支持iterator的实现

iterator核心源代码

template <
class Value
, class Key
, class HashFcn
,
class ExtractKey
, class EqualKey
, class Alloc
>
struct __hashtable_iterator {
typedef hashtable<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc> hashtable;typedef __hashtable_iterator<Value, Key, HashFcn,ExtractKey, EqualKey, Alloc> iterator;typedef __hashtable_const_iterator<Value, Key, HashFcn,ExtractKey, EqualKey, Alloc> const_iterator;typedef __hashtable_node<Value> node;typedef forward_iterator_tag iterator_category;typedef Value value_type;node* cur;hashtable* ht;__hashtable_iterator(node* n, hashtable* tab) : cur(n), ht(tab) {}__hashtable_iterator() {}reference operator*() const {return cur->val;}#ifndef __SGI_STL_NO_ARROW_OPERATORpointer operator->() const {return &(operator*());}#endif /* __SGI_STL_NO_ARROW_OPERATOR */iterator &operator++();iterator operator++(int);bool operator==(const iterator& it) const {return cur == it.cur;}bool operator!=(const iterator& it) const {return cur != it.cur;}};template <class V, class K, class HF, class ExK, class EqK, class A>__hashtable_iterator<V, K, HF, ExK, EqK, A>&__hashtable_iterator<V, K, HF, ExK, EqK, A>::operator++(){const node* old = cur;cur = cur->next;if (!cur) {size_type bucket = ht->bkt_num(old->val);while (!cur &&++bucket < ht->buckets.size())cur = ht->buckets[bucket];}return *this;}

iterator实现思路分析:

  • iterator实现的大框架跟list的iterator思路是一致的:用一个类型封装结点的指针,再通过重载运算符,让迭代器表现出像指针一样的访问行为。要注意的是哈希表的迭代器是单向迭代器。

  • 这里的难点是operator++的实现。iterator中有一个指向结点的指针,如果当前桶下面还有结点,则结点的指针指向下一个结点即可。如果当前桶走完了,则需要想办法计算找到下一个桶。这里的难点反而是结构设计的问题,参考上面的源码,我们可以看到iterator中除了有结点的指针,还有哈希表对象的指针,这样当前桶走完了,要计算下一个桶就相对容易多了:用key值计算出当前桶位置,依次往后找下一个不为空的桶即可。

  • begin()返回第一个非空桶中第一个节点指针构造的迭代器,end()返回的迭代器可以用空指针表示。

  • unordered_set的iterator也不支持修改,我们把unordered_set传给HashTable的第二个模板参数改成const K即可, HashTable<K, const K, SetKeyOfT, Hash> _ht;

  • unordered_map的iterator不支持修改key但是可以修改value,我们把unordered_map传给HashTable的第二个模板参数pair的第一个参数改成const K即可, HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;

  • 支持完整的迭代器还有很多细节需要修改,具体参考下面代码。

#include <cstddef>
#include <vector>

// Excerpt of HashTable.h showing only what iterator support adds.
// HashNode<T> is the chain node defined in the HashTable.h listing.
template<class T>
struct HashNode;

// Forward declaration: the iterator needs to name the table type.
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

// Forward iterator over every element of a HashTable.
// Ref / Ptr select between the mutable (T&, T*) and the read-only
// (const T&, const T*) flavour.
template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct HTIterator
{
    typedef HashNode<T> Node;
    typedef HashTable<K, T, KeyOfT, Hash> HT;
    typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

    Node* _node;     // current node; nullptr represents end()
    const HT* _ht;   // owning table, needed to move on to later buckets

    HTIterator(Node* node, const HT* ht)
        : _node(node), _ht(ht)
    {}

    Ref operator*() const
    {
        return _node->_data;
    }

    Ptr operator->() const
    {
        return &_node->_data;
    }

    bool operator==(const Self& s) const
    {
        return _node == s._node;
    }

    bool operator!=(const Self& s) const
    {
        return _node != s._node;
    }

    Self& operator++()
    {
        // More nodes in the current bucket: just follow the chain.
        if (_node->_next)
        {
            _node = _node->_next;
            return *this;
        }

        // Current bucket exhausted: recompute its index from the key, then
        // look for the next non-empty bucket.
        KeyOfT kot;
        Hash hash;
        size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();

        _node = nullptr;   // stays null (== end()) if no later bucket has a node
        for (++hashi; hashi < _ht->_tables.size(); ++hashi)
        {
            if (_ht->_tables[hashi])
            {
                _node = _ht->_tables[hashi];
                break;
            }
        }
        return *this;
    }
};

template<class K, class T, class KeyOfT, class Hash>
class HashTable
{
    // Friend declaration so HTIterator can read _tables. The parameter names
    // must differ from the enclosing template's, otherwise GCC/Clang reject
    // the declaration for shadowing a template parameter.
    template<class K1, class T1, class Ref1, class Ptr1, class KeyOfT1, class Hash1>
    friend struct HTIterator;

    typedef HashNode<T> Node;

public:
    typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
    typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;

    // Iterator to the first node of the first non-empty bucket, or End().
    Iterator Begin()
    {
        if (_n == 0)
            return End();

        for (size_t i = 0; i < _tables.size(); ++i)
        {
            if (_tables[i])
                return Iterator(_tables[i], this);
        }
        return End();
    }

    Iterator End()
    {
        return Iterator(nullptr, this);
    }

    ConstIterator Begin() const
    {
        if (_n == 0)
            return End();

        for (size_t i = 0; i < _tables.size(); ++i)
        {
            if (_tables[i])
                return ConstIterator(_tables[i], this);
        }
        return End();
    }

    ConstIterator End() const
    {
        return ConstIterator(nullptr, this);
    }

    // ... constructors, destructor, Insert, Find and Erase are omitted in this
    // excerpt ...

private:
    std::vector<Node*> _tables;   // bucket array: one chain head per slot
    size_t _n = 0;                // number of stored elements
};

3、map支持[ ]

  • unordered_map要支持[],主要需要insert的返回值提供支持:把HashTable中Insert的返回值修改为pair<Iterator, bool>,即 pair<Iterator, bool> Insert(const T& data)

  • 有了insert的支持,[ ]的实现就很简单了,具体参考下面代码实现。

//UnorderedMap.h
template<
class K
, class V
, class Hash
= HashFunc<K>>V&operator[](const K& key){pair<iterator, bool> ret = insert({ key,V()});return ret.first->second;}

4、封装实现的完整代码

1)HashTable.h

#pragma once
#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Default hash functor: converts a key to an unsigned integer with a cast.
template<class K>
struct HashFunc
{
    size_t operator()(const K& key) const
    {
        return (size_t)key;
    }
};

// Specialization for std::string: multiplicative (BKDR-style, factor 131) hash
// over the characters.
template<>
struct HashFunc<std::string>
{
    size_t operator()(const std::string& s) const
    {
        size_t hash = 0;
        for (auto ch : s)
        {
            hash += ch;
            hash *= 131;
        }
        return hash;
    }
};

// Prime table used for the initial bucket count and for growth.
// Returns the smallest listed prime that is >= n, or the largest listed prime
// when n exceeds every entry.
inline unsigned long _stl_next_prime(unsigned long n)
{
    static const int _stl_num_primes = 28;
    static const unsigned long _stl_prime_list[_stl_num_primes] = {
        53, 97, 193, 389, 769,
        1543, 3079, 6151, 12289, 24593,
        49157, 98317, 196613, 393241, 786433,
        1572869, 3145739, 6291469, 12582917, 25165843,
        50331653, 100663319, 201326611, 402653189, 805306457,
        1610612741, 3221225473ul, 4294967291ul
    };
    const unsigned long* first = _stl_prime_list;
    const unsigned long* last = _stl_prime_list + _stl_num_primes;
    const unsigned long* pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}

// Separate-chaining ("hash bucket") implementation.
namespace hash_bucket
{
    // Node of a bucket's singly linked chain.
    template<class T>
    struct HashNode
    {
        T _data;
        HashNode<T>* _next;

        HashNode(const T& data)
            : _data(data), _next(nullptr)
        {}
    };

    // Forward declaration: the iterator needs to name the table type.
    template<class K, class T, class KeyOfT, class Hash>
    class HashTable;

    // Forward iterator over every element of a HashTable.
    // Ref / Ptr select between the mutable (T&, T*) and the read-only
    // (const T&, const T*) flavour.
    template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
    struct HTIterator
    {
        typedef HashNode<T> Node;
        typedef HashTable<K, T, KeyOfT, Hash> HT;
        typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

        Node* _node;     // current node; nullptr represents end()
        const HT* _ht;   // owning table, needed to move on to later buckets

        HTIterator(Node* node, const HT* ht)
            : _node(node), _ht(ht)
        {}

        Ref operator*() const
        {
            return _node->_data;
        }

        Ptr operator->() const
        {
            return &_node->_data;
        }

        bool operator==(const Self& s) const
        {
            return _node == s._node;
        }

        bool operator!=(const Self& s) const
        {
            return _node != s._node;
        }

        Self& operator++()
        {
            // Incrementing end() is a no-op instead of a null dereference.
            if (_node == nullptr)
                return *this;

            // More nodes in the current bucket: just follow the chain.
            if (_node->_next)
            {
                _node = _node->_next;
                return *this;
            }

            // Current bucket exhausted: recompute its index from the key, then
            // look for the next non-empty bucket.
            KeyOfT kot;
            Hash hash;
            size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();

            _node = nullptr;   // stays null (== end()) if no later bucket has a node
            for (++hashi; hashi < _ht->_tables.size(); ++hashi)
            {
                if (_ht->_tables[hashi])
                {
                    _node = _ht->_tables[hashi];
                    break;
                }
            }
            return *this;
        }
    };

    // K: key type; T: stored element; KeyOfT: functor mapping T -> key;
    // Hash: functor mapping key -> size_t.
    template<class K, class T, class KeyOfT, class Hash>
    class HashTable
    {
        // Friend declaration so HTIterator can read _tables. The parameter
        // names must differ from the enclosing template's, otherwise GCC/Clang
        // reject the declaration for shadowing a template parameter.
        template<class K1, class T1, class Ref1, class Ptr1, class KeyOfT1, class Hash1>
        friend struct HTIterator;

        typedef HashNode<T> Node;

    public:
        typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
        typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;

        // Iterator to the first node of the first non-empty bucket, or End().
        Iterator Begin()
        {
            if (_n == 0)
                return End();

            for (size_t i = 0; i < _tables.size(); ++i)
            {
                if (_tables[i])
                    return Iterator(_tables[i], this);
            }
            return End();
        }

        Iterator End()
        {
            return Iterator(nullptr, this);
        }

        ConstIterator Begin() const
        {
            if (_n == 0)
                return End();

            for (size_t i = 0; i < _tables.size(); ++i)
            {
                if (_tables[i])
                    return ConstIterator(_tables[i], this);
            }
            return End();
        }

        ConstIterator End() const
        {
            return ConstIterator(nullptr, this);
        }

        HashTable()
            : _tables(_stl_next_prime(0), nullptr), _n(0)
        {}

        // Deep copy: every chain is duplicated node by node, keeping its order.
        HashTable(const HashTable& ht)
            : _tables(ht._tables.size(), nullptr), _n(ht._n)
        {
            for (size_t i = 0; i < ht._tables.size(); ++i)
            {
                Node* newHead = nullptr;
                Node* tail = nullptr;
                for (Node* cur = ht._tables[i]; cur; cur = cur->_next)
                {
                    Node* newnode = new Node(cur->_data);
                    if (newHead == nullptr)
                        newHead = newnode;
                    else
                        tail->_next = newnode;
                    tail = newnode;
                }
                _tables[i] = newHead;
            }
        }

        void Swap(HashTable& ht)
        {
            _tables.swap(ht._tables);
            std::swap(_n, ht._n);
        }

        // Copy-and-swap: the by-value parameter is the copy.
        HashTable& operator=(HashTable ht)
        {
            Swap(ht);
            return *this;
        }

        ~HashTable()
        {
            for (size_t i = 0; i < _tables.size(); ++i)
            {
                Node* cur = _tables[i];
                while (cur)
                {
                    Node* next = cur->_next;
                    delete cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
        }

        // Inserts `data` unless an element with the same key already exists.
        // Returns {iterator to the element with that key, true if inserted}.
        std::pair<Iterator, bool> Insert(const T& data)
        {
            KeyOfT kot;

            // Reject duplicates.
            Iterator it = Find(kot(data));
            if (it != End())
                return { it, false };

            Hash hash;

            // Grow once the load factor reaches 1.
            if (_n == _tables.size())
            {
                std::vector<Node*> newTable(_stl_next_prime(_tables.size() + 1), nullptr);
                for (size_t i = 0; i < _tables.size(); ++i)
                {
                    Node* cur = _tables[i];
                    while (cur)
                    {
                        Node* next = cur->_next;
                        // Relink the existing node at the head of its new
                        // bucket (no node is reallocated).
                        size_t hashi = hash(kot(cur->_data)) % newTable.size();
                        cur->_next = newTable[hashi];
                        newTable[hashi] = cur;
                        cur = next;
                    }
                    _tables[i] = nullptr;
                }
                _tables.swap(newTable);
            }

            // Head insertion into the target bucket.
            size_t hashi = hash(kot(data)) % _tables.size();
            Node* newnode = new Node(data);
            newnode->_next = _tables[hashi];
            _tables[hashi] = newnode;
            ++_n;
            return { Iterator(newnode, this), true };
        }

        // Returns an iterator to the element whose key equals `key`, or End().
        Iterator Find(const K& key)
        {
            KeyOfT kot;
            Hash hash;
            size_t hashi = hash(key) % _tables.size();
            for (Node* cur = _tables[hashi]; cur; cur = cur->_next)
            {
                if (kot(cur->_data) == key)
                    return Iterator(cur, this);
            }
            return End();
        }

        // Removes the element whose key equals `key`.
        // Returns true if an element was removed, false if none matched.
        bool Erase(const K& key)
        {
            KeyOfT kot;
            Hash hash;
            size_t hashi = hash(key) % _tables.size();

            Node* prev = nullptr;
            Node* cur = _tables[hashi];
            while (cur)
            {
                if (kot(cur->_data) == key)
                {
                    if (prev == nullptr)
                        _tables[hashi] = cur->_next;   // unlinking the chain head
                    else
                        prev->_next = cur->_next;      // unlinking an inner node

                    delete cur;
                    --_n;
                    return true;
                }
                prev = cur;
                cur = cur->_next;
            }
            return false;
        }

    private:
        std::vector<Node*> _tables;   // bucket array: one chain head per slot
        size_t _n = 0;                // number of stored elements
    };
}

2)UnorderedSet.h

#pragma once
#include <utility>
#include"HashTable.h"
//UnorderedSet.h
namespace zsy
{
    // Set adapter over hash_bucket::HashTable. Elements are stored as const K,
    // so they cannot be modified through an iterator.
    template<class K, class Hash = HashFunc<K>>
    class unordered_set
    {
        // For a set the stored element is the key itself.
        struct SetKeyOfT
        {
            const K& operator()(const K& key) const
            {
                return key;
            }
        };

    public:
        typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Iterator iterator;
        typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::ConstIterator const_iterator;

        iterator begin()
        {
            return _ht.Begin();
        }

        iterator end()
        {
            return _ht.End();
        }

        const_iterator begin() const
        {
            return _ht.Begin();
        }

        const_iterator end() const
        {
            return _ht.End();
        }

        // Forwards to HashTable::Insert: {iterator to the element, true if it
        // was newly inserted}.
        std::pair<iterator, bool> insert(const K& key)
        {
            return _ht.Insert(key);
        }

        // Forwards to HashTable::Find.
        iterator find(const K& key)
        {
            return _ht.Find(key);
        }

        // Forwards to HashTable::Erase.
        bool erase(const K& key)
        {
            return _ht.Erase(key);
        }

    private:
        hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;
    };
}

3)UnorderedMap.h

#pragma once
#include <utility>
#include"HashTable.h"
//UnorderedMap.h
namespace zsy
{
    // Map adapter over hash_bucket::HashTable. Elements are stored as
    // pair<const K, V>: the key is read-only, the mapped value is mutable.
    template<class K, class V, class Hash = HashFunc<K>>
    class unordered_map
    {
        // Extracts the key from a stored element. The parameter type matches
        // the stored pair<const K, V> exactly; taking pair<K, V> here would
        // build a temporary copy of the whole pair on every call and return a
        // reference into that temporary.
        struct MapKeyOfT
        {
            const K& operator()(const std::pair<const K, V>& kv) const
            {
                return kv.first;
            }
        };

    public:
        typedef typename hash_bucket::HashTable<K, std::pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;
        typedef typename hash_bucket::HashTable<K, std::pair<const K, V>, MapKeyOfT, Hash>::ConstIterator const_iterator;

        iterator begin()
        {
            return _ht.Begin();
        }

        iterator end()
        {
            return _ht.End();
        }

        const_iterator begin() const
        {
            return _ht.Begin();
        }

        const_iterator end() const
        {
            return _ht.End();
        }

        // Returns a reference to the value mapped to `key`.
        // insert() either adds {key, V()} or returns the iterator of the
        // element that already has this key.
        V& operator[](const K& key)
        {
            std::pair<iterator, bool> ret = insert({ key, V() });
            return ret.first->second;
        }

        // Forwards to HashTable::Insert: {iterator to the element, true if it
        // was newly inserted}.
        std::pair<iterator, bool> insert(const std::pair<K, V>& kv)
        {
            return _ht.Insert(kv);
        }

        // Forwards to HashTable::Find.
        iterator find(const K& key)
        {
            return _ht.Find(key);
        }

        // Forwards to HashTable::Erase.
        bool erase(const K& key)
        {
            return _ht.Erase(key);
        }

    private:
        hash_bucket::HashTable<K, std::pair<const K, V>, MapKeyOfT, Hash> _ht;
    };
}

4)Test.cpp

接着对我们封装后的UnorderedSet和UnorderedMap进行测试:

#include<iostream>using namespace std;#include"UnorderedMap.h"#include"UnorderedSet.h"namespace zsy{void test_unordered_set(){int a[] = {3,11,86,7,88,82,10,5,6,7,6};unordered_set<int> s;for (auto e : a){s.insert(e);}unordered_set<int>::iterator it = s.begin();while (it != s.end()){cout <<*it <<" ";++it;}cout << endl;}void test_unordered_map(){unordered_map<string, string> dict;dict.insert({"left","左边"});dict.insert({"right","右边"});dict.insert({"insert","插入"});dict["sort"] = "排序";//[]实现插入dict["insert"] = "插入元素";//[]实现修改unordered_map<string, string>::iterator it = dict.begin();while (it != dict.end()){it->second += "x";//it->first不允许修改,it->second允许修改cout << it->first <<":" << it->second << endl;++it;}}}int main(){zsy::test_unordered_set();zsy::test_unordered_map();return 0;}

运行结果:

http://www.wxhsa.cn/company.asp?id=5031

相关文章:

  • Azure App Service连接Azure SQL MI
  • 将目标数据复制到服务器-ServerSetReplicatedTargetData()
  • 不是说 PHP 不行了吗?为什么 Swoole 还在更新?
  • qoj1831 Bruteforce
  • C++数据结构和算法:链表
  • CAI:开源网络安全AI框架,打造自主安全测试智能体
  • GAS中,负责封装技能所影响的目标数据(如 Actor、位置、碰撞结果等)-FGameplayAbilityTargetData
  • 详细介绍:Maven入门_简介、安装与配置
  • 实用指南:立体校正原理
  • train-labels.idx1-ubyte里是什么
  • 滑动窗口最大值-leetcode
  • 创建预测窗口-ScopedPredictionWindow();
  • 95. 不同的二叉搜索树 II
  • lc1028-从先序遍历还原二叉树
  • P12558 [UOI 2024] Heroes and Monsters 题解
  • 加把劲——2025 年中总结
  • Ability-GetCurrentActorInfo()-IsLocallyControlled()和APawn::IsLocallyControlled()
  • 应该遵守的代码规范与读《数学之美》有感
  • AbilitySystemComponent和AbilityTask
  • AT_arc171_c [ARC171C] Swap on Tree
  • 202509_QQ_冷门的Base家族
  • SpawnActorDeferred()和SpawnActorOfClass()
  • 【QT】信号和槽
  • 学习日报|线程池专题学习总结 - 详解
  • 如何设计业务架构 - 智慧园区
  • snmp协议
  • 刷题复习(四)二分搜索
  • aardio | 通过点击checkbox复选框本身判断是否勾选
  • 项目介绍
  • 新媒体运营用AI排版工具|10分钟搞定公众号图文的全流程指南