unordered_map和unordered_set的模拟实现和封装

作者：喵喵爱编程 | 2024-08-05 15:56:32

踩

闭散列哈希表和开散列哈希桶数据结构


enum State//状态
    {
        EMPTY,//空
        EXIST,//存在
        DELETE//删除
    };
 
template<class K, class V>
struct HashData//数据结构
    {
        pair<K, V> _kv;
        State _state = EMPTY;
    };
		template<class T>
		struct HashNode
    {
        HashNode<T>* _next;
        T _data;
 
        HashNode(const T& data)
                :_next(nullptr)
                , _data(data)
        {}
    };

闭散列哈希表实际上就是一个vector数组，本质和顺序表一样，为了方便删除，我们直接给各个数据设置一个State值，当为EMPTY时则为空，能够插入值，当为EXIST时则为存在，不能插入值，当我们需要删除时，直接把状态值设置为DELETE，视为删除，插入值时只需要判断是否为EMPTY或DELETE即可。
开散列哈希桶则就是vector中存入指针，然后将key值链接起来。

哈希表HashTable


	class HashTable
    {
    public:
				bool Insert(const pair<K, V>& kv);
				HashData<K, V>* Find(const K& key);
				bool Erase(const K& key);
    private:
        vector<HashData<K, V>> _tables;
        size_t _n = 0; // 存储的数据个数
    };

Find()


		HashData<K, V>* Find(const K& key)
        {
            if (_tables.size() == 0)
            {
                return nullptr;
            }
 
            size_t hashi = key % _tables.size();
 
            // 线性探测
            size_t i = 1;
            size_t index = hashi;
            while (_tables[index]._state != EMPTY)
            {
                if (_tables[index]._state == EXIST
                        && _tables[index]._kv.first == key)
                {
                    return &_tables[index];
                }
 
                index = hashi + i;//往后找
                index %= _tables.size();//找了一圈后 index==hashi
                ++i;
                //找完全部后没找到就退出循环
                if (index == hashi)
                {
                    break;
                }
            }
 
            return nullptr;
        }

数据查找只需判断这个值是否存在，然后再一个一个往后找即可，用hashi定位key位置，如果哈希冲突，则该key闭散列了，要往后找，index加上i值一个一个往后找，当找完一圈后再模%，相等即找完。

Erase()


		bool Erase(const K& key)
        {
            HashData<K, V>* ret = Find(key);
            if (ret)
            {
                ret->_state = DELETE;//修改状态值即可“删除”
                --_n;
                return true;
            }
            else
            {
                return false;
            }
        }

找到值后修改State状态为DELETE即可。

Insert()

首先判断是否需要扩容，如果负载因子>=7，则扩容


			//判断是否扩容
			//if ((double)_n / (double)_tables.size() >= 0.7)
		    if (_tables.size() == 0 || _n * 10 / _tables.size() >= 7)
            {
                size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;
                HashTable<K, V> newht;
                newht._tables.resize(newsize);
 
                // 遍历旧表，重新映射到新表
                for (auto& data : _tables)
                {
                    if (data._state == EXIST)
                    {
                        newht.Insert(data._kv);
                    }
                }
 
                _tables.swap(newht._tables);
            }
 
            size_t hashi = kv.first % _tables.size();

扩容的方法是把旧表遍历一边，重新插入到新表中，然后再交换两个表，更新映射关系hashi即可


		bool Insert(const pair<K, V>& kv)
        {
            if (Find(kv.first))//找到就不插入了
                return false;
            // 判断扩容
						//
            // 线性探测
            size_t i = 1;
            size_t index = hashi;
            while (_tables[index]._state == EXIST)
            {
                index = hashi + i;
                index %= _tables.size();
                ++i;
            }
 
            _tables[index]._kv = kv;
            _tables[index]._state = EXIST;
            _n++;
 
            return true;
        }

扩容完成后再继续线性探测，找到空位置插入。

迭代器


	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
    struct __HashIterator
    {
        typedef HashNode<T> Node;
        typedef HashTable<K, T, KeyOfT, Hash> HT;
        typedef __HashIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
 
        typedef __HashIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
 
        Node* _node;
        const HT* _ht;
 
        __HashIterator(Node* node, const HT* ht)
                :_node(node)
                , _ht(ht)
        {}
 
        __HashIterator(const Iterator& it)
                :_node(it._node)
                , _ht(it._ht)
        {}
				Ref operator*()
        {
            return _node->_data;
        }
 
        Ptr operator->()
        {
            return &_node->_data;
        }
 
        bool operator!=(const Self& s)
        {
            return _node != s._node;
        }

迭代器的实现难点在于套模版，其他和红黑树的迭代器一样，也可以用非const构造const迭代器

opertor++()


				Self& operator++()
        {
            if (_node->_next != nullptr)
            {
                _node = _node->_next;
            }
            else
            {
                // 找下一个不为空的桶
                KeyOfT kot;
                Hash hash;
                // 算出我当前的桶位置
                size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
                ++hashi;
                while (hashi < _ht->_tables.size())
                {
                    if (_ht->_tables[hashi])
                    {
                        _node = _ht->_tables[hashi];
                        break;
                    }
                    else
                    {
                        ++hashi;
                    }
                }
 
                // 没有找到不为空的桶
                if (hashi == _ht->_tables.size())
                {
                    _node = nullptr;
                }
            }
 
            return *this;
        }

一个桶的链表结束后，再找下一个不为空的桶。这里用了两个仿函数kot和hash，就是把_data中的key值传出来和size进行比较，后面封装unorderedmap和unorderedset会用到仿函数


struct MapKeyOfT
        {
            const K& operator()(const pair<K, V>& kv)
            {
                return kv.first;
            }
        };
struct SetKeyOfT
        {
            const K& operator()(const K& key)
            {
                return key;
            }
        };

Hash是将key值转化成[可以取模的整型]的仿函数


template<class K>
struct HashFunc
{
    size_t operator()(const K& key)
    {
        return key;
    }
};
 
// 特化
template<>
struct HashFunc<string>
{
    // BKDR
    size_t operator()(const string& s)
    {
        size_t hash = 0;
        for (auto ch : s)
        {
            hash += ch;
            hash *= 31;
        }
 
        return hash;
    }
};

由于string的特殊性，需要单独处理，这里用到了特化这个案例；乘以31是为了处理字符相同但是顺序不同的情况。

哈希桶


template<class K, class T, class KeyOfT, class Hash>
class HashTable
{
  template<class K1, class T1, class Ref, class Ptr, class KeyOfT1, class Hash1>
  friend struct __HashIterator;
 
  typedef HashNode<T> Node;
public:
  typedef __HashIterator<K, T, T&, T*, KeyOfT, Hash> iterator;
  typedef __HashIterator<K, T, const T&, const T*, KeyOfT, Hash> const_iterator;
private:
        vector<Node*> _tables; // 指针数组
        size_t _n = 0; // 存储有效数据个数
    };

要让迭代器可以访问私有成员，将它设置为友元函数，注意，模版的命名不能冲突。


		iterator begin()
        {
            Node* cur = nullptr;
            for (size_t i = 0; i < _tables.size(); ++i)
            {
                cur = _tables[i];
                if (cur)
                {
                    break;
                }
            }
 
            return iterator(cur, this);
        }
 
        iterator end()
        {
            return iterator(nullptr, this);
        }
 
        const_iterator begin() const
        {
            Node* cur = nullptr;
            for (size_t i = 0; i < _tables.size(); ++i)
            {
                cur = _tables[i];
                if (cur)
                {
                    break;
                }
            }
 
            return const_iterator(cur, this);
        }
 
        const_iterator end() const
        {
            return const_iterator(nullptr, this);
        }

begin只需要找到第一个不为空的桶中的第一个节点即可，end则为空。

Find()


		iterator Find(const K& key)
        {
            if (_tables.size() == 0)
                return end();
 
            KeyOfT kot;
            Hash hash;
            size_t hashi = hash(key) % _tables.size();
            Node* cur = _tables[hashi];
            while (cur)
            {
                if (kot(cur->_data) == key)
                {
                    return iterator(cur, this);
                }
 
                cur = cur->_next;
            }
 
            return end();
        }

find没什么好说的，直接找到映射的桶，然后遍历链表即可

Erase()


		bool Erase(const K& key)
        {
            Hash hash;
            KeyOfT kot;
            size_t hashi = hash(key) % _tables.size();
            Node* prev = nullptr;
            Node* cur = _tables[hashi];
            while (cur)
            {
                if (kot(cur->_data) == key)
                {
                    if (prev == nullptr)
                    {
                        _tables[hashi] = cur->_next;
                    }
                    else
                    {
                        prev->_next = cur->_next;
                    }
                    delete cur;
 
                    return true;
                }
                else
                {
                    prev = cur;
                    cur = cur->_next;
                }
            }
 
            return false;
        }

删除即是找到节点，然后再头删这个节点即可。

Insert()


		pair<iterator, bool> Insert(const T& data)
        {
            KeyOfT kot;
            iterator it = Find(kot(data));
            if (it != end())
            {
                return make_pair(it, false);
            }
            Hash hash;
            // 负载因因子==1时扩容
            if (_n == _tables.size())
                size_t newsize = GetNextPrime(_tables.size());
                vector<Node*> newtables(newsize, nullptr);
                //for (Node*& cur : _tables)
                for (auto& cur : _tables)
                {
                    while (cur)
                    {
                        Node* next = cur->_next;
 
                        size_t hashi = hash(kot(cur->_data)) % newtables.size();
 
                        // 头插到新表
                        cur->_next = newtables[hashi];
                        newtables[hashi] = cur;
 
                        cur = next;
                    }
                }
 
                _tables.swap(newtables);
            }
 
            size_t hashi = hash(kot(data)) % _tables.size();
            // 头插
            Node* newnode = new Node(data);
            newnode->_next = _tables[hashi];
            _tables[hashi] = newnode;
 
            ++_n;
            return make_pair(iterator(newnode, this), false);
        }

插入主要还是扩容比较麻烦，这里用的是质数扩容，库里面用的就是这个：


size_t GetNextPrime(size_t prime)
        {
            // SGI
            static const int __stl_num_primes = 28;
            static const unsigned long __stl_prime_list[__stl_num_primes] =
                    {
                            53, 97, 193, 389, 769,
                            1543, 3079, 6151, 12289, 24593,
                            49157, 98317, 196613, 393241, 786433,
                            1572869, 3145739, 6291469, 12582917, 25165843,
                            50331653, 100663319, 201326611, 402653189, 805306457,
                            1610612741, 3221225473, 4294967291
                    };
 
            size_t i = 0;
            for (; i < __stl_num_primes; ++i)
            {
                if (__stl_prime_list[i] > prime)
                    return __stl_prime_list[i];
            }
 
            return __stl_prime_list[i];
        }

最大桶


		size_t MaxBucketSize()
        {
            size_t max = 0;
            for (size_t i = 0; i < _tables.size(); ++i)
            {
                auto cur = _tables[i];
                size_t size = 0;
                while (cur)
                {
                    ++size;
                    cur = cur->_next;
                }
 
                //printf("[%d]->%d\\n", i, size);
                if (size > max)
                {
                    max = size;
                }
            }
 
            return max;
        }

哈希表实现完后，对于unordered_map/set只需要简单封装一下即可


#include "HashTable.h"
 
namespace lty
{
    template<class K, class V, class Hash = HashFunc<K>>
    class unordered_map
    {
    public:
        struct MapKeyOfT
        {
            const K& operator()(const pair<K, V>& kv)
            {
                return kv.first;
            }
        };
    public:
        typedef typename HashBucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::iterator iterator;
        typedef typename HashBucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::const_iterator const_iterator;
 
        iterator begin()
        {
            return _ht.begin();
        }
 
        iterator end()
        {
            return _ht.end();
        }
 
        const_iterator begin() const
        {
            return _ht.begin();
        }
 
        const_iterator end() const
        {
            return _ht.end();
        }
 
        pair<iterator, bool> insert(const pair<K, V>& kv)
        {
            return _ht.Insert(kv);
        }
 
        V& operator[](const K& key)
        {
            pair<iterator, bool> ret = insert(make_pair(key, V()));
            return ret.first->second;
            //first==iterator  first-> == _node->_data  
              first->second==_node->_data->second,也就是V值
        }
 
        iterator find(const K& key)
        {
            return _ht.Find(key);
        }
 
        bool erase(const K& key)
        {
            return _ht.Erase(key);
        }
 
    private:
        HashBucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;
    };

set和map除了K、V值的不一样，还有就是map重载了[]，和红黑树的[]一样，返回pari的second值


#pragma once
#include "HashTable.h"
 
namespace lty
{
    template<class K, class Hash = HashFunc<K>>
    class unordered_set
    {
    public:
        struct SetKeyOfT
        {
            const K& operator()(const K& key)
            {
                return key;
            }
        };
    public:
        typedef typename HashBucket::HashTable<K, K, SetKeyOfT, Hash>::const_iterator iterator;
        typedef typename HashBucket::HashTable<K, K, SetKeyOfT, Hash>::const_iterator const_iterator;
 
        iterator begin()
        {
            return _ht.begin();
        }
 
        iterator end()
        {
            return _ht.end();
        }
 
        const_iterator begin() const
        {
            return _ht.begin();
        }
 
        const_iterator end() const
        {
            return _ht.end();
        }
 
        pair<iterator, bool> insert(const K& key)
        {
            return _ht.Insert(key);
        }
 
        iterator find(const K& key)
        {
            return _ht.Find(key);
        }
 
        bool erase(const K& key)
        {
            return _ht.Erase(key);
        }
 
    private:
        HashBucket::HashTable<K, K, SetKeyOfT, Hash> _ht;
    };

迭代器直接调用哈希中的即可。

哈希表的实现难点就在于模版的套用，很绕，只要搞懂了模版是怎么套用的，实现就会变得简单，只要知道哈希表原理即可。

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/喵喵爱编程/article/detail/933046