Fast HashTable

liujiaxi123456 · 2025-05-26 19:38:34 · 科技·工程

声明：

本模板肯定不是最好的板子，但是还是相对优秀且方便（使用方式跟 STL 自带的基本一致）。

关于卡哈希：

由于哈希方式与 umap 等并不相同，所以通常数据没有人会卡（并非指得卡不掉）。
实在要是刚好被卡了，自己改改 hash 函数即可。
- P.S. 提供一个基本保证不被卡的版本（但是略慢）。
本模板仅作为速度板子，保证在一般数据下的快速，下文所有速度均指一般数据下的速度。
P.S. 由于刻意构造，下面提供的速度版无法通过板子题最后一个点，但是实际使用中从未被卡过，如果被卡了可以用上面的版本。

考虑到 luogu 文章广场并没有（或几乎没有）提供这类较快的封装实现，故仍应有一定价值。

不喜轻喷，希望能帮到大家。

Fast HashTable 2.5

Update:

Fast HashTable 2.5：增加了 pair。
Fast HashTable 2.4：改进了一些细节。
Fast HashTable 2.3：增加 get 功能。
Fast HashTable 2.2：增加 clear 功能。
Fast HashTable 2.1：增加 count 功能。

简介：

哈希表，常识了吧。

但是我的哈希表要快很多。

而且不会被 CF 之类的卡。

P.S. 并不是说我的 hash 卡不掉，是指的由于人家不知道我的 hash 机制，所以没法构造数据。
实在要是刚好被蒙到卡了，自己改改 hash 函数即可。
本模板仅作为速度板子，保证在一般数据下的快速。

功能/使用：

hs[x]：访问键值（下标）为 x 的值。
- 返回值为一个引用的变量，可以直接修改/读取。
- 只要调用，无论时候修改，都一定会生成一个新的空间。
hs.count(x)：查询键值为 x 的是否被访问过。
- 返回值为一个 bool 变量，表示是否修改过。
- 不会生成新的空间。
hs.get(x)：查询键值为 x 的实际值。
- 返回实际值。
- 不会生成新的空间。
hs.clear()：清空。
- 复杂度为 O(size) 。

版本：

拉链法。
闭散列法/线性探测法。

效率比较：

时间：

当数据在 3e6 左右及以下时，两种方法几乎没有时间区别。
当数据很大时，拉链法似乎会快一些（应该是数组大后内存访问之类的问题）。
在 3e6 的数据下：
- 两种方法用时基本一致。
- gp_hash_table 约为它们的 2.1 倍，cc_hash_table 约为 3.2 倍。
- unordered_map 约为 5 倍，map 约为 25 倍左右。

空间：

闭散列法的空间会比拉链法小不少。
在 3e6 的数据下：
- 拉链法：123MB。
- 闭散列法：85MB。
- gp_hash_table：136MB。
- cc_hash_table：111MB。
- unordered_map：90MB。
- map：119MB。

综上，此上两种哈希表严格强于所有 STL/PB_DS 自带的容器。

实现：

拉链法：

template <typename Key, typename Value, const unsigned int Maxn> class HashTable { // Key, Value 跟 map 一样，Maxn 表示多少个数
private:
    unsigned int head[Maxn<<1], sz, limit;
    struct Edge { unsigned int next; Key key; Value val; } edge[Maxn];
    inline unsigned int hash(const string &s) {
        unsigned int hs = 0;
        for(unsigned int i=0; i<(unsigned int)s.size(); i++)    hs = (hs+s[i])*26;
        return hs&limit;
    }
    inline unsigned int hash(const char &ch) { return ch&limit; }
    template <typename T1, typename T2> inline unsigned int hash(const pair<T1, T2> &x) { return (x.first*x.second+x.first+x.second)&limit; }
    template <typename T> inline unsigned int hash(const T &x) { return x&limit; }
public:
    inline HashTable() {
        limit = 1;
        for(int x=Maxn-1; x; x>>=1, limit<<=1);
        limit--;
    }
    Value& operator [] (Key x) {
        unsigned int hs = hash(x);
        for(unsigned int i=head[hs]; i; i=edge[i].next) {
            if(edge[i].key == x)    return edge[i].val;
        }
        edge[++sz] = {head[hs], x, Value()};
        return head[hs] = sz, edge[sz].val;
    }
    inline bool count(Key x) {
        unsigned int hs = hash(x);
        for(unsigned int i=head[hs]; i; i=edge[i].next) {
            if(edge[i].key == x)    return true;
        }
        return false;
    }
    inline Value get(Key x) {
        unsigned int hs = hash(x);
        for(unsigned int i=head[hs]; i; i=edge[i].next) {
            if(edge[i].key == x)    return edge[i].val;
        }
        return Value();
    }
    inline void clear() {
        memset(head, 0, (limit+1)<<2), sz = 0;
    }
};

闭散列法：

template <typename Key, typename Value, const unsigned int Maxn> class HashTable {
 // Key, Value 跟 map 一样，Maxn 表示多少个数
private:
    Key key[Maxn<<1]; Value value[Maxn<<1]; int limit;
    inline unsigned int hash(const string &s) {
        unsigned int hs = 0;
        for(unsigned int i=0; i<(unsigned int)s.size(); i++)    hs = (hs+s[i])*26;
        return hs&limit;
    }
    inline unsigned int hash(const char &ch) { return ch&limit; }
    template <typename T1, typename T2> inline unsigned int hash(const pair<T1, T2> &x) { return (x.first*x.second+x.first+x.second)&limit; }
    template <typename T> inline unsigned int hash(const T &x) { return x&limit; }
public:
    inline HashTable() {
        limit = 1;
        for(int x=Maxn-1; x; x>>=1, limit<<=1);
        limit--;
    }
    Value& operator [] (Key x) {
        unsigned int hs = hash(x);
        while(key[hs]!=x and value[hs]!=0)  hs = (hs+1)&limit;
        return key[hs] = x, value[hs];
    }
    inline bool count(Key x) {
        unsigned int hs = hash(x);
        while(key[hs]!=x and value[hs]!=0)  hs = (hs+1)&limit;
        return (key[hs] == x);
    }
    inline Value get(Key x) {
        unsigned int hs = hash(x);
        while(key[hs]!=x and value[hs]!=0)  hs = (hs+1)&limit;
        return value[hs];
    }
    inline void clear() {
        memset(key, 0, (limit+1)*sizeof(key[0]));
        memset(value, 0, (limit+1)*sizeof(value[0]));
    }
};

使用案例：防止某些人看不懂

#include<iostream>
#define ll long long
using namespace std;

// 复制一遍就好了，这里以拉链法为例，两种是一样的
template <typename Key, typename Value, const unsigned int Maxn> class HashTable { // Key, Value 跟 map 一样，Maxn 表示多少个数
private:
    unsigned int head[Maxn<<1], sz, limit;
    struct Edge { unsigned int next; Key key; Value val; } edge[Maxn];
    inline unsigned int hash(const string &s) {
        unsigned int hs = 0;
        for(unsigned int i=0; i<(unsigned int)s.size(); i++)    hs = (hs+s[i])*26;
        return hs&limit;
    }
    inline unsigned int hash(const char &ch) { return ch&limit; }
    template <typename T> inline unsigned int hash(const T &x) { return x&limit; }
public:
    inline HashTable() {
        limit = 1;
        for(int x=Maxn-1; x; x>>=1, limit<<=1);
        limit--;
    }
    Value& operator [] (Key x) {
        unsigned int hs = hash(x);
        for(unsigned int i=head[hs]; i; i=edge[i].next) {
            if(edge[i].key == x)    return edge[i].val;
        }
        edge[++sz] = {head[hs], x, Value()};
        return head[hs] = sz, edge[sz].val;
    }
    inline bool count(Key x) {
        unsigned int hs = hash(x);
        for(unsigned int i=head[hs]; i; i=edge[i].next) {
            if(edge[i].key == x)    return true;
        }
        return false;
    }
};

HashTable<ll, int, 100000> hs; // 定义

int main() {
    hs[x] += 5; // 修改 hx[x]
    cout<< hs[x]; // 调用 hs[x]
    if(hs.count(x)) // 查询 x 是否出现
    return 0;
}

结语：

随手写的，希望能帮到大家。

若有问题/bug，欢迎找我。

都看到这里了，点个赞在走吧。