zz 简单的HashTable

发布于：2014-03-17 18:37:40 作者：兄弟网络点击：次

近期工作中要处理100W条记录,前一个同事使用SQLLite数据作为数据结构存储,采用数据库查询一条记录,时间当然不成问题.后来,我将数据库的数据导出来,发现由原来的150Ｍ多变成20M多，手机APP ，软件开发，而装载数据库存需要２５Ｓ，这种用空间换时间的做法未免牺牲太大.所以我想到不用数据库.而用普通的文件存储,要解决查询一条记录肯定要使用某种数据结构.首先想到的是STL中的hash_set,它采用hash_table作为底层数据结构,查询肯定不成问题,可惜初始化所需时间无法接受,我想可能hash_table的内部表格重建过于复杂,而我所要的并不复杂,于是我想写一个简单的hash_table,只要能进行插入,查询就可以,而且只针对字符串就行了,所以查看一些资料,写了一个简单的Hash_Table.

从底层来看,Hash_Table与stl中的hash_table的区别在于,内存办理不一样,hash_table采用std::alloc,Hash_Table直接使用new.还有一点不同的表格重建的策略不一样.

template
class CMyHashTable
{
public:
typedef const Value* ValPointer;
typedef const Value& ValRef;
struct ValNode
{
Value Data;
DWORD Key;
ValNode* pNext;
};

typedef ValNode* NodePointer;
typedef const ValNode* NodeCPointer;
typedef ValNode& NodeRef;
typedef const ValNode& NodeCRef;
protected:
const int*m_boxSize;
intm_boxCount;
int m_level;
int m_nbox; //桶的大小,初始为1024
int m_limh;
int m_liml;
NodePointer* m_boxes;
int m_size; //元素的问个数
public:
CMyHashTable(int boxCount, const int* boxSize, int initLevel);
~CMyHashTable();
public:
ValPointer find(ValRef value) const
{
ValNodes;
_fill_rec(s, value);
return &_find(s)->Data;
}

bool insert(ValRef value)
{
ValNodes;
_fill_rec(s, value);

if (_find(s) != NULL)
return false;

_insert_rec(s);
return true;
}

void insert_force(ValRef value)
{
ValNodes;
_fill_rec(s, value);
_insert_rec(s);
}

bool remove(ValRef value)
{
ValNodes;
_fill_rec(s, value);

boolrtv = _find_remove(s);
if (rtv)
{
--m_size;
if (m_size < m_liml)
decrease_level();
}

return rtv;
}

intsize() const { return m_size; }
intcapacity() const { return m_limh; }

void for_each(void (*fp)(ValRef));
protected:
inline DWORD_short_key(DWORD key) const
{
return key & (m_nbox - 1);
}
inline static DWORD_short_key(DWORD key, DWORD nbox)
{
return key & (nbox - 1);
}

NodeCPointer_find(NodeCRef) const;
bool_find_remove(NodeCRef);

void _insert_rec(NodeCRef rec)
{
++m_size;

NodePointerpnew = new ValNode;
*pnew = rec;

DWORDshort_key = _short_key(rec.Key);
pnew->pNext = m_boxes[short_key];
m_boxes[short_key] = pnew;

if (m_size >= m_limh)
increase_level();
}

voidswitch_level(int new_level);
voidincrease_level()
{
if (m_level < m_boxCount - 1)
switch_level(m_level + 1);
else
m_limh = 0x20000000;// Unable to increase level
}
voiddecrease_level()
{
if (m_level > 0)
switch_level(m_level - 1);
else
m_liml = 0;// Unable to decrease level
}

static void_fill_rec(NodeRef s, ValRef value)
{
s.Data= value;
s.Key= HashFun()(value);
s.pNext= NULL;
}

};

template
CMyHashTable::CMyHashTable(int boxCount, const int* boxSize, int initLevel)
:m_boxCount(boxCount), m_boxSize(boxSize)
{
m_level = initLevel;

m_limh = m_nbox = m_boxSize[m_level];
m_liml = initLevel > 0 ? m_boxSize[initLevel - 1] / 2 : 0;

m_boxes = new NodePointer[m_nbox];
m_size = 0;
memset(m_boxes, 0, sizeof(NodePointer) * m_nbox);
}
template
CMyHashTable::~CMyHashTable()
{
for (int i = 0; i < m_nbox; ++i)
{
NodePointerp = m_boxes[i];
while (p != NULL)
{
NodePointerq = p->pNext;
delete p;
p = q;
}
}
delete[] m_boxes;
}

template
typename CMyHashTable::NodeCPointer CMyHashTable::_find(NodeCRef r) const
{
DWORDshort_key = _short_key(r.Key);
NodePointerp = m_boxes[short_key];

while (p != NULL)
{
if (p->Key == r.Key)
{
if (EqualKey()(p->Data, r.Data))
return p;
}
p = p->pNext;
}
return NULL;
}
template
bool CMyHashTable::_find_remove(NodeCRef r)
{
DWORDshort_key = _short_key(r.Key);
NodePointer*p = &m_boxes[short_key];

while (*p != NULL)
{
if ((*p)->Key == r.Key)
{
if (EqualKey()((*p)->value, r.value))
{
NodePointerq = *p;
*p = q->next;
deleteq;
return true;
}
}
p = &(*p)->next;
}
return false;
}

template
void CMyHashTable::switch_level(int new_level)
{
intnew_n = m_boxSize[m_level = new_level];
NodePointer*new_boxes = new NodePointer[new_n];
memset(new_boxes, 0, sizeof(NodePointer) * new_n);

for (int i = 0; i < m_nbox; ++i)
{
NodePointerp = m_boxes[i], q;
while (p != NULL)
{
q = p->pNext;
DWORDsht_key = _short_key(p->Key, new_n);
p->pNext = new_boxes[sht_key];
new_boxes[sht_key] = p;

p = q;
}
}

m_limh = m_nbox = new_n;
m_liml = m_level > 0 ? m_boxSize[m_level - 1] / 2 : 0;
delete[] m_boxes;
m_boxes = new_boxes;
}

另外使用这个的两个仿函数

struct EPSTR
{
bool operator()(const string& s1, const string& s2) const
{
return strcmp(s1.c_str(), s2.c_str()) == 0;
}
};

struct HASH_FUN
{
size_t operator()(const string& str) const
{
const char*psz = str.c_str();
unsigned long _hash = 0;
for (; *psz != 0; ++psz)
{
_hash *= 16777619;
_hash ^= (unsigned long) (unsigned short&)*psz;
}
return _hash;
}
};

我使用这个HASH_TABLE装载100W条数据,只要16S,查询一条记录在1MS以内.应该说比使用数据库要好得多.

本文关键词：简单| HashTable|

信息中心

zz 简单的HashTable

[相关阅读]

服务项目

关于兄弟

服务支持

我们介绍

QQ咨询

电话咨询

新版调查