这是我的第一篇博客,希望大神们批评指正。

首先介绍以下什么是哈夫曼树(来自百度百科)

哈夫曼树─即最优二叉树,带权路径长度最小的二叉树,经常应用于数据压缩。 在计算机信息处理中,“哈夫曼编码”是一种一致性编码法(又称“熵编码法”),用于数据的无损耗压缩。这一术语是指使用一张特殊的编码表将源字符(例如某文件中的一个符号)进行编码。这张编码表的特殊之处在于,它是根据每一个源字符出现的估算概率而建立起来的(出现概率高的字符使用较短的编码,反之出现概率低的则使用较长的编码,这便使编码之后的字符串的平均期望长度降低,从而达到无损压缩数据的目的)。

构造哈夫曼树的主要思想:

构造哈夫曼树非常简单,将所有的节点放到一个队列中,用一个节点替换两个频率最低的节点,新节点的频率就是这两个节点的频率之和。这样,新节点就是两个被替换节点的父节点了。如此循环,直到队列中只剩一个节点(树根)。

这里用到了最小优先队列。

我这里用STL来实现。(这里有优先队列的介绍)

  1. template<typename T>
  2. struct cmp
  3. {
  4. bool operator()(TreeNode<T>* t1, TreeNode<T>* t2)
  5. {
  6. return !(*t1 < *t2);
  7. }
  8. };

优先队列的定义:

  1. priority_queue<TreeNode*,vector<TreeNode* >,cmp > pri_que;

哈夫曼树节点结构

  1. template<typename T>
  2. class TreeNode
  3. {
  4. public:
  5. TreeNode():pfather(NULL),plchild(NULL),prchild(NULL)
  6. {
  7. }
  8. T data;
  9. TreeNode *pfather;
  10. TreeNode *plchild;
  11. TreeNode *prchild;
  12. bool operator < (const TreeNode& rhs)
  13. {
  14. return data < rhs.data;
  15. }
  16. };

构造哈夫曼树

每次从最小优先队列取头两个节点,合并后放回最小优先队列,如此重复。

  1. template<typename T>
  2. TreeNode<T>* MakeHuffmanTree(T* begin, T* end) //构造哈夫曼树
  3. {
  4. priority_queue<TreeNode<T>*,vector<TreeNode<T>* >,cmp<T> > pri_que;
  5. T *iter = begin;
  6. TreeNode<T>* pNode;
  7. TreeNode<T>* pf = NULL;
  8. while(iter != end)
  9. {
  10. pNode = new TreeNode<T>;
  11. pNode->data = *iter++;
  12. pNode->pfather = pf;
  13. pri_que.push(pNode);
  14. }
  15. TreeNode<T>* plchild;
  16. TreeNode<T>* prchild;
  17. while(pri_que.size() > 1)//取两个小的合并
  18. {
  19. plchild = pri_que.top();
  20. pri_que.pop();
  21. prchild = pri_que.top();
  22. pri_que.pop();
  23. pNode = new TreeNode<T>;
  24. pNode->plchild = plchild;
  25. pNode->prchild = prchild;
  26. pNode->data = plchild->data + prchild->data;
  27. pri_que.push(pNode);
  28. }
  29. pNode = pri_que.top();
  30. pri_que.pop();
  31. return pNode;
  32. }

构造哈夫曼树这个函数的参数是一个结构体,保存着对应字符,其频率,编码值。

重载它的+运算符,为了合并时的+运算(只是频率相加)。

 


到此为止,已经可以把哈夫曼树生成出来了。

  1. template<typename T>
  2. struct mydata
  3. {
  4. mydata(){}
  5. mydata(int i):freq(i)
  6. {
  7. }
  8. string coded;
  9. int freq;
  10. T data;
  11. bool operator<(const mydata& rhs)
  12. {
  13. return freq < rhs.freq;
  14. }
  15. mydata operator+(mydata& rhs)
  16. {
  17. return mydata(freq + rhs.freq);
  18. }
  19. };

我们可以通过DFS将每个叶子节点的路径记录下来(用一个全局变量数组path),然后得到它的编码。

当发现当前节点是叶子节点,就把当前的路径赋值至该叶子节点的编码属性(coded)。

  1. const int MAXLEN = 20;
  2. char path[MAXLEN] = {0};
  3. template<typename T>
  4. void DFS(T* root,int deep = -1, char a = \'-\') //DFS 得到叶子节点的编码
  5. {
  6. if(root == NULL)
  7. return;
  8. if(a != \'-\')
  9. path[deep] = a;
  10. if(root->plchild == NULL || root->prchild == NULL)//leaf
  11. (root->data).coded = string(path,path + deep + 1);
  12. if(root->plchild != NULL)
  13. DFS(root->plchild, deep + 1, \'0\');
  14. if(root->prchild != NULL)
  15. DFS(root->prchild, deep + 1, \'1\');
  16. }

这样整个哈夫曼编码工作已经完成,为了查看我们的编码结果,我们可以用BFS跟DFS来看到我们的结果。在这里我选择了BFS。

当遍历到叶子节点,就将其编码属性(coded)和其对应字符输出。

  1. template<typename T,typename U>
  2. void BFS(T* root, mydata<U>* data) //BFS 将叶子节点的编码,提到data指向的数据
  3. {
  4. queue<T*> que;
  5. que.push(root);
  6. T* pT = NULL;
  7. while(!que.empty())
  8. {
  9. pT = que.front();
  10. //cout<<pT->data.freq<<endl;
  11. que.pop();
  12. if(pT->plchild != NULL)
  13. que.push(pT->plchild);
  14. if(pT->prchild != NULL)
  15. que.push(pT->prchild);
  16. if(pT->plchild == NULL || pT->prchild == NULL)// leaf 提取叶子节点的编码
  17. {
  18. //cout<<(pT->data).data<<":"<<(pT->data).coded<<endl;
  19. mydata<U>* pd = data;
  20. while((pT->data).data != pd->data)
  21. pd++;
  22. assert(pd->data == (pT->data).data);
  23. pd->coded = (pT->data).coded;
  24. }
  25. }
  26. }

测试驱动代码

  1. mydata<char> *pdata = new mydata<char>[4];
  2. pdata[0].data = \'a\';
  3. pdata[0].freq = 7;
  4. pdata[1].data = \'b\';
  5. pdata[1].freq = 5;
  6. pdata[2].data = \'c\';
  7. pdata[2].freq = 2;
  8. pdata[3].data = \'d\';
  9. pdata[3].freq = 4;
  10. TreeNode<mydata<char> >* pihuffmanTree = MakeHuffmanTree(pdata, pdata + 4);
  11. DFS(pihuffmanTree);
  12. BFS(pihuffmanTree);

为了更方便的使用我将这些封装到一个类里面。

  1. template<typename T>
  2. class Huffman
  3. {
  4. public:
  5. void Coded(string& coded);//传入待输出的编码
  6. void DeCode(const string& codedstr,string& decodestr);//输入待解码字符串,输出解码字符串
  7. void InputData(T* begin,T* end);//传入数据
  8.  
  9. private:
  10. string FindVal(char c);
  11. void m_CalcFreq(T* begin, T* end);//计算输入数据的频率
  12. TreeNode<mydata<T> > *root;//huffman根节点
  13. mydata<T>* data;
  14. int data_size;
  15. T* m_begin;//保存原始数据的开始与结束的位置
  16. T* m_end;
  17. //string codedstr;
  18. };

输入数据并计算其频率。

用map容器来统计输入字符每个出现的个数。

  1. template<typename T>
  2. void Huffman<T>::InputData(T* begin, T* end)
  3. {
  4. this->m_begin = begin;
  5. this->m_end = end;
  6. m_CalcFreq(begin, end);
  7. }
  8. template<typename T>
  9. void Huffman<T>::m_CalcFreq(T* begin, T* end)
  10. {
  11. int len = end - begin;
  12. //data_size = len;
  13. if(len == 0)
  14. return;
  15. map<T,int> countMap;
  16. map<T,int>::iterator mapIter = countMap.begin();
  17. T *pT = begin;
  18. while(pT != end)
  19. {
  20. mapIter = countMap.find(*pT);
  21. if(mapIter != countMap.end())//在map里有没有字符*iter
  22. ++mapIter->second;
  23. else
  24. {
  25. countMap.insert(make_pair(*pT,1));
  26. }
  27. pT++;
  28. }
  29. data_size = countMap.size();
  30. data = new mydata<T>[data_size];
  31. int i = 0;
  32. for (mapIter = countMap.begin(); mapIter != countMap.end(); ++mapIter)
  33. {
  34. data[i].data = mapIter->first;
  35. data[i].freq = mapIter->second;
  36. i++;
  37. }
  38. }

编码

  1. template<typename T>
  2. void Huffman<T>::Coded(string& coded)
  3. {
  4. root = MakeHuffmanTree(data,data + data_size);
  5. DFS(root);
  6. BFS(root,data);
  7. cout<<"code:"<<endl;
  8. for (int i = 0; i < data_size; ++i)
  9. {
  10. cout<<data[i].data<<":"<<data[i].coded<<endl;
  11. }
  12. T *begin = m_begin;
  13. while (begin != m_end)
  14. {
  15. coded += FindVal(*begin);
  16. begin++;
  17. }
  18. //string subcode =
  19. }

 

解码

  1. template<typename T>
  2. void Huffman<T>::DeCode(const string& codedstr,string& decodestr)
  3. {
  4. string::const_iterator iter = codedstr.begin();
  5. TreeNode<mydata<T> >* curNode = root;
  6. while (iter != codedstr.end())
  7. {
  8. if (curNode->plchild == NULL || curNode->prchild == NULL)
  9. {
  10. decodestr += (curNode->data).data;
  11. curNode = root;
  12. continue;
  13. }
  14. if (*iter == \'0\')
  15. curNode = curNode->plchild;
  16. if(*iter == \'1\')
  17. curNode = curNode->prchild;
  18. iter++;
  19. }
  20. }

测试驱动程序

  1. char *pmystr = "cbcddddbbbbaaaaaaa";
  2. Huffman<char> h;
  3. h.InputData(pmystr, pmystr + 18);
  4. cout<<"originstr: "<<pmystr<<endl;
  5. string coded;
  6. h.Coded(coded);
  7. cout<<"coded: "<<coded<<endl;
  8. string decode;
  9. h.DeCode(coded,decode);
  10. cout<<"decode: "<<decode<<endl;

完整程序(环境:VS2012)

  1. #include <iostream>
  2. //#include <algorithm>
  3. #include <queue>
  4. #include <string>
  5. #include <vector>
  6. #include <cassert>
  7. #include <map>
  8. using namespace std;
  9. template<typename T>
  10. class TreeNode
  11. {
  12. public:
  13. TreeNode():pfather(NULL),plchild(NULL),prchild(NULL)
  14. {
  15. }
  16. T data;
  17. TreeNode *pfather;
  18. TreeNode *plchild;
  19. TreeNode *prchild;
  20. bool operator < (const TreeNode& rhs)
  21. {
  22. return data < rhs.data;
  23. }
  24. };
  25. template<typename T>
  26. struct cmp
  27. {
  28. bool operator()(TreeNode<T>* t1, TreeNode<T>* t2)
  29. {
  30. return !(*t1 < *t2);
  31. }
  32. };
  33. template<typename T>
  34. TreeNode<T>* MakeHuffmanTree(T* begin, T* end) //构造哈夫曼树
  35. {
  36. priority_queue<TreeNode<T>*,vector<TreeNode<T>* >,cmp<T> > pri_que;
  37. T *iter = begin;
  38. TreeNode<T>* pNode;
  39. TreeNode<T>* pf = NULL;
  40. while(iter != end)
  41. {
  42. pNode = new TreeNode<T>;
  43. pNode->data = *iter++;
  44. pNode->pfather = pf;
  45. pri_que.push(pNode);
  46. }
  47. TreeNode<T>* plchild;
  48. TreeNode<T>* prchild;
  49. while(pri_que.size() > 1)//取两个小的合并
  50. {
  51. //cout<<static_cast<TreeNode<T>* >(pri_que.top())->data<<endl;
  52. //pri_que.pop();
  53. plchild = pri_que.top();
  54. pri_que.pop();
  55. prchild = pri_que.top();
  56. pri_que.pop();
  57. pNode = new TreeNode<T>;
  58. pNode->plchild = plchild;
  59. pNode->prchild = prchild;
  60. pNode->data = plchild->data + prchild->data;
  61. pri_que.push(pNode);
  62. }
  63. pNode = pri_que.top();
  64. pri_que.pop();
  65. return pNode;
  66. }
  67. template<typename T>
  68. struct mydata
  69. {
  70. mydata(){}
  71. mydata(int i):freq(i)
  72. {
  73. }
  74. string coded;
  75. int freq;
  76. T data;
  77. bool operator<(const mydata& rhs)
  78. {
  79. return freq < rhs.freq;
  80. }
  81. mydata operator+(mydata& rhs)
  82. {
  83. return mydata(freq + rhs.freq);
  84. }
  85. };
  86. template<typename T,typename U>
  87. void BFS(T* root, mydata<U>* data) //BFS 将叶子节点的编码,提到data指向的数据
  88. {
  89. queue<T*> que;
  90. que.push(root);
  91. T* pT = NULL;
  92. while(!que.empty())
  93. {
  94. pT = que.front();
  95. //cout<<pT->data.freq<<endl;
  96. que.pop();
  97. if(pT->plchild != NULL)
  98. que.push(pT->plchild);
  99. if(pT->prchild != NULL)
  100. que.push(pT->prchild);
  101. if(pT->plchild == NULL || pT->prchild == NULL)// leaf 提取叶子节点的编码
  102. {
  103. //cout<<(pT->data).data<<":"<<(pT->data).coded<<endl;
  104. mydata<U>* pd = data;
  105. while((pT->data).data != pd->data)
  106. pd++;
  107. assert(pd->data == (pT->data).data);
  108. pd->coded = (pT->data).coded;
  109. }
  110. }
  111. }
  112. const int MAXLEN = 20;
  113. char path[MAXLEN] = {0};
  114. template<typename T>
  115. void DFS(T* root,int deep = -1, char a = \'-\') //DFS 得到叶子节点的编码
  116. {
  117. if(root == NULL)
  118. return;
  119. if(a != \'-\')
  120. path[deep] = a;
  121. if(root->plchild == NULL || root->prchild == NULL)//leaf
  122. (root->data).coded = string(path,path + deep + 1);
  123. if(root->plchild != NULL)
  124. DFS(root->plchild, deep + 1, \'0\');
  125. if(root->prchild != NULL)
  126. DFS(root->prchild, deep + 1, \'1\');
  127. }
  128. template<typename T>
  129. class Huffman
  130. {
  131. public:
  132. void Coded(string& coded);
  133. void DeCode(const string& codedstr,string& decodestr);
  134. void InputData(T* begin,T* end);
  135. private:
  136. string FindVal(char c);
  137. void m_CalcFreq(T* begin, T* end);
  138. TreeNode<mydata<T> > *root;
  139. mydata<T>* data;
  140. int data_size;
  141. T* m_begin;
  142. T* m_end;
  143. //string codedstr;
  144. };
  145. template<typename T>
  146. void Huffman<T>::InputData(T* begin, T* end)
  147. {
  148. this->m_begin = begin;
  149. this->m_end = end;
  150. m_CalcFreq(begin, end);
  151. }
  152. template<typename T>
  153. void Huffman<T>::m_CalcFreq(T* begin, T* end)
  154. {
  155. int len = end - begin;
  156. //data_size = len;
  157. if(len == 0)
  158. return;
  159. map<T,int> countMap;
  160. map<T,int>::iterator mapIter = countMap.begin();
  161. T *pT = begin;
  162. while(pT != end)
  163. {
  164. mapIter = countMap.find(*pT);
  165. if(mapIter != countMap.end())
  166. ++mapIter->second;
  167. else
  168. {
  169. countMap.insert(make_pair(*pT,1));
  170. }
  171. pT++;
  172. }
  173. data_size = countMap.size();
  174. data = new mydata<T>[data_size];
  175. int i = 0;
  176. for (mapIter = countMap.begin(); mapIter != countMap.end(); ++mapIter)
  177. {
  178. data[i].data = mapIter->first;
  179. data[i].freq = mapIter->second;
  180. i++;
  181. }
  182. }
  183. template<typename T>
  184. void Huffman<T>::Coded(string& coded)
  185. {
  186. root = MakeHuffmanTree(data,data + data_size);
  187. DFS(root);
  188. BFS(root,data);
  189. cout<<"code:"<<endl;
  190. for (int i = 0; i < data_size; ++i)
  191. {
  192. cout<<data[i].data<<":"<<data[i].coded<<endl;
  193. }
  194. T *begin = m_begin;
  195. while (begin != m_end)
  196. {
  197. coded += FindVal(*begin);
  198. begin++;
  199. }
  200. //string subcode =
  201. }
  202. template<typename T>
  203. void Huffman<T>::DeCode(const string& codedstr,string& decodestr)
  204. {
  205. string::const_iterator iter = codedstr.begin();
  206. TreeNode<mydata<T> >* curNode = root;
  207. while (iter != codedstr.end())
  208. {
  209. if (curNode->plchild == NULL || curNode->prchild == NULL)
  210. {
  211. decodestr += (curNode->data).data;
  212. curNode = root;
  213. continue;
  214. }
  215. if (*iter == \'0\')
  216. curNode = curNode->plchild;
  217. if(*iter == \'1\')
  218. curNode = curNode->prchild;
  219. iter++;
  220. }
  221. }
  222. template<typename T>
  223. string Huffman<T>::FindVal(char c)
  224. {
  225. for (int i = 0; i < data_size ; ++i)
  226. {
  227. if (c != data[i].data)
  228. continue;
  229. return data[i].coded;
  230. }
  231. return string();
  232. }
  233. int main()
  234. {
  235. //mydata<char> *pdata = new mydata<char>[4];
  236. //pdata[0].data = \'a\';
  237. //pdata[0].freq = 7;
  238. //pdata[1].data = \'b\';
  239. //pdata[1].freq = 5;
  240. //pdata[2].data = \'c\';
  241. //pdata[2].freq = 2;
  242. //pdata[3].data = \'d\';
  243. //pdata[3].freq = 4;
  244. ////int a[12]={14,10,56,7,83,22,36,91,3,47,72,0};
  245. //TreeNode<mydata<char> >* pihuffmanTree = MakeHuffmanTree(pdata, pdata + 4);
  246. //DFS(pihuffmanTree);
  247. //BFS(pihuffmanTree);
  248. //string str = "cbcddddbbbbaaaaaaa";
  249. char *pmystr = "cbcddddbbbbaaaaaaa";
  250. Huffman<char> h;
  251. h.InputData(pmystr, pmystr + 18);
  252. cout<<"originstr: "<<pmystr<<endl;
  253. string coded;
  254. h.Coded(coded);
  255. cout<<"coded: "<<coded<<endl;
  256. string decode;
  257. h.DeCode(coded,decode);
  258. cout<<"decode: "<<decode<<endl;
  259. return 0;
  260. }

 

版权声明:本文为Honkee原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://www.cnblogs.com/Honkee/p/3780517.html