Skip to content

Commit 5bd5809

Browse files
authored
Merge pull request #349 from runtimeerror11/Huffman_Coding
Huffman Algorithm Implementation in C++.
2 parents a8b12d9 + 6abb50a commit 5bd5809

File tree

7 files changed

+11839
-0
lines changed

7 files changed

+11839
-0
lines changed
Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
#include "Huffman.hpp"
2+
3+
void Huffman::createArr()
4+
{
5+
for (int i = 0; i < 128; i++)
6+
{
7+
node *a = new node();
8+
a->c = i;
9+
a->freq = 0;
10+
vec.push_back(a);
11+
}
12+
}
13+
14+
// Traversing the constructed tree to generate huffman codes of each present character
15+
void Huffman::traverse(node *n, string s)
16+
{
17+
if (n->left == NULL && n->right == NULL)
18+
{
19+
n->code = s;
20+
return;
21+
}
22+
if (n->left != NULL)
23+
{
24+
traverse(n->left, s + "0");
25+
}
26+
if (n->right != NULL)
27+
{
28+
traverse(n->right, s + '1');
29+
}
30+
}
31+
32+
// Function to convert binary string to its equivalent decimal value
33+
int Huffman::binToDec(string s)
34+
{
35+
int ans = 0;
36+
for (auto i : s)
37+
{
38+
ans = ans * 2 + i - '0';
39+
}
40+
return ans;
41+
}
42+
43+
// Creating Min Heap of Nodes by frequency of characters in the input file
44+
void Huffman::createMinHeap()
45+
{
46+
char c;
47+
istream.open(inFileName, ios::in);
48+
49+
while (istream.get(c))
50+
{
51+
vec[c]->freq++;
52+
}
53+
istream.close();
54+
for (auto i : vec)
55+
{
56+
if (i->freq > 0)
57+
{
58+
pq.push(i);
59+
}
60+
}
61+
}
62+
63+
// Constructing the Huffman tree
64+
void Huffman::createTree()
65+
{
66+
node *l = NULL, *r = NULL;
67+
priority_queue<node *, vector<node *>, Comp> temp(pq);
68+
69+
while (temp.size() != 1)
70+
{
71+
l = temp.top();
72+
temp.pop();
73+
74+
r = temp.top();
75+
temp.pop();
76+
root = new node();
77+
root->freq = l->freq + r->freq;
78+
root->left = l;
79+
root->right = r;
80+
81+
temp.push(root);
82+
}
83+
// here at last ,only one element will be left in the temp,which will be root of huffman tree
84+
// and stored in root variable of Huffman class
85+
priority_queue<node *, vector<node *>, Comp> temp1(pq);
86+
node *u = NULL;
87+
while (!temp1.empty())
88+
{
89+
temp1.pop();
90+
}
91+
}
92+
93+
// Generating Huffman codes(just to initialise the traverse func)
94+
void Huffman::createCodes()
95+
{
96+
traverse(root, "");
97+
}
98+
99+
// Saving Huffman Encoded File
100+
void Huffman::saveEncodedFile()
101+
{
102+
istream.open(inFileName, ios::in);
103+
ostream.open(outFileName, ios::out | ios::binary);
104+
string in = "";
105+
string s = "";
106+
char id;
107+
108+
// Saving the meta data (huffman tree)
109+
in += (char)pq.size();
110+
111+
priority_queue<node *, vector<node *>, Comp> tempPQ(pq);
112+
while (!tempPQ.empty())
113+
{
114+
node *curr = tempPQ.top();
115+
in += curr->c;
116+
// Saving 16 decimal values representing code of curr->data
117+
s.assign(127 - curr->code.length(), '0');
118+
s += '1';
119+
s += curr->code;
120+
// Saving decimal values of every 8-bit binary code
121+
in += (char)binToDec(s.substr(0, 8));
122+
for (int i = 0; i < 15; i++)
123+
{
124+
s = s.substr(8);
125+
in += (char)binToDec(s.substr(0, 8));
126+
}
127+
tempPQ.pop();
128+
}
129+
s.clear();
130+
131+
// Saving codes of every charachter appearing in the input file
132+
istream.get(id);
133+
while (!istream.eof())
134+
{
135+
s += vec[id]->code;
136+
// Saving decimal values of every 8-bit binary code
137+
while (s.length() > 8)
138+
{
139+
in += (char)binToDec(s.substr(0, 8));
140+
s = s.substr(8);
141+
}
142+
istream.get(id);
143+
}
144+
145+
// Finally if bits remaining are less than 8, append 0's
146+
int count = 8 - s.length();
147+
if (s.length() < 8)
148+
{
149+
s.append(count, '0');
150+
}
151+
in += (char)binToDec(s);
152+
// append count of appended 0's
153+
in += (char)count;
154+
155+
// write the in string to the output file
156+
ostream.write(in.c_str(), in.size());
157+
istream.close();
158+
ostream.close();
159+
}
160+
161+
// Function to convert a decimal number to its equivalent binary string
162+
string Huffman::decToBin(int inNum)
163+
{
164+
string temp = "", res = "";
165+
while (inNum > 0)
166+
{
167+
temp += (inNum % 2 + '0');
168+
inNum /= 2;
169+
}
170+
res.append(8 - temp.length(), '0');
171+
for (int i = temp.length() - 1; i >= 0; i--)
172+
{
173+
res += temp[i];
174+
}
175+
return res;
176+
}
177+
178+
// Reconstructing the Huffman tree while Decoding the file
179+
void Huffman::buildTree(char a_code, string &path)
180+
{
181+
node *curr = root;
182+
183+
for (int i = 0; i < path.length(); i++)
184+
{
185+
if (path[i] == '0')
186+
{
187+
if (curr->left == NULL)
188+
{
189+
curr->left = new node();
190+
}
191+
curr = curr->left;
192+
}
193+
else if (path[i] == '1')
194+
{
195+
if (curr->right == NULL)
196+
{
197+
curr->right = new node();
198+
}
199+
curr = curr->right;
200+
}
201+
}
202+
curr->c = a_code;
203+
}
204+
// Saving Decoded File to obtain the original File
205+
void Huffman::saveDecodedFile()
206+
{
207+
istream.open(inFileName, ios::in | ios::binary);
208+
ostream.open(outFileName, ios::out);
209+
unsigned char size;
210+
istream.read(reinterpret_cast<char *>(&size), 1);
211+
// Reading count at the end of the file which is number of bits appended to make final value 8-bit
212+
istream.seekg(-1, ios::end);
213+
char count0;
214+
istream.read(&count0, 1);
215+
// Ignoring the meta data (huffman tree) (1 + 17 * size) and reading remaining file
216+
istream.seekg(1 + 17 * size, ios::beg);
217+
218+
vector<unsigned char> text;
219+
unsigned char textseg;
220+
istream.read(reinterpret_cast<char *>(&textseg), 1);
221+
while (!istream.eof())
222+
{
223+
text.push_back(textseg);
224+
istream.read(reinterpret_cast<char *>(&textseg), 1);
225+
}
226+
227+
node *curr = root;
228+
string path;
229+
for (int i = 0; i < text.size() - 1; i++)
230+
{
231+
// Converting decimal number to its equivalent 8-bit binary code
232+
path = decToBin(text[i]);
233+
if (i == text.size() - 2)
234+
{
235+
path = path.substr(0, 8 - count0);
236+
}
237+
// Traversing huffman tree and appending resultant data to the file
238+
for (int j = 0; j < path.size(); j++)
239+
{
240+
if (path[j] == '0')
241+
{
242+
curr = curr->left;
243+
}
244+
else
245+
{
246+
curr = curr->right;
247+
}
248+
249+
if (curr->left == NULL && curr->right == NULL)
250+
{
251+
ostream.put(curr->c);
252+
curr = root;
253+
}
254+
}
255+
}
256+
istream.close();
257+
ostream.close();
258+
}
259+
260+
void Huffman::compress()
261+
{
262+
createMinHeap();
263+
createTree();
264+
createCodes();
265+
saveEncodedFile();
266+
}
267+
268+
// Reading the file to reconstruct the Huffman tree
269+
void Huffman::getTree()
270+
{
271+
istream.open(inFileName, ios::in | ios::binary);
272+
// Reading size of MinHeap
273+
unsigned char size;
274+
istream.read(reinterpret_cast<char *>(&size), 1);
275+
root = new node();
276+
// next size * (1 + 16) characters contain (char)data and (string)code[in decimal]
277+
for (int i = 0; i < size; i++)
278+
{
279+
char aCode;
280+
unsigned char hCodeC[16];
281+
istream.read(&aCode, 1);
282+
istream.read(reinterpret_cast<char *>(hCodeC), 16);
283+
// converting decimal characters into their binary equivalent to obtain code
284+
string hCodeStr = "";
285+
for (int i = 0; i < 16; i++)
286+
{
287+
hCodeStr += decToBin(hCodeC[i]);
288+
}
289+
// Removing padding by ignoring first (127 - curr->code.length()) '0's and next '1' character
290+
int j = 0;
291+
while (hCodeStr[j] == '0')
292+
{
293+
j++;
294+
}
295+
hCodeStr = hCodeStr.substr(j + 1);
296+
// Adding node with aCode data and hCodeStr string to the huffman tree
297+
buildTree(aCode, hCodeStr);
298+
}
299+
istream.close();
300+
}
301+
void Huffman::decompress()
302+
{
303+
getTree();
304+
saveDecodedFile();
305+
}
306+
307+
int main(int argc, char *argv[])
308+
{
309+
if (argc != 3)
310+
{
311+
cout << "File not detected! ";
312+
return 0;
313+
}
314+
cout << argv[1] << endl
315+
<< argv[2] << endl;
316+
Huffman h(argv[1], argv[2]);
317+
int i{};
318+
cout << "Select the option 1.Compress 2.Decompress\n";
319+
cin >> i;
320+
if (i == 1)
321+
{
322+
h.compress();
323+
cout << "Compressed Successfully!";
324+
}
325+
326+
if (i == 2)
327+
{
328+
h.decompress();
329+
cout << "Decompressed Successfully" << endl;
330+
}
331+
return 0;
332+
}

0 commit comments

Comments
 (0)