-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathB_huffman-compress.py
62 lines (43 loc) · 1.34 KB
/
B_huffman-compress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import contextlib, sys
import huffmancoding
from os import path
def main():
    """Huffman-compress a fixed input file into a fixed output file.

    Both paths are resolved relative to the directory that contains this
    script: the input is ``B Text Folder/Output.txt`` and the output is
    ``A Compressed Folder/NewOutput``.

    The input is read twice: once by get_frequencies() to count byte
    values, and once more to encode it.  The output receives the
    code-length table first (write_code_len_table) and the encoded data
    after it (compress).

    Raises:
        OSError: if the input cannot be opened for reading or the output
            cannot be opened for writing.
        ValueError: propagated from write_code_len_table() when a code
            length does not fit in 8 bits.
    """
    cwd = path.dirname(path.realpath(__file__))
    # Join path components instead of concatenating with a hard-coded '/'.
    inputfile = path.join(cwd, 'B Text Folder', 'Output.txt')
    outputfile = path.join(cwd, 'A Compressed Folder', 'NewOutput')

    # First pass over the input: count how often each byte value occurs.
    freqs = get_frequencies(inputfile)
    # Symbol 256 is the EOF marker that compress() emits last; count it
    # once here (presumably so the code tree assigns it a code).
    freqs.increment(256)
    code = freqs.build_code_tree()
    canoncode = huffmancoding.CanonicalCode(
        tree=code, symbollimit=freqs.get_symbol_limit())
    # Encode with the tree rebuilt from the canonical code, i.e. the same
    # code whose lengths write_code_len_table() puts into the output.
    code = canoncode.to_code_tree()

    # Second pass: write the table and the encoded data.  The raw output
    # file is its own context-manager item so it is closed even if the
    # bit stream cannot be constructed; the bit stream is closed first
    # (managers exit in reverse order), then the file.
    with open(inputfile, "rb") as inp, \
            open(outputfile, "wb") as out, \
            contextlib.closing(huffmancoding.BitOutputStream(out)) as bitout:
        write_code_len_table(bitout, canoncode)
        compress(code, inp, bitout)


def get_frequencies(filepath):
    """Count how often each byte value occurs in the file at *filepath*.

    Args:
        filepath: path of the file to scan; it is opened in binary mode.

    Returns:
        A ``huffmancoding.FrequencyTable`` created with 257 zeroed
        entries, on which ``increment(byte)`` has been called once for
        every byte read (byte values are 0-255; index 256 is not touched
        here).

    Raises:
        OSError: if the file cannot be opened or read.
    """
    freqs = huffmancoding.FrequencyTable([0] * 257)
    # Read in large chunks rather than one byte per call; iterating a
    # bytes object yields the same integer values that b[0] did.
    chunk_size = 64 * 1024
    with open(filepath, "rb") as inp:  # 'inp', not 'input': don't shadow the builtin
        while True:
            chunk = inp.read(chunk_size)
            if not chunk:
                break
            for byte in chunk:
                freqs.increment(byte)
    return freqs


def write_code_len_table(bitout, canoncode):
    """Write the code length of every symbol to *bitout*, 8 bits each.

    Symbols are visited in order from 0 up to (but excluding)
    ``canoncode.get_symbol_limit()``.  Each length is emitted as eight
    single-bit writes, most significant bit first.

    Args:
        bitout: object with a ``write(bit)`` method taking 0 or 1.
        canoncode: object providing ``get_symbol_limit()`` and
            ``get_code_length(symbol)``.

    Raises:
        ValueError: if a code length is 256 or more and therefore does
            not fit in 8 bits.  Lengths of earlier symbols have already
            been written when this happens.
    """
    shifts = (7, 6, 5, 4, 3, 2, 1, 0)  # bit positions, high to low
    symbol_count = canoncode.get_symbol_limit()
    for symbol in range(symbol_count):
        length = canoncode.get_code_length(symbol)
        if length >= 256:
            raise ValueError("The code for a symbol is too long")
        for shift in shifts:
            bitout.write((length >> shift) & 1)


def compress(code, inp, bitout):
    """Encode every byte of *inp* and write the result to *bitout*.

    A ``huffmancoding.HuffmanEncoder`` is created on *bitout*, given
    *code* as its ``codetree``, and fed each byte value read from *inp*
    in order.  After the last byte, symbol 256 is written as the EOF
    marker.

    Args:
        code: code tree assigned to the encoder's ``codetree`` attribute.
        inp: binary stream whose ``read(n)`` returns ``bytes`` and an
            empty result at end of input.
        bitout: bit output stream handed to the encoder.
    """
    enc = huffmancoding.HuffmanEncoder(bitout)
    enc.codetree = code
    # Read in large chunks rather than one byte per call; iterating a
    # bytes object yields the same integer values that b[0] did.
    chunk_size = 64 * 1024
    while True:
        chunk = inp.read(chunk_size)
        if not chunk:
            break
        for symbol in chunk:
            enc.write(symbol)
    enc.write(256)  # EOF


# Run the compressor only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()