-
-
Notifications
You must be signed in to change notification settings - Fork 359
Add Huffman in C#. #82
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
85424b9
d91ea9e
5d5089a
664e3f9
233962f
ff95d09
2f79bb5
d2000db
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
// submitted by Julian Schacher (jspp) | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
|
||
namespace HuffmanCoding | ||
{ | ||
public class EncodeResult | ||
{ | ||
public List<bool> BitString { get; set; } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This isn't much more efficient than a regular There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will go for the BitArray then. I don't think a string is fitting in this case, since no real compression would be achieved. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The point isn't compression, it's showcasing the algorithm. And there was no compression achieved with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You mean since we usually save whole bytes anyway? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Strings in C# are UTF16 encoded which is 2 bytes. If you save the bitstring as a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still think just having this be a |
||
public Dictionary<char, List<bool>> Dictionary { get; set; } | ||
public HuffmanCoding.Node Tree { get; set; } | ||
|
||
/// <summary>
/// Bundles the outputs of an encoding run into a single result object.
/// </summary>
/// <param name="bitString">The encoded bits; stored in <see cref="BitString"/>.</param>
/// <param name="dictionary">The per-character codes; stored in <see cref="Dictionary"/>.</param>
/// <param name="tree">The root node of the tree; stored in <see cref="Tree"/>.</param>
public EncodeResult(List<bool> bitString, Dictionary<char, List<bool>> dictionary, HuffmanCoding.Node tree)
{
    Tree = tree;
    Dictionary = dictionary;
    BitString = bitString;
}
} | ||
|
||
public static class HuffmanCoding | ||
{ | ||
// The Node class used for the Huffman Tree. | ||
public class Node | ||
{ | ||
public Node[] Children { get; set; } = new Node[2]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just have two children, |
||
public List<bool> BitString { get; set; } = new List<bool>(); | ||
public int Weight { get; set; } | ||
public string Key { get; set; } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the key? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The key of a node is a string, so that a parent node's/branch's key is a combination of all its children's keys. |
||
|
||
/// <summary>
/// Creates a node with the given key and weight. The children array and
/// the bit string keep the defaults from their property initializers.
/// </summary>
/// <param name="key">Value assigned to <see cref="Key"/>.</param>
/// <param name="weight">Value assigned to <see cref="Weight"/>.</param>
public Node(string key, int weight)
{
    Weight = weight;
    Key = key;
}
} | ||
|
||
// Node with biggest value at the top. | ||
class NodePriorityList | ||
{ | ||
public List<Node> Nodes { get; private set; } = new List<Node>(); | ||
|
||
public NodePriorityList() { } | ||
/// <summary>
/// Builds the priority list from existing nodes, ordered by descending
/// weight, so the heaviest node comes first and the lightest comes last.
/// </summary>
/// <param name="nodes">Nodes to copy into the list; the argument itself is not modified.</param>
public NodePriorityList(List<Node> nodes)
{
    var heaviestFirst = nodes.OrderByDescending(node => node.Weight);
    Nodes = heaviestFirst.ToList();
}
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Have public NodePriorityList(List<Node> nodes)
{
Nodes = nodes.ToList();
Nodes.Sort();
} |
||
|
||
public void AddNode(Node newNode) | ||
{ | ||
if (Nodes.Count == 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just have var insertAt = Math.Max(Nodes.BinarySearch(newNode), 0);
Nodes.Insert(insertAt, newNode) |
||
{ | ||
Nodes.Add(newNode); | ||
return; | ||
} | ||
for (int i = Nodes.Count - 1; i >= 0; i--) | ||
{ | ||
if (Nodes[i].Weight > newNode.Weight) | ||
{ | ||
Nodes.Insert(i + 1, newNode); | ||
return; | ||
} | ||
else if (i == 0) | ||
Nodes.Insert(0, newNode); | ||
} | ||
} | ||
} | ||
|
||
/// <summary>
/// Encodes <paramref name="input"/> in three steps: build the tree from the
/// input, derive the per-character code dictionary from that tree, then
/// translate the input into a bit string using the dictionary.
/// </summary>
/// <param name="input">The text to encode.</param>
/// <returns>An <see cref="EncodeResult"/> holding the bit string, the dictionary and the tree root.</returns>
public static EncodeResult Encode(string input)
{
    Node tree = CreateTree(input);
    Dictionary<char, List<bool>> codes = CreateDictionary(tree);

    return new EncodeResult(CreateBitString(input, codes), codes, tree);
}
|
||
public static string Decode(EncodeResult result) | ||
{ | ||
var output = ""; | ||
Node currentNode = result.Tree; | ||
foreach (var boolean in result.BitString) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should probably name this |
||
{ | ||
// Go down the tree. | ||
if (!boolean) | ||
currentNode = currentNode.Children[0]; | ||
else | ||
currentNode = currentNode.Children[1]; | ||
|
||
// Check if it's a leaf node. | ||
if (currentNode.Key.Count() == 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just check if all of the children are null. That's how you determine the leaves in a binary tree There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should really not check for leaf nodes this way. It works when the alphabet of whatever you want to encode consists of single characters. You can also run Huffman coding on words, in which case this would fail miserably. The nice solution is to add something like this to public bool IsLeaf => LeftChild == null && RightChild == null;
// Then you can just do
if (currentNode.IsLeaf)
{
// ...
} |
||
{ | ||
output += currentNode.Key; | ||
currentNode = result.Tree; | ||
} | ||
} | ||
return output; | ||
} | ||
|
||
private static Node CreateTree(string input) | ||
{ | ||
// Create a List of all characters and their count in input by putting them into nodes. | ||
var nodes = new List<Node>(); | ||
foreach (var character in input) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's a nicer way to do this. Pseudo-ish code with a toy example: var input = "Hello, World";
var nodes = input
.GroupBy(c => c)
.Select(n => new Node { Key = n.Key, Weight = n.Count() })
.ToList(); |
||
{ | ||
var result = nodes.Where(n => n.Key[0] == character).SingleOrDefault(); | ||
|
||
if (result == null) | ||
nodes.Add(new Node(character.ToString(), 1)); | ||
else | ||
result.Weight++; | ||
} | ||
// Convert list of nodes to a NodePriorityList. | ||
var nodePriorityList = new NodePriorityList(nodes); | ||
nodes = nodePriorityList.Nodes; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't do this, just use the queue. In fact, just make There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, the |
||
|
||
// Create Tree. | ||
while (nodes.Count > 1) | ||
{ | ||
var parentNode = new Node("", 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should have a default constructor so you don't need to pass dummy parameters. I would actually have something like: public class Node
{
public static Node CreateLeaf(char key, int weight) { return new Node(...); }
public static Node CreateBranch(Node left, Node right) { return new Node(...); }
private Node(...) { /* regular Node constructor with every parameter */ }
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could also create two constructors for the |
||
// Add the two nodes with the smallest weight to the parent node and remove them from the tree. | ||
for (int i = 0; i < 2; i++) | ||
{ | ||
parentNode.Children[i] = nodes.Last(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make a nice method for this on the priority queue (and don't use the regular list at this point). Something like: var left = nodeQueue.Pop();
var right = nodeQueue.Pop();
nodeQueue.AddNode(Node.CreateBranch(left, right)); // the weight will be calculated in `CreateBranch` as `left.Weight + right.Weight` There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What are the improvements with using a queue? If I use it |
||
parentNode.Key += nodes.Last().Key; | ||
parentNode.Weight += nodes.Last().Weight; | ||
|
||
nodes.RemoveAt(nodes.Count - 1); | ||
}; | ||
nodePriorityList.AddNode(parentNode); | ||
if (parentNode.Weight > 100) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This shouldn't be here, the weights are not percentages There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's left from debugging and I forgot to remove it :S |
||
throw new Exception(); | ||
} | ||
|
||
return nodePriorityList.Nodes[0]; | ||
} | ||
|
||
private static Dictionary<char, List<bool>> CreateDictionary(Node root) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know I'm usually a big proponent for performance, but seriously, just use the recursive method here. With C# 7's local functions you can even make a nice, local function for the recursive part There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I'll take a look, but I found the non-recursive version easy to understand. Didn't try the recursive one, though. |
||
{ | ||
var dictionary = new Dictionary<char, List<bool>>(); | ||
|
||
var stack = new Stack<Node>(); | ||
stack.Push(root); | ||
Node temp; | ||
|
||
while (stack.Count != 0) | ||
{ | ||
temp = stack.Pop(); | ||
|
||
if (temp.Key.Count() == 1) | ||
dictionary.Add(temp.Key[0], temp.BitString); | ||
else | ||
{ | ||
for (int i = 0; i < temp.Children.Count(); i++) | ||
{ | ||
if (temp.Children[i] != null) | ||
{ | ||
if (i == 0) | ||
{ | ||
temp.Children[i].BitString.AddRange(temp.BitString); | ||
temp.Children[i].BitString.Add(false); | ||
} | ||
else | ||
{ | ||
temp.Children[i].BitString.AddRange(temp.BitString); | ||
temp.Children[i].BitString.Add(true); | ||
} | ||
|
||
stack.Push(temp.Children[i]); | ||
} | ||
} | ||
} | ||
} | ||
|
||
return dictionary; | ||
} | ||
|
||
/// <summary>
/// Translates <paramref name="input"/> into bits by looking up each
/// character's code in <paramref name="dictionary"/> and appending the
/// codes in input order.
/// </summary>
/// <param name="input">The text to translate.</param>
/// <param name="dictionary">Maps each character to its code; the indexer throws if a character is missing.</param>
/// <returns>A new list with the concatenated codes; empty when the input is empty.</returns>
private static List<bool> CreateBitString(string input, Dictionary<char, List<bool>> dictionary)
{
    var encodedBits = new List<bool>();

    for (int position = 0; position < input.Length; position++)
    {
        List<bool> code = dictionary[input[position]];
        encodedBits.AddRange(code);
    }

    return encodedBits;
}
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
// submitted by Julian Schacher (jspp) | ||
using System.Collections; | ||
using System.Collections.Generic; | ||
|
||
namespace HuffmanCoding | ||
{ | ||
class Program | ||
{ | ||
static void Main(string[] args) | ||
{ | ||
var result = HuffmanCoding.Encode("aaaabbbccd"); | ||
// Print dictionary. | ||
foreach (var entry in result.Dictionary) | ||
{ | ||
var bitString = ""; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just: |
||
foreach (var value in entry.Value) | ||
{ | ||
if (value) | ||
bitString += "1"; | ||
else | ||
bitString += "0"; | ||
} | ||
System.Console.WriteLine(entry.Key + " " + bitString); | ||
} | ||
// Print bitString. | ||
var readableBitString = ""; | ||
foreach (var boolean in result.BitString) | ||
{ | ||
if (boolean) | ||
readableBitString += "1"; | ||
else | ||
readableBitString += "0"; | ||
} | ||
System.Console.WriteLine(readableBitString); | ||
|
||
var originalString = HuffmanCoding.Decode(result); | ||
System.Console.WriteLine(originalString); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe a better name would be
EncodingResult