-
-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Compression: Huffman Coding #1513
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Open
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
90f1982
Huffman Compression Algorithm
aladin002dz 4e87b22
Huffman Compression Algorithm Comments
aladin002dz c4e98aa
Huffman Compression Algorithm Comments
aladin002dz bd83db9
Compression Huffman: Optimize algorithm
aladin002dz b02eae8
Compression Huffman: prettier
aladin002dz f951e51
Compression: Huffman coding using Heaps
aladin002dz 85df912
Prettier Style
aladin002dz b583f2e
Compression; removing unecessary logging
aladin002dz File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
| /** | ||
| * Huffman Coding is a lossless data compression algorithm that uses variable-length codes to represent characters. | ||
| * | ||
| * The algorithm works by assigning shorter codes to characters that occur more frequently. This results in a compressed representation of the data. | ||
| * | ||
| * Huffman Coding is widely used in a variety of applications, including file compression, data transmission, and image processing. | ||
| * | ||
| * More information on Huffman Coding can be found here: https://en.wikipedia.org/wiki/Huffman_coding | ||
| */ | ||
|
|
||
| /** | ||
| * Builds a frequency table from a string. | ||
| * @example | ||
| * buildFrequencyTable('this is an example for huffman encoding') | ||
| * returns { ' ': 6, a: 2, c: 1, d: 1, e: 4, f: 3, g: 1, h: 2, i: 3, l: 1, m: 1, n: 4, o: 1, p: 1, r: 1, s: 2, t: 2, u: 1, x: 1 } | ||
| * @param {string} data - The string to build the frequency table from. | ||
| * @returns {Object} - The frequency table. | ||
| */ | ||
| function buildFrequencyTable(data) { | ||
| const freqTable = {} | ||
|
|
||
| for (const char of data) { | ||
| freqTable[char] = (freqTable[char] || 0) + 1 | ||
| } | ||
|
|
||
| return freqTable | ||
| } | ||
|
|
||
| /** | ||
| * A Huffman Node is a node in a Huffman tree. | ||
| * @class HuffmanNode | ||
| * @property {string} char - The character represented by the node. | ||
| * @property {number} freq - The frequency of the character. | ||
| * @property {HuffmanNode} left - The left child of the node. | ||
| * @property {HuffmanNode} right - The right child of the node. | ||
| */ | ||
| class HuffmanNode { | ||
| constructor(char, freq) { | ||
| this.char = char | ||
| this.freq = freq | ||
| this.left = null | ||
| this.right = null | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Builds a Huffman tree from a frequency table. | ||
| * @param {Object} freqTable - The frequency table to use for building the tree. | ||
| * @returns {HuffmanNode} - The root node of the Huffman tree. | ||
| */ | ||
| function buildHuffmanTree(freqTable) { | ||
| const nodes = Object.keys(freqTable).map( | ||
| (char) => new HuffmanNode(char, freqTable[char]) | ||
| ) | ||
|
|
||
| while (nodes.length > 1) { | ||
| nodes.sort((a, b) => b.freq - a.freq) | ||
| const right = nodes.pop() | ||
| const left = nodes.pop() | ||
| const parent = new HuffmanNode(null, left.freq + right.freq) | ||
| parent.left = left | ||
| parent.right = right | ||
| nodes.push(parent) | ||
| } | ||
|
|
||
| return nodes[0] | ||
| } | ||
|
|
||
| /** | ||
| * Builds a Huffman code table from a Huffman tree. | ||
| * @param {HuffmanNode} root - The root node of the Huffman tree. | ||
| * @param {string} [prefix=''] - The prefix to use for the Huffman codes. | ||
| * @param {Object} [codes={}] - The Huffman code table. | ||
| * @returns {Object} - The Huffman code table. | ||
| */ | ||
| function buildHuffmanCodes(root, prefix = '', codes = {}) { | ||
| if (root) { | ||
| if (root.char) { | ||
| codes[root.char] = prefix | ||
| } | ||
| buildHuffmanCodes(root.left, prefix + '0', codes) | ||
| buildHuffmanCodes(root.right, prefix + '1', codes) | ||
| } | ||
| return codes | ||
| } | ||
|
|
||
| /** | ||
| * Encodes a string using Huffman Coding. | ||
| * @param {string} data - The string to encode. | ||
| * @param {Object} freqTable - The frequency table to use for encoding. | ||
| * @returns {string} - The encoded string. | ||
| */ | ||
| function encodeHuffman(data, freqTable) { | ||
| const root = buildHuffmanTree(freqTable) | ||
| const codes = buildHuffmanCodes(root) | ||
|
|
||
| let encodedData = '' | ||
| for (let char of data) { | ||
| encodedData += codes[char] | ||
| } | ||
|
|
||
| return encodedData | ||
| } | ||
|
|
||
| /** | ||
| * Decodes a string using Huffman Coding. | ||
| * @param {string} encodedData - The string to decode. | ||
| * @param {HuffmanNode} root - The root node of the Huffman tree. | ||
| * @returns {string} - The decoded string. | ||
| */ | ||
| function decodeHuffman(encodedData, root) { | ||
| let decodedData = '' | ||
| let currentNode = root | ||
| for (let bit of encodedData) { | ||
| if (bit === '0') { | ||
| currentNode = currentNode.left | ||
| } else { | ||
| currentNode = currentNode.right | ||
| } | ||
|
|
||
| if (currentNode.char) { | ||
| decodedData += currentNode.char | ||
| currentNode = root | ||
| } | ||
| } | ||
|
|
||
| return decodedData | ||
| } | ||
|
|
||
| export { | ||
| buildHuffmanCodes, | ||
| buildHuffmanTree, | ||
| encodeHuffman, | ||
| decodeHuffman, | ||
| buildFrequencyTable | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| import { | ||
| BinaryHeap, | ||
| minHeapComparator | ||
| } from '../Data-Structures/Heap/BinaryHeap' | ||
| class Node { | ||
| constructor(symbol, frequency, left = null, right = null) { | ||
| this.symbol = symbol | ||
| this.frequency = frequency | ||
| this.left = left | ||
| this.right = right | ||
| } | ||
| } | ||
|
|
||
| class HuffmanCoder { | ||
| constructor(data) { | ||
| this.data = data | ||
| this.codes = {} | ||
| this.buildHuffmanTree() | ||
| this.generateCodes(this.huffmanTree, '') | ||
| } | ||
|
|
||
| buildFrequencyTable() { | ||
| const frequencyTable = {} | ||
| for (const char of this.data) { | ||
| if (char in frequencyTable) { | ||
| frequencyTable[char]++ | ||
| } else { | ||
| frequencyTable[char] = 1 | ||
| } | ||
| } | ||
| return frequencyTable | ||
| } | ||
|
|
||
| buildHuffmanTree() { | ||
| const frequencyTable = this.buildFrequencyTable() | ||
| const minHeap = new BinaryHeap(minHeapComparator) | ||
|
|
||
| for (const symbol in frequencyTable) { | ||
| minHeap.insert(new Node(symbol, frequencyTable[symbol])) | ||
| } | ||
|
|
||
| while (minHeap.size() > 1) { | ||
| const left = minHeap.extractTop() | ||
| const right = minHeap.extractTop() | ||
| const combined = new Node( | ||
| null, | ||
| left.frequency + right.frequency, | ||
| left, | ||
| right | ||
| ) | ||
| minHeap.insert(combined) | ||
| } | ||
|
|
||
| this.huffmanTree = minHeap.extractTop() | ||
| } | ||
|
|
||
| generateCodes(node, code) { | ||
| if (!node) return | ||
|
|
||
| if (node.symbol) { | ||
| this.codes[node.symbol] = code | ||
| } else { | ||
| this.generateCodes(node.left, code + '0') | ||
| this.generateCodes(node.right, code + '1') | ||
| } | ||
| } | ||
|
|
||
| encode(data) { | ||
| let encodedString = '' | ||
| for (const char of data) { | ||
| encodedString += this.codes[char] | ||
| } | ||
| return encodedString | ||
| } | ||
|
|
||
| decode(encodedString) { | ||
| let decodedString = '' | ||
| let currentNode = this.huffmanTree | ||
|
|
||
| for (const bit of encodedString) { | ||
| if (bit === '0') { | ||
| currentNode = currentNode.left | ||
| } else { | ||
| currentNode = currentNode.right | ||
| } | ||
|
|
||
| if (currentNode.symbol) { | ||
| decodedString += currentNode.symbol | ||
| currentNode = this.huffmanTree | ||
| } | ||
| } | ||
|
|
||
| return decodedString | ||
| } | ||
| } | ||
|
|
||
| export { HuffmanCoder } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| import { | ||
| buildHuffmanCodes, | ||
| buildHuffmanTree, | ||
| encodeHuffman, | ||
| decodeHuffman, | ||
| buildFrequencyTable | ||
| } from '../HuffmanArray' | ||
|
|
||
| describe('Huffman Coding', () => { | ||
| let data, freqTable, root | ||
|
|
||
| beforeEach(() => { | ||
| data = 'this is an example for huffman encoding' | ||
| freqTable = buildFrequencyTable(data) | ||
| root = buildHuffmanTree(freqTable) | ||
| }) | ||
|
|
||
| it('should encode and decode a string correctly', () => { | ||
| const encodedData = encodeHuffman(data, freqTable) | ||
| const decodedData = decodeHuffman(encodedData, root) | ||
|
|
||
| expect(decodedData).toEqual(data) | ||
| }) | ||
|
|
||
| it('should build Huffman codes correctly', () => { | ||
| const codes = buildHuffmanCodes(root) | ||
|
|
||
| expect(codes['t']).toEqual('01101') | ||
| expect(codes['h']).toEqual('0111') | ||
| expect(codes['i']).toEqual('0100') | ||
| expect(codes['s']).toEqual('1010') | ||
| }) | ||
| }) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| import { HuffmanCoder } from '../HuffmanHeap' | ||
|
|
||
| describe('HuffmanCoder', () => { | ||
| it('should encode and decode a simple string', () => { | ||
| const data = 'hello world' | ||
| const coder = new HuffmanCoder(data) | ||
| const encodedString = coder.encode(data) | ||
| const decodedString = coder.decode(encodedString) | ||
| expect(decodedString).toEqual(data) | ||
| }) | ||
|
|
||
| it('should encode and decode a string with repeating characters', () => { | ||
| const data = 'aaaaabbbbcccdeeeee' | ||
| const coder = new HuffmanCoder(data) | ||
| const encodedString = coder.encode(data) | ||
| const decodedString = coder.decode(encodedString) | ||
| expect(decodedString).toEqual(data) | ||
| }) | ||
| }) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.