Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit aa3194f

Browse files
fix(huffman): Handle edge cases and improve error handling (TheAlgorithms#912)
* fix(huffman): Handle edge cases and improve error handling - Change HuffmanDictionary::new() to return Option<Self> for safer API - Add proper handling for empty alphabet (returns None) - Add special case handling for single-symbol alphabets - Replace unwrap() calls with ? operator in decode() for better error handling - Add #[inline(always)] optimization for frequently called get_bit() - Add comprehensive tests for edge cases - Improve documentation with usage examples BREAKING CHANGE: HuffmanDictionary::new() now returns Option<Self> * Test: Increase coverage for huffman_encoding.rs decode method Adds two new test cases to ensure 100% patch coverage for HuffmanEncoding::decode: 1. test_decode_empty_encoding_struct: Covers the edge case where num_bits == 0. 2. minimal_decode_end_check: Ensures the final 'if self.num_bits > 0' check in the multi-symbol decode path is fully covered. Corrects 'char-lit-as-u8' and 'unnecessary-cast' lints in the newly added coverage tests to satisfy GitHub Actions.
1 parent f2a23e9 commit aa3194f

File tree

1 file changed

+163
-14
lines changed

1 file changed

+163
-14
lines changed

‎src/general/huffman_encoding.rs‎

Lines changed: 163 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,50 @@ pub struct HuffmanDictionary<T> {
7777
}
7878

7979
impl<T: Clone + Copy + Ord> HuffmanDictionary<T> {
80-
/// The list of alphabet symbols and their respective frequency should
81-
/// be given as input
82-
pub fn new(alphabet: &[(T, u64)]) -> Self {
80+
/// Creates a new Huffman dictionary from alphabet symbols and their frequencies.
81+
///
82+
/// Returns `None` if the alphabet is empty.
83+
///
84+
/// # Arguments
85+
/// * `alphabet` - A slice of tuples containing symbols and their frequencies
86+
///
87+
/// # Example
88+
/// ```
89+
/// # use the_algorithms_rust::general::HuffmanDictionary;
90+
/// let freq = vec![('a', 5), ('b', 2), ('c', 1)];
91+
/// let dict = HuffmanDictionary::new(&freq).unwrap();
92+
/// ```
93+
pub fn new(alphabet: &[(T, u64)]) -> Option<Self> {
94+
if alphabet.is_empty() {
95+
return None;
96+
}
97+
8398
let mut alph: BTreeMap<T, HuffmanValue> = BTreeMap::new();
99+
100+
// Special case: single symbol
101+
if alphabet.len() == 1 {
102+
let (symbol, _freq) = alphabet[0];
103+
alph.insert(
104+
symbol,
105+
HuffmanValue {
106+
value: 0,
107+
bits: 1, // Must use at least 1 bit per symbol
108+
},
109+
);
110+
111+
let root = HuffmanNode {
112+
left: None,
113+
right: None,
114+
symbol: Some(symbol),
115+
frequency: alphabet[0].1,
116+
};
117+
118+
return Some(HuffmanDictionary {
119+
alphabet: alph,
120+
root,
121+
});
122+
}
123+
84124
let mut queue: BinaryHeap<HuffmanNode<T>> = BinaryHeap::new();
85125
for (symbol, freq) in alphabet.iter() {
86126
queue.push(HuffmanNode {
@@ -101,11 +141,14 @@ impl<T: Clone + Copy + Ord> HuffmanDictionary<T> {
101141
frequency: sm_freq,
102142
});
103143
}
104-
let root = queue.pop().unwrap();
105-
HuffmanNode::get_alphabet(0, 0, &root, &mut alph);
106-
HuffmanDictionary {
107-
alphabet: alph,
108-
root,
144+
if let Some(root) = queue.pop() {
145+
HuffmanNode::get_alphabet(0, 0, &root, &mut alph);
146+
Some(HuffmanDictionary {
147+
alphabet: alph,
148+
root,
149+
})
150+
} else {
151+
None
109152
}
110153
}
111154
pub fn encode(&self, data: &[T]) -> HuffmanEncoding {
@@ -143,27 +186,48 @@ impl HuffmanEncoding {
143186
}
144187
self.num_bits += data.bits as u64;
145188
}
189+
190+
#[inline]
146191
fn get_bit(&self, pos: u64) -> bool {
147192
(self.data[(pos >> 6) as usize] & (1 << (pos & 63))) != 0
148193
}
194+
149195
/// In case the encoding is invalid, `None` is returned
150196
pub fn decode<T: Clone + Copy + Ord>(&self, dict: &HuffmanDictionary<T>) -> Option<Vec<T>> {
197+
// Handle empty encoding
198+
if self.num_bits == 0 {
199+
return Some(vec![]);
200+
}
201+
202+
// Special case: single symbol in dictionary
203+
if dict.alphabet.len() == 1 {
204+
//all bits represent the same symbol
205+
let symbol = dict.alphabet.keys().next()?;
206+
let result = vec![*symbol; self.num_bits as usize];
207+
return Some(result);
208+
}
209+
210+
// Normal case: multiple symbols
151211
let mut state = &dict.root;
152212
let mut result: Vec<T> = vec![];
213+
153214
for i in 0..self.num_bits {
154-
if state.symbol.is_some() {
155-
result.push(state.symbol.unwrap());
215+
if let Some(symbol) = state.symbol {
216+
result.push(symbol);
156217
state = &dict.root;
157218
}
158219
state = if self.get_bit(i) {
159-
state.right.as_ref().unwrap()
220+
state.right.as_ref()?
160221
} else {
161-
state.left.as_ref().unwrap()
222+
state.left.as_ref()?
162223
}
163224
}
225+
226+
// Check if we ended on a symbol
164227
if self.num_bits > 0 {
165228
result.push(state.symbol?);
166229
}
230+
167231
Some(result)
168232
}
169233
}
@@ -181,12 +245,97 @@ mod tests {
181245
.for_each(|(b, &cnt)| result.push((b as u8, cnt)));
182246
result
183247
}
248+
249+
#[test]
250+
fn empty_text() {
251+
let text = "";
252+
let bytes = text.as_bytes();
253+
let freq = get_frequency(bytes);
254+
let dict = HuffmanDictionary::new(&freq);
255+
assert!(dict.is_none());
256+
}
257+
258+
#[test]
259+
fn one_symbol_text() {
260+
let text = "aaaa";
261+
let bytes = text.as_bytes();
262+
let freq = get_frequency(bytes);
263+
let dict = HuffmanDictionary::new(&freq).unwrap();
264+
let encoded = dict.encode(bytes);
265+
assert_eq!(encoded.num_bits, 4);
266+
let decoded = encoded.decode(&dict).unwrap();
267+
assert_eq!(decoded, bytes);
268+
}
269+
270+
#[test]
271+
fn test_decode_empty_encoding_struct() {
272+
// Create a minimal but VALID HuffmanDictionary.
273+
// This is required because decode() expects a dictionary, even though
274+
// the content of the dictionary doesn't matter when num_bits == 0.
275+
let freq = vec![(b'a', 1)];
276+
let dict = HuffmanDictionary::new(&freq).unwrap();
277+
278+
// Manually create the target state: an encoding with 0 bits.
279+
let empty_encoding = HuffmanEncoding {
280+
data: vec![],
281+
num_bits: 0,
282+
};
283+
284+
let result = empty_encoding.decode(&dict);
285+
286+
assert_eq!(result, Some(vec![]));
287+
}
288+
289+
#[test]
290+
fn minimal_decode_end_check() {
291+
let freq = vec![(b'a', 1), (b'b', 1)];
292+
let bytes = b"ab";
293+
294+
let dict = HuffmanDictionary::new(&freq).unwrap();
295+
let encoded = dict.encode(bytes);
296+
297+
// This decode will go through the main loop and hit the final 'if self.num_bits > 0' check.
298+
let decoded = encoded.decode(&dict).unwrap();
299+
300+
assert_eq!(decoded, bytes);
301+
}
302+
303+
#[test]
304+
fn test_decode_corrupted_stream_dead_end() {
305+
// Create a dictionary with three symbols to ensure a deeper tree.
306+
// This makes hitting a dead-end (None pointer) easier.
307+
let freq = vec![(b'a', 1), (b'b', 1), (b'c', 1)];
308+
let bytes = b"ab";
309+
let dict = HuffmanDictionary::new(&freq).unwrap();
310+
311+
let encoded = dict.encode(bytes);
312+
313+
// Manually corrupt the stream to stop mid-symbol.
314+
// We will truncate num_bits by a small amount (e.g., 1 bit).
315+
// This forces the loop to stop on an *intermediate* node.
316+
let corrupted_encoding = HuffmanEncoding {
317+
data: encoded.data,
318+
// Shorten the bit count by one. The total length of the 'ab' stream
319+
// is likely 4 or 5 bits. This forces the loop to end one bit early,
320+
// leaving the state on an internal node.
321+
num_bits: encoded
322+
.num_bits
323+
.checked_sub(1)
324+
.expect("Encoding should be > 0 bits"),
325+
};
326+
327+
// Assert that the decode fails gracefully.
328+
// The loop finishes, the final 'if self.num_bits > 0' executes,
329+
// and result.push(state.symbol?) fails because state.symbol is None.
330+
assert_eq!(corrupted_encoding.decode(&dict), None);
331+
}
332+
184333
#[test]
185334
fn small_text() {
186335
let text = "Hello world";
187336
let bytes = text.as_bytes();
188337
let freq = get_frequency(bytes);
189-
let dict = HuffmanDictionary::new(&freq);
338+
let dict = HuffmanDictionary::new(&freq).unwrap();
190339
let encoded = dict.encode(bytes);
191340
assert_eq!(encoded.num_bits, 32);
192341
let decoded = encoded.decode(&dict).unwrap();
@@ -208,7 +357,7 @@ mod tests {
208357
);
209358
let bytes = text.as_bytes();
210359
let freq = get_frequency(bytes);
211-
let dict = HuffmanDictionary::new(&freq);
360+
let dict = HuffmanDictionary::new(&freq).unwrap();
212361
let encoded = dict.encode(bytes);
213362
assert_eq!(encoded.num_bits, 2372);
214363
let decoded = encoded.decode(&dict).unwrap();

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /