Here's my solution to the first challenge of the Cryptopals Crypto Challenges (https://cryptopals.com/sets/1/challenges/1).
I decided to use a cargo workspace to structure my project with the library crates inside the crates/
directory and the binary crates (the solutions to the challenges) inside the challenges/
directory, so, if it is not too subjective, I would also like to know if the project structure makes sense.
Challenge
Convert hex to base64
The string:
49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d
Should produce:
SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t
So go ahead and make that happen. You'll need to use this code for the rest of the exercises.
Cryptopals Rule
Always operate on raw bytes, never on encoded strings. Only use hex and base64 for pretty-printing.
Workspace Structure
├── Cargo.lock
├── Cargo.toml
├── challenges
│ └── set1
│ └── challenge-1
│ ├── Cargo.toml
│ └── src
│ └── main.rs
├── crates
│ └── encoding
│ ├── Cargo.toml
│ └── src
│ ├── error.rs
│ └── lib.rs
└── README.md
The code
- challenges/set-1/challenge-1/src/main.rs
use encoding::{Encode, Decode};
/// Hex-encoded input string given by the challenge statement.
static HEX_INPUT: &str = "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d";
/// Base64 text the challenge expects for the bytes encoded by `HEX_INPUT`.
static EXPECTED_B64_OUTPUT: &str = "SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t";
/// Decodes the challenge's hexadecimal input, prints it next to its base64
/// encoding, and checks the encoding against the expected output.
///
/// Panics if the input cannot be decoded or the output does not match.
fn main() {
    let raw_bytes = Vec::from_hex(HEX_INPUT).unwrap();
    let encoded = raw_bytes.to_base64();

    println!("Hexadecimal input: {}", HEX_INPUT);
    println!("Base64 output: {}", encoded);

    assert_eq!(encoded, EXPECTED_B64_OUTPUT);
}
- crates/encoding/src/lib.rs
mod error;
pub use crate::error::*;
/// Types that can hold the result of hexadecimal and base64 decoding
pub trait Decode: Sized {
/// Converts the given hexadecimal string to an instance of type `Self`.
///
/// Both lower-case and upper-case letters are supported.
///
/// # Errors
///
/// Returns a `DecodeHexError` when `s` cannot be decoded.
///
/// # Examples
///
/// ```
/// use encoding::Decode;
///
/// let result = Vec::from_hex("1a3d44").unwrap();
/// assert_eq!(result, vec![26, 61, 68]);
/// ```
fn from_hex(s: &str) -> Result<Self, DecodeHexError>;
}
impl Decode for Vec<u8> {
    /// Decodes a hexadecimal string into the bytes it represents.
    ///
    /// The input is processed as raw bytes, two at a time. Input containing
    /// multi-byte UTF-8 characters therefore produces an error instead of
    /// panicking on a `str` slice that does not fall on a character boundary,
    /// and sign characters such as `+` (which `u8::from_str_radix` accepts)
    /// are rejected.
    ///
    /// # Errors
    ///
    /// * `DecodeHexError::OddLength` if `s` has an odd number of bytes.
    /// * `DecodeHexError::InvalidHexChar` if any byte is not an ASCII
    ///   hexadecimal digit.
    fn from_hex(s: &str) -> Result<Self, DecodeHexError> {
        /// Returns the value (`0..=15`) of a single ASCII hexadecimal digit.
        fn nibble(digit: u8) -> Result<u8, DecodeHexError> {
            match digit {
                b'0'..=b'9' => Ok(digit - b'0'),
                b'a'..=b'f' => Ok(digit - b'a' + 10),
                b'A'..=b'F' => Ok(digit - b'A' + 10),
                _ => Err(DecodeHexError::InvalidHexChar),
            }
        }

        let digits = s.as_bytes();
        if digits.len() % 2 != 0 {
            return Err(DecodeHexError::OddLength);
        }

        // Collecting into `Result` stops at the first invalid digit.
        digits
            .chunks_exact(2)
            .map(|pair| -> Result<u8, DecodeHexError> {
                Ok((nibble(pair[0])? << 4) | nibble(pair[1])?)
            })
            .collect()
    }
}
/// Types that can be encoded to base64
pub trait Encode {
/// Encodes the given value into a base64 string.
///
/// # Examples
///
/// ```
/// use encoding::Encode;
///
/// let input = vec![108, 105, 103, 104, 116, 32, 119, 111, 114, 107];
/// let result = input.to_base64();
/// assert_eq!(result, String::from("bGlnaHQgd29yaw=="));
/// ```
fn to_base64(&self) -> String;
}
impl Encode for Vec<u8> {
    /// Encodes the bytes as base64, padding the output with `=` so that its
    /// length is always a multiple of 4.
    fn to_base64(&self) -> String {
        // Every 3-byte block, including a trailing partial one, produces
        // 4 output characters, so round the block count up.
        let mut base64 = String::with_capacity((self.len() + 2) / 3 * 4);
        for block in self.chunks(3) {
            base64.extend(block_to_base64(block));
        }

        // A trailing partial block is zero-filled by `block_to_base64`; the
        // characters that stem only from that fill are replaced with `=`.
        let padding = (3 - self.len() % 3) % 3;
        for _ in 0..padding {
            base64.pop();
        }
        for _ in 0..padding {
            base64.push('=');
        }
        base64
    }
}
/// Converts a block of 3 bytes to an iterator with 4 base64 encoded characters
fn block_to_base64(block: &[u8]) -> Vec<char> {
let mut b64_chars = Vec::with_capacity(4);
let (a, b, c) = match block.len() {
3 => (block[0], block[1], block[2]),
2 => (block[0], block[1], 0),
1 => (block[0], 0, 0),
_ => return vec![],
};
// first 6 bits of a
b64_chars.push(u8_to_base64(a >> 2));
// last 2 bits of a followed by the first 4 bits of b
b64_chars.push(u8_to_base64(a % 4 * 16 + (b >> 4)));
// last 4 bits of b followed by the first 2 bits of c
b64_chars.push(u8_to_base64(b % 16 * 4 + (c >> 6)));
// last 6 bits of c
b64_chars.push(u8_to_base64(c & 0x3f));
b64_chars
}
/// Converts a base64 sextet (a value in `0..=63`) to its character in the
/// standard base64 alphabet: `A-Z`, `a-z`, `0-9`, `+`, `/`.
///
/// # Panics
///
/// Panics if `u` is greater than 63.
fn u8_to_base64(u: u8) -> char {
    match u {
        0..=25 => (b'A' + u) as char,
        26..=51 => (b'a' + (u - 26)) as char,
        52..=61 => (b'0' + (u - 52)) as char,
        // `=` is the padding character, not part of the alphabet: 62 is `+`.
        62 => '+',
        63 => '/',
        _ => panic!("byte exceeded range for base64 conversion: {}", u),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Hexadecimal decoding.

    #[test]
    fn bytes_from_hex_correct_input() {
        assert_eq!(Vec::<u8>::from_hex("1a3d44"), Ok(vec![0x1a, 0x3d, 0x44]));
    }

    #[test]
    fn bytes_from_hex_odd_length() {
        assert_eq!(
            Vec::<u8>::from_hex("a4b5h"),
            Err(DecodeHexError::OddLength)
        );
    }

    #[test]
    fn bytes_from_hex_invalid_byte() {
        assert_eq!(
            Vec::<u8>::from_hex("bb7do7"),
            Err(DecodeHexError::InvalidHexChar)
        );
    }

    #[test]
    fn bytes_from_hex_empty() {
        assert_eq!(Vec::<u8>::from_hex(""), Ok(Vec::new()));
    }

    // Base64 encoding, one test per possible amount of padding.

    #[test]
    fn bytes_to_base64_padding_0() {
        let input = b"light wor".to_vec();
        assert_eq!(input.to_base64(), "bGlnaHQgd29y");
    }

    #[test]
    fn bytes_to_base64_padding_1() {
        let input = b"light work.".to_vec();
        assert_eq!(input.to_base64(), "bGlnaHQgd29yay4=");
    }

    #[test]
    fn bytes_to_base64_padding_2() {
        let input = b"light work".to_vec();
        assert_eq!(input.to_base64(), "bGlnaHQgd29yaw==");
    }

    #[test]
    fn bytes_to_base64_empty() {
        let input: Vec<u8> = Vec::new();
        assert_eq!(input.to_base64(), "");
    }
}
- crates/encoding/src/error.rs
use std::fmt;
/// Errors that can occur while decoding hexadecimal strings
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DecodeHexError {
    /// A hexadecimal string must have an even length
    OddLength,
    /// Attempt to parse a char that does not represent a hexadecimal value
    InvalidHexChar,
}

impl std::error::Error for DecodeHexError {}

impl fmt::Display for DecodeHexError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = match self {
            DecodeHexError::OddLength => "input string has an odd number of bytes",
            DecodeHexError::InvalidHexChar => {
                "input string contains at least one invalid character"
            }
        };
        // `pad` honours width/alignment flags just like formatting a `&str`
        // does, without needing the `Display` trait in scope for a `.fmt`
        // method call.
        f.pad(message)
    }
}
1 Answer 1
Naming
In Rust, a crate is just a unit of "packaging", so the crates
directory carries little meaning.
If you want to separate the helper libraries in there, it may be better to name it libraries
, libs
, utils
, ...
You may also consider doing without for now. A binary can have sub-modules, and unless you end up actually reusing libraries, you're just adding overhead for nothing.
Typing
You use &str
for hexadecimal and String
for base64, which is fairly arbitrary. It would be better, instead, to create new types.
/// Hexadecimal encoded value.
///
/// Newtype around the `String` holding the encoded text. The field is
/// private, so values are created through conversions such as `TryFrom`.
#[derive(Clone, Debug)]
pub struct Hex(String);
/// Base64 encoded value.
///
/// Newtype around the `String` holding the encoded text; the field is private.
#[derive(Clone, Debug)]
pub struct Base64(String);
This way, there's no ambiguity as to which encoding is used, and the constructor can assert that only adequate values are passed in:
impl TryFrom<String> for Hex {
    type Error = DecodeHexError;

    /// Validates `s` and wraps it as a `Hex` value.
    ///
    /// # Errors
    ///
    /// * `DecodeHexError::OddLength` if `s` has an odd number of bytes.
    /// * `DecodeHexError::InvalidHexChar` if any byte of `s` is not an ASCII
    ///   hexadecimal digit.
    fn try_from(s: String) -> Result<Self, Self::Error> {
        if s.len() % 2 != 0 {
            return Err(DecodeHexError::OddLength);
        }

        // A byte slice has no `any`/`all`; `bytes()` gives the iterator.
        if !s.bytes().all(|b| b.is_ascii_hexdigit()) {
            return Err(DecodeHexError::InvalidHexChar);
        }

        Ok(Self(s))
    }
}
// Similar implementation for `Base64`.
New traits are for new meanings
There's no need to define new traits to convert from one type to another, really.
Rust already provides generic conversion traits (From
and TryFrom
), which you should reuse for maximum compatibility with the rest of the ecosystem.
In this case, since the conversions are infallible, From
is the appropriate one:
// Signature sketches only: the `{ ... }` bodies are elided here.
// Bytes decoded from a borrowed `Hex` value.
impl<'a> From<&'a Hex> for Vec<u8> { ... }
// `Base64` value encoded from a borrowed byte slice.
impl<'a> From<&'a [u8]> for Base64 { ... }
Note: the validation step was moved to the constructor, so there's no need to revalidate now, which is why the conversions can no longer fail.
No indexes!
Rust has a very powerful suite of iterators, so that indexes are typically unnecessary.
Not using indexes means:
- No messing up indexes.
- No bounds-checking.
- More expressive logic.
For example, the decode step can be rewritten as:
impl<'a> From<&'a Hex> for Vec<u8> {
    /// Decodes the hexadecimal text held by `h` into raw bytes.
    ///
    /// Relies on `Hex` wrapping an even number of ASCII hexadecimal digits,
    /// which is what its `TryFrom<String>` constructor checks; that is why
    /// this conversion has no error case.
    fn from(h: &'a Hex) -> Self {
        /// Value (`0..=15`) of one already-validated ASCII hexadecimal digit.
        fn decode_hexdigit(digit: u8) -> u8 {
            if digit.is_ascii_digit() {
                digit - b'0'
            } else {
                digit.to_ascii_uppercase() - b'A' + 10
            }
        }

        // The wrapped `String` is read directly; this works inside the module
        // that defines `Hex` and does not require a `Deref` implementation.
        let digits = h.0.as_bytes();
        let mut bytes = Vec::with_capacity(digits.len() / 2);
        // `chunks_exact(2)` is available on stable Rust and yields `&[u8]`
        // slices of exactly two digits.
        for pair in digits.chunks_exact(2) {
            let high = decode_hexdigit(pair[0]) << 4;
            let low = decode_hexdigit(pair[1]);
            bytes.push(high + low)
        }
        bytes
    }
}
The loop can also be rewritten in a functional way:
// Same decoding as the loop, as an iterator chain on stable Rust:
// each two-digit chunk becomes one byte.
h.0.as_bytes()
    .chunks_exact(2)
    .map(|pair| {
        let high = decode_hexdigit(pair[0]) << 4;
        let low = decode_hexdigit(pair[1]);
        high + low
    })
    .collect()
Avoid extraneous allocations
Your block_to_base64
function does not, as it claims, return an iterator. Instead it returns a Vec
, which will perform a temporary allocation which you will immediately throw away.
You can either change the function to accept the buffer to push into as a mutable argument -- no shame in that -- or actually return a result which doesn't allocate for nothing.
Note that since you actually never invoke the function with an empty slice, you could easily just return an array [char; 4]
, although for performance I'd advise returning a [u8; 4]
, and transform each into char
at the last moment (4 bytes should be returned in a register, 16 bytes may not).
I also chose to replace the match
to avoid having an "invalid" case on the hands, but that's fairly personal.
The code is very similar to your own, really:
fn block_to_base64(block: &[u8]) -> [u8; 4] {
debug_assert!(!block.is_empty());
debug_assert!(block.len() < 4);
let a = block.get(0).unwrap_or(0);
let b = block.get(1).unwrap_or(0);
let c = block.get(2).unwrap_or(0);
let mut result = [0; 4];
// First 6 bits of `a`.
result[0] = u8_to_base64(a >> 2);
...
result
}
And with that we can write the encoding step:
impl<'a> From<&'a [u8]> for Base64 {
    /// Encodes `bytes` as base64, padded with `=` to a multiple of 4
    /// characters.
    fn from(bytes: &'a [u8]) -> Self {
        // Every 3-byte block, including a trailing partial one, produces
        // 4 output characters, so round the block count up.
        let mut result = String::with_capacity((bytes.len() + 2) / 3 * 4);

        for block in bytes.chunks(3) {
            for c in block_to_base64(block).iter() {
                result.push(*c as char);
            }
        }

        // Characters that stem only from the zero fill of a trailing partial
        // block are replaced with padding.
        match bytes.len() % 3 {
            1 => {
                result.pop();
                result.pop();
                result.push('=');
                result.push('=');
            }
            2 => {
                result.pop();
                result.push('=');
            }
            // Remainder 0: the input was a whole number of blocks.
            _ => (),
        }

        Self(result)
    }
}
Putting it all together
The challenge can then be rewritten as:
use std::error::Error;
use encoding::{Hex, Base64};
/// Hex-encoded input string given by the challenge statement.
static HEX_INPUT: &str = "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d";
/// Base64 text the challenge expects for the bytes encoded by `HEX_INPUT`.
static EXPECTED_B64_OUTPUT: &str = "SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t";
fn main() -> Result<(), Box<dyn Error>> {
let input: Hex = String::from(HEX_INPUT).try_into()?;
let bytes: Vec<u8> = input.into();
let output: Base64 = bytes.into();
println!("Hexadecimal input: {}", HEX_INPUT);
println!("Base64 output: {}", output.as_str());
assert_eq!(output.as_str(), EXPECTED_B64_OUTPUT);
}
Note: you'll need to implement as_str
, and may consider implementing Deref<Target = str>
and Display
, for both Hex
and Base64
. It's easy, just delegate to the wrapped String
.