From ac3ab5b47a381374976873f1e7d76cb09040b9a4 Mon Sep 17 00:00:00 2001 From: Kasey White Date: Wed, 29 Jun 2022 16:28:50 -0400 Subject: [PATCH] chore: start decoding docs and fix wording for encode docs --- crates/flat/src/decode/decoder.rs | 37 +++++++++++++++++++ crates/flat/src/encode/encoder.rs | 60 +++++++++++++++++++------------ crates/uplc/src/flat.rs | 6 ++++ 3 files changed, 81 insertions(+), 22 deletions(-) diff --git a/crates/flat/src/decode/decoder.rs b/crates/flat/src/decode/decoder.rs index a9732261..d58e800f 100644 --- a/crates/flat/src/decode/decoder.rs +++ b/crates/flat/src/decode/decoder.rs @@ -23,10 +23,21 @@ impl<'b> Decoder<'b> { T::decode(self) } + /// Decode an integer of any size. + /// This is byte alignment agnostic. + /// First we decode the next 8 bits of the buffer. + /// We take the 7 least significant bits as the 7 least significant bits of the current unsigned integer. + /// If the most significant bit of the 8 bits is 1 then we take the next 8 and repeat the process above, + /// filling in the next 7 least significant bits of the unsigned integer and so on. + /// If the most significant bit was instead 0 we stop decoding any more bits. + /// Finally we use zigzag to convert the unsigned integer back to a signed integer. pub fn integer(&mut self) -> Result { Ok(zigzag::to_isize(self.word()?)) } + /// Decode a single bit of the buffer to get a bool + /// We mask out a single bit of the buffer based on used bits + /// and check if it is 0 for false or 1 for true pub fn bool(&mut self) -> Result { let current_byte = self.buffer[self.pos]; let b = 0 != (current_byte & (128 >> self.used_bits)); @@ -34,21 +45,39 @@ impl<'b> Decoder<'b> { Ok(b) } + /// Decode a byte from the buffer. + /// This byte alignment agnostic. + /// We use the next 8 bits in the buffer and return the resulting byte. pub fn u8(&mut self) -> Result { self.bits8(8) } + /// Decode a byte array. 
+ /// Decodes a filler to byte align the buffer, + /// then decodes the next byte to get the array length up to a max of 255. + /// We decode bytes equal to the array length to form the byte array. + /// If the following byte for array length is not 0 we decode it and repeat above to continue decoding the byte array. + /// We stop once we hit a byte array length of 0. + /// If array length is 0 for first byte array length then we return an empty array. pub fn bytes(&mut self) -> Result, Error> { self.filler()?; self.byte_array() } + /// Decode a 32 bit char. + /// This is byte alignment agnostic. + /// First we decode the next 8 bits of the buffer. + /// We take the 7 least significant bits as the 7 least significant bits of the current unsigned integer. + /// If the most significant bit of the 8 bits is 1 then we take the next 8 and repeat the process above, + /// filling in the next 7 least significant bits of the unsigned integer and so on. + /// If the most significant bit was instead 0 we stop decoding any more bits. pub fn char(&mut self) -> Result { let character = self.word()? as u32; char::from_u32(character).ok_or(Error::DecodeChar(character)) } + // TODO: Do we need this? pub fn string(&mut self) -> Result { let mut s = String::new(); while self.bit()? { @@ -57,6 +86,14 @@ impl<'b> Decoder<'b> { Ok(s) } + /// Decode a string. + /// Decode a byte array and then convert it to a utf8 string. + /// Decodes a filler to byte align the buffer, + /// then decodes the next byte to get the array length up to a max of 255. + /// We decode bytes equal to the array length to form the byte array. + /// If the following byte for array length is not 0 we decode it and repeat above to continue decoding the byte array. + /// We stop once we hit a byte array length of 0. + /// If array length is 0 for first byte array length then we return an empty array. 
pub fn utf8(&mut self) -> Result { // TODO: Better Error Handling String::from_utf8(Vec::::decode(self)?).map_err(Error::from) diff --git a/crates/flat/src/encode/encoder.rs b/crates/flat/src/encode/encoder.rs index e3a86d9c..3b8c3252 100644 --- a/crates/flat/src/encode/encoder.rs +++ b/crates/flat/src/encode/encoder.rs @@ -31,7 +31,8 @@ impl Encoder { Ok(self) } - /// Encode one unsined byte. + + /// Encode 1 unsigned byte. /// Uses the next 8 bits in the buffer, can be byte aligned or byte unaligned pub fn u8(&mut self, x: u8) -> Result<&mut Self, Error> { if self.used_bits == 0 { @@ -44,8 +45,9 @@ impl Encoder { Ok(self) } - /// Encode a `bool` value. - /// Uses the next bit in the buffer to encode this information. + /// Encode a `bool` value. This is byte alignment agnostic. + /// Uses the next unused bit in the current byte to encode this information. + /// One for true and Zero for false pub fn bool(&mut self, x: bool) -> &mut Self { if x { self.one(); @@ -55,10 +57,12 @@ impl Encoder { self } + /// Encode a byte array. /// Uses filler to byte align the buffer, then writes byte array length up to 255. /// Following that it writes the next 255 bytes from the array. - /// After reaching the end of the buffer we write a 0 byte. Only write 0 byte if the byte array is empty. + /// We repeat writing length up to 255 and the next 255 bytes until we reach the end of the byte array. + /// After reaching the end of the byte array we write a 0 byte. Only write 0 byte if the byte array is empty. pub fn bytes(&mut self, x: &[u8]) -> Result<&mut Self, Error> { // use filler to write current buffer so bits used gets reset self.filler(); @@ -66,9 +70,10 @@ impl Encoder { self.byte_array(x) } - /// Encode a byte array in a byte aligned buffer. Throws exception if any bits for the current buffer byte were used. - /// writes byte array length up to 255 - /// following that it writes the next 255 bytes from the array. + /// Encode a byte array in a byte aligned buffer. 
Throws exception if any bits for the current byte were used. + /// Writes byte array length up to 255 + /// Following that it writes the next 255 bytes from the array. + /// We repeat writing length up to 255 and the next 255 bytes until we reach the end of the byte array. /// After reaching the end of the buffer we write a 0 byte. Only write 0 if the byte array is empty. pub fn byte_array(&mut self, arr: &[u8]) -> Result<&mut Self, Error> { if self.used_bits != 0 { @@ -80,10 +85,11 @@ impl Encoder { Ok(self) } - /// Encode a integer of any size. - /// First we use zigzag to double the number and encode the negative sign as the least significant bit. + /// Encode an integer of any size. + /// This is byte alignment agnostic. + /// First we use zigzag once to double the number and encode the negative sign as the least significant bit. /// Next we encode the 7 least significant bits of the unsigned integer. If the number is greater than - /// 127 we encode a leading one followed by repeating the above for the next 7 bits and so on. + /// 127 we encode a leading 1 followed by repeating the encoding above for the next 7 bits and so on. pub fn integer(&mut self, i: isize) -> &mut Self { let i = zigzag::to_usize(i); @@ -93,8 +99,9 @@ impl Encoder { } /// Encode a char of 32 bits. + /// This is byte alignment agnostic. /// We encode the 7 least significant bits of the unsigned byte. If the char value is greater than - /// 127 we encode a leading one followed by repeating the above for the next 7 bits and so on. + /// 127 we encode a leading 1 followed by repeating the above for the next 7 bits and so on. pub fn char(&mut self, c: char) -> &mut Self { self.word(c as usize); @@ -112,8 +119,9 @@ impl Encoder { self } + /// Encode a string. - /// Convert to byte array and then use byte array coding. + /// Convert to byte array and then use byte array encoding. /// Uses filler to byte align the buffer, then writes byte array length up to 255. 
/// Following that it writes the next 255 bytes from the array. /// After reaching the end of the buffer we write a 0 byte. Only write 0 byte if the byte array is empty. @@ -121,9 +129,10 @@ impl Encoder { self.bytes(s.as_bytes()) } - /// Encode a unsigned integer of any size + /// Encode a unsigned integer of any size. + /// This is byte alignment agnostic. /// We encode the 7 least significant bits of the unsigned byte. If the char value is greater than - /// 127 we encode a leading one followed by repeating the above for the next 7 bits and so on. + /// 127 we encode a leading 1 followed by repeating the above for the next 7 bits and so on. pub fn word(&mut self, c: usize) -> &mut Self { let mut d = c; loop { @@ -144,8 +153,9 @@ impl Encoder { } /// Encode a list of bytes with a function - /// If there are bytes in a list then write one bit followed by the functions encoding. - /// After the last item write a zero bit. If the list is empty only encode a zero bit. + /// This is byte alignment agnostic. + /// If there are bytes in a list then write 1 bit followed by the functions encoding. + /// After the last item write a 0 bit. If the list is empty only encode a 0 bit. pub fn encode_list_with( &mut self, list: Vec, @@ -161,6 +171,11 @@ impl Encoder { Ok(self) } + /// Encodes up to 8 bits of information and is byte alignment agnostic. + /// Uses unused bits in the current byte to write out the passed in byte value. + /// Overflows to the most significant digits of the next byte if number of bits to use is greater than unused bits. + /// Expects that number of bits to use is greater than or equal to required bits by the value. + /// The param num_bits is i64 to match unused_bits type. pub fn bits(&mut self, num_bits: i64, val: u8) -> &mut Self { match (num_bits, val) { (1, 0) => self.zero(), @@ -206,7 +221,7 @@ impl Encoder { self } - /// A filler amount of end 0s followed by a 1 at the end of a byte. 
+ /// A filler amount of end 0's followed by a 1 at the end of a byte. /// Used to byte align the buffer by padding out the rest of the byte. pub(crate) fn filler(&mut self) -> &mut Self { self.current_byte |= 1; @@ -215,8 +230,8 @@ impl Encoder { self } - /// Write a zero bit into the buffer. - /// Write out buffer if last used bit in a byte. + /// Write a 0 bit into the current byte. + /// Write out to buffer if last used bit in the current byte. fn zero(&mut self) { if self.used_bits == 7 { self.next_word(); @@ -225,8 +240,8 @@ impl Encoder { } } - /// Write a one bit into the buffer. - /// If last used bit in a byte then make last bit one and write out buffer. + /// Write a 1 bit into the current byte. + /// Write out to buffer if last used bit in the current byte. fn one(&mut self) { if self.used_bits == 7 { self.current_byte |= 1; @@ -237,7 +252,7 @@ impl Encoder { } } /// Write out byte regardless of current buffer alignment. - /// Write most signifcant bits in remaining unused bits for current byte, + /// Write most signifcant bits in remaining unused bits for the current byte, /// then write out the remaining bits at the beginning of the next byte. fn byte_unaligned(&mut self, x: u8) { let x_shift = self.current_byte | (x >> self.used_bits); @@ -258,6 +273,7 @@ impl Encoder { /// Writes byte array length up to 255 /// Following that it writes the next 255 bytes from the array. /// After reaching the end of the buffer we write a 0 byte. Only write 0 if the byte array is empty. + /// This is byte alignment agnostic. 
fn write_blk(&mut self, arr: &[u8], src_ptr: &mut usize) { let src_len = arr.len() - *src_ptr; let blk_len = src_len.min(255); diff --git a/crates/uplc/src/flat.rs b/crates/uplc/src/flat.rs index 04056f9d..1701774f 100644 --- a/crates/uplc/src/flat.rs +++ b/crates/uplc/src/flat.rs @@ -158,10 +158,16 @@ where impl Encode for &Constant { fn encode(&self, e: &mut Encoder) -> Result<(), en::Error> { match self { + // Integers are typically smaller so we save space + // by encoding them 7 bits at a time and this allows it to be byte alignment agnostic. Constant::Integer(i) => { encode_constant(0, e)?; i.encode(e)?; } + // Strings and bytestrings span multiple bytes so using bytestring is + // the most effective encoding. + // e.g. A 17 or greater length byte array loses efficiency being encoded as + // an unsigned integer instead of a byte array Constant::ByteString(bytes) => { encode_constant(1, e)?; bytes.encode(e)?;