From 41487733f7015d1c9feff48fd17a118df1957f4e Mon Sep 17 00:00:00 2001
From: Kasey White
Date: Sat, 28 May 2022 00:23:20 -0400
Subject: [PATCH] feat: builtin encoding

Co-authored-by: rvcas
---
 crates/flat/src/filler.rs   |  18 ++--
 crates/flat/src/lib.rs      |   2 +-
 crates/uplc/src/ast.rs      | 167 ---------------------------------
 crates/uplc/src/builtins.rs | 109 +++++++++++-----------
 crates/uplc/src/flat.rs     | 208 ++++++++++++++++++++++++++++++++++++
 crates/uplc/src/lib.rs      |   1 +
 6 files changed, 274 insertions(+), 231 deletions(-)
 create mode 100644 crates/uplc/src/flat.rs

diff --git a/crates/flat/src/filler.rs b/crates/flat/src/filler.rs
index 162211b0..7e33d2c1 100644
--- a/crates/flat/src/filler.rs
+++ b/crates/flat/src/filler.rs
@@ -1,13 +1,13 @@
 pub enum Filler {
-    FillerStart(Box<Filler>),
+    // FillerStart(Box<Filler>),
     FillerEnd,
 }
 
-impl Filler {
-    pub fn len(&self) -> usize {
-        match self {
-            Filler::FillerStart(f) => f.len() + 1,
-            Filler::FillerEnd => 1,
-        }
-    }
-}
+// impl Filler {
+//     pub fn len(&self) -> usize {
+//         match self {
+//             Filler::FillerStart(f) => f.len() + 1,
+//             Filler::FillerEnd => 1,
+//         }
+//     }
+// }
diff --git a/crates/flat/src/lib.rs b/crates/flat/src/lib.rs
index 8d7d14f1..cf0f6fc0 100644
--- a/crates/flat/src/lib.rs
+++ b/crates/flat/src/lib.rs
@@ -1,7 +1,7 @@
 mod encode;
 mod encoder;
 mod filler;
-mod zigzag;
+pub mod zigzag;
 
 pub mod en {
     pub use super::encode::*;
diff --git a/crates/uplc/src/ast.rs b/crates/uplc/src/ast.rs
index b5914863..91794e48 100644
--- a/crates/uplc/src/ast.rs
+++ b/crates/uplc/src/ast.rs
@@ -1,33 +1,11 @@
-use anyhow::anyhow;
-use flat::en::{Encode, Encoder};
-
 use crate::builtins::DefaultFunction;
 
-const TERM_TAG_WIDTH: u32 = 4;
-const CONST_TAG_WIDTH: u32 = 4;
-
 #[derive(Debug, Clone)]
 pub struct Program {
     pub version: (usize, usize, usize),
     pub term: Term,
 }
 
-impl Program {
-    pub fn flat(&self) -> anyhow::Result<Vec<u8>> {
-        let bytes = flat::encode(self.clone()).map_err(|err| anyhow!("{}", err))?;
-
-        Ok(bytes)
-    }
-
-    pub fn flat_hex(&self) -> anyhow::Result<String> {
-        let bytes = self.flat()?;
-
-        let hex = hex::encode(&bytes);
-
-        Ok(hex)
-    }
-}
-
 #[derive(Debug, Clone)]
 pub enum Term {
     // tag: 0
@@ -54,22 +32,6 @@ pub enum Term {
     Builtin(DefaultFunction),
 }
 
-pub fn encode_term_tag(tag: u8, e: &mut Encoder) -> Result<(), String> {
-    safe_encode_bits(TERM_TAG_WIDTH, tag, e)
-}
-
-pub fn safe_encode_bits(num_bits: u32, byte: u8, e: &mut Encoder) -> Result<(), String> {
-    if 2_u8.pow(num_bits) < byte {
-        Err(format!(
-            "Overflow detected, cannot fit {} in {} bits.",
-            byte, num_bits
-        ))
-    } else {
-        e.bits(num_bits as i64, byte);
-        Ok(())
-    }
-}
-
 #[derive(Debug, Clone)]
 pub enum Constant {
     // TODO: figure out the right size for this
@@ -86,132 +48,3 @@ pub enum Constant {
     // tag: 5
     Bool(bool),
 }
-
-pub fn encode_constant(tag: u8, e: &mut Encoder) -> Result<(), String> {
-    e.encode_list_with(encode_constant_tag, [tag].to_vec())
-}
-
-pub fn encode_constant_tag(tag: u8, e: &mut Encoder) -> Result<(), String> {
-    safe_encode_bits(CONST_TAG_WIDTH, tag, e)
-}
-
-impl Encode for Program {
-    fn encode(&self, e: &mut Encoder) -> Result<(), String> {
-        let (major, minor, patch) = self.version;
-
-        major.encode(e)?;
-        minor.encode(e)?;
-        patch.encode(e)?;
-
-        self.term.encode(e)?;
-
-        Ok(())
-    }
-}
-
-impl Encode for Term {
-    fn encode(&self, e: &mut Encoder) -> Result<(), String> {
-        // still need annotation but here we have the term tags
-        match self {
-            Term::Var(name) => {
-                encode_term_tag(0, e)?;
-                name.encode(e)?;
-            }
-            Term::Delay(term) => {
-                encode_term_tag(1, e)?;
-                term.encode(e)?;
-            }
-            Term::Lambda {
-                parameter_name,
-                body,
-            } => {
-                encode_term_tag(2, e)?;
-                // need to create encoding for Binder
-                todo!();
-            }
-            Term::Apply { function, argument } => {
-                encode_term_tag(3, e)?;
-                function.encode(e)?;
-                argument.encode(e)?;
-            }
-
-            Term::Constant(constant) => {
-                encode_term_tag(4, e)?;
-                constant.encode(e)?;
-            }
-
-            Term::Force(term) => {
-                encode_term_tag(5, e)?;
-                term.encode(e)?;
-            }
-
-            Term::Error => {
-                encode_term_tag(6, e)?;
-                todo!()
-            }
-            Term::Builtin(b) => {
-                encode_term_tag(7, e)?;
-                // implement encode for builtins
-                todo!()
-            }
-        }
-
-        Ok(())
-    }
-}
-
-impl Encode for &Constant {
-    fn encode(&self, e: &mut Encoder) -> Result<(), String> {
-        match self {
-            Constant::Integer(i) => {
-                encode_constant(0, e)?;
-                i.encode(e)?;
-            }
-            Constant::ByteString(bytes) => {
-                encode_constant(1, e)?;
-                bytes.encode(e)?;
-            }
-            Constant::String(s) => {
-                encode_constant(2, e)?;
-                s.as_bytes().encode(e)?;
-            }
-            // there is no char constant tag
-            Constant::Char(c) => {
-                c.encode(e)?;
-
-                let mut b = [0; 4];
-
-                let s = c.encode_utf8(&mut b);
-
-                s.as_bytes().encode(e)?;
-            }
-            Constant::Unit => encode_constant(3, e)?,
-            Constant::Bool(b) => {
-                encode_constant(4, e)?;
-                b.encode(e)?;
-            }
-        }
-
-        Ok(())
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::{Constant, Program, Term};
-
-    #[test]
-    fn flat_encode_integer() {
-        let program = Program {
-            version: (11, 22, 33),
-            term: Term::Constant(Constant::Integer(11)),
-        };
-
-        let bytes = program.flat().unwrap();
-
-        assert_eq!(
-            bytes,
-            vec![0b00001011, 0b00010110, 0b00100001, 0b01001000, 0b00000101, 0b10000001]
-        )
-    }
-}
diff --git a/crates/uplc/src/builtins.rs b/crates/uplc/src/builtins.rs
index 373c0a5f..9231c494 100644
--- a/crates/uplc/src/builtins.rs
+++ b/crates/uplc/src/builtins.rs
@@ -1,79 +1,80 @@
 use strum_macros::EnumString;
 
+#[repr(u8)]
 #[allow(non_camel_case_types)]
 #[derive(Debug, Clone, EnumString)]
 #[strum(serialize_all = "camelCase")]
 pub enum DefaultFunction {
     // Integer functions
-    AddInteger,
-    SubtractInteger,
-    MultiplyInteger,
-    DivideInteger,
-    QuotientInteger,
-    RemainderInteger,
-    ModInteger,
-    EqualsInteger,
-    LessThanInteger,
-    LessThanEqualsInteger,
+    AddInteger = 0,
+    SubtractInteger = 1,
+    MultiplyInteger = 2,
+    DivideInteger = 3,
+    QuotientInteger = 4,
+    RemainderInteger = 5,
+    ModInteger = 6,
+    EqualsInteger = 7,
+    LessThanInteger = 8,
+    LessThanEqualsInteger = 9,
     // ByteString functions
-    AppendByteString,
-    ConsByteString,
-    SliceByteString,
-    LengthOfByteString,
-    IndexByteString,
-    EqualsByteString,
-    LessThanByteString,
-    LessThanEqualsByteString,
+    AppendByteString = 10,
+    ConsByteString = 11,
+    SliceByteString = 12,
+    LengthOfByteString = 13,
+    IndexByteString = 14,
+    EqualsByteString = 15,
+    LessThanByteString = 16,
+    LessThanEqualsByteString = 17,
     // Cryptography and hash functions
     #[strum(serialize = "sha2_256")]
-    Sha2_256,
-    Sha3_256,
-    Blake2b_256,
-    VerifySignature,
-    VerifyEcdsaSecp256k1Signature,
-    VerifySchnorrSecp256k1Signature,
+    Sha2_256 = 18,
+    Sha3_256 = 19,
+    Blake2b_256 = 20,
+    VerifySignature = 21,
+    VerifyEcdsaSecp256k1Signature = 22,
+    VerifySchnorrSecp256k1Signature = 23,
     // String functions
-    AppendString,
-    EqualsString,
-    EncodeUtf8,
-    DecodeUtf8,
+    AppendString = 24,
+    EqualsString = 25,
+    EncodeUtf8 = 26,
+    DecodeUtf8 = 27,
     // Bool function
-    IfThenElse,
+    IfThenElse = 28,
     // Unit function
-    ChooseUnit,
+    ChooseUnit = 29,
     // Tracing function
-    Trace,
+    Trace = 30,
     // Pairs functions
-    FstPair,
-    SndPair,
+    FstPair = 31,
+    SndPair = 32,
     // List functions
-    ChooseList,
-    MkCons,
-    HeadList,
-    TailList,
-    NullList,
+    ChooseList = 33,
+    MkCons = 34,
+    HeadList = 35,
+    TailList = 36,
+    NullList = 37,
     // Data functions
     // It is convenient to have a "choosing" function for a data type that has more than two
     // constructors to get pattern matching over it and we may end up having multiple such data
     // types, hence we include the name of the data type as a suffix.
-    ChooseData,
-    ConstrData,
-    MapData,
-    ListData,
-    IData,
-    BData,
-    UnConstrData,
-    UnMapData,
-    UnListData,
-    UnIData,
-    UnBData,
-    EqualsData,
-    SerialiseData,
+    ChooseData = 38,
+    ConstrData = 39,
+    MapData = 40,
+    ListData = 41,
+    IData = 42,
+    BData = 43,
+    UnConstrData = 44,
+    UnMapData = 45,
+    UnListData = 46,
+    UnIData = 47,
+    UnBData = 48,
+    EqualsData = 49,
+    SerialiseData = 50,
     // Misc constructors
     // Constructors that we need for constructing e.g. Data. Polymorphic builtin
     // constructors are often problematic (See note [Representable built-in
     // functions over polymorphic built-in types])
-    MkPairData,
-    MkNilData,
-    MkNilPairData,
+    MkPairData = 51,
+    MkNilData = 52,
+    MkNilPairData = 53,
 }
diff --git a/crates/uplc/src/flat.rs b/crates/uplc/src/flat.rs
new file mode 100644
index 00000000..2b92854d
--- /dev/null
+++ b/crates/uplc/src/flat.rs
@@ -0,0 +1,208 @@
+use anyhow::anyhow;
+
+use flat::en::{Encode, Encoder};
+
+use crate::{
+    ast::{Constant, Program, Term},
+    builtins::DefaultFunction,
+};
+
+/// Number of bits used to write a builtin function tag.
+const BUILTIN_TAG_WIDTH: u32 = 7;
+/// Number of bits used to write a constant tag.
+const CONST_TAG_WIDTH: u32 = 4;
+/// Number of bits used to write a term tag.
+const TERM_TAG_WIDTH: u32 = 4;
+
+impl Program {
+    /// Encodes this program with `flat::encode` and returns the resulting bytes.
+    ///
+    /// # Errors
+    ///
+    /// Returns the encoder's error, converted into an `anyhow::Error`.
+    pub fn flat(&self) -> anyhow::Result<Vec<u8>> {
+        let bytes = flat::encode(self.clone()).map_err(|err| anyhow!("{}", err))?;
+
+        Ok(bytes)
+    }
+
+    /// Encodes this program with [`Program::flat`] and returns the bytes as a hex string.
+    pub fn flat_hex(&self) -> anyhow::Result<String> {
+        let bytes = self.flat()?;
+
+        let hex = hex::encode(&bytes);
+
+        Ok(hex)
+    }
+}
+
+/// Writes the major, minor and patch version numbers, then the term.
+impl Encode for Program {
+    fn encode(&self, e: &mut Encoder) -> Result<(), String> {
+        let (major, minor, patch) = self.version;
+
+        major.encode(e)?;
+        minor.encode(e)?;
+        patch.encode(e)?;
+
+        self.term.encode(e)?;
+
+        Ok(())
+    }
+}
+
+/// Writes the term tag, then the term's contents.
+///
+/// `Term::Lambda` and `Term::Error` are not supported yet: they hit `todo!()` and panic.
+impl Encode for Term {
+    fn encode(&self, e: &mut Encoder) -> Result<(), String> {
+        // still need annotation but here we have the term tags
+        match self {
+            Term::Var(name) => {
+                encode_term_tag(0, e)?;
+                name.encode(e)?;
+            }
+            Term::Delay(term) => {
+                encode_term_tag(1, e)?;
+                term.encode(e)?;
+            }
+            Term::Lambda {
+                parameter_name: _,
+                body: _,
+            } => {
+                encode_term_tag(2, e)?;
+                // need to create encoding for Binder
+                todo!();
+            }
+            Term::Apply { function, argument } => {
+                encode_term_tag(3, e)?;
+                function.encode(e)?;
+                argument.encode(e)?;
+            }
+
+            Term::Constant(constant) => {
+                encode_term_tag(4, e)?;
+                constant.encode(e)?;
+            }
+
+            Term::Force(term) => {
+                encode_term_tag(5, e)?;
+                term.encode(e)?;
+            }
+
+            Term::Error => {
+                encode_term_tag(6, e)?;
+                todo!()
+            }
+            Term::Builtin(builtin) => {
+                encode_term_tag(7, e)?;
+
+                builtin.encode(e)?;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Writes the constant's tag with [`encode_constant`], then its value.
+///
+/// `Constant::Char` is the exception: it is written without a tag.
+impl Encode for &Constant {
+    fn encode(&self, e: &mut Encoder) -> Result<(), String> {
+        match self {
+            Constant::Integer(i) => {
+                encode_constant(0, e)?;
+                i.encode(e)?;
+            }
+            Constant::ByteString(bytes) => {
+                encode_constant(1, e)?;
+                bytes.encode(e)?;
+            }
+            Constant::String(s) => {
+                encode_constant(2, e)?;
+                s.as_bytes().encode(e)?;
+            }
+            // there is no char constant tag
+            Constant::Char(c) => {
+                c.encode(e)?;
+
+                let mut b = [0; 4];
+
+                let s = c.encode_utf8(&mut b);
+
+                s.as_bytes().encode(e)?;
+            }
+            Constant::Unit => encode_constant(3, e)?,
+            Constant::Bool(b) => {
+                encode_constant(4, e)?;
+                b.encode(e)?;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Writes the builtin's `u8` discriminant using `BUILTIN_TAG_WIDTH` bits.
+impl Encode for DefaultFunction {
+    fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> {
+        e.bits(BUILTIN_TAG_WIDTH as i64, self.clone() as u8);
+
+        Ok(())
+    }
+}
+
+/// Writes `tag` as a term tag of `TERM_TAG_WIDTH` bits.
+fn encode_term_tag(tag: u8, e: &mut Encoder) -> Result<(), String> {
+    safe_encode_bits(TERM_TAG_WIDTH, tag, e)
+}
+
+/// Hands `byte` to `Encoder::bits` with a width of `num_bits`, after checking that it fits.
+///
+/// # Errors
+///
+/// Returns an error message when `byte` does not fit in `num_bits` bits.
+fn safe_encode_bits(num_bits: u32, byte: u8, e: &mut Encoder) -> Result<(), String> {
+    // Test the bits above `num_bits` with a shift: unlike `2_u8.pow(num_bits) < byte`, this
+    // rejects `byte == 2^num_bits` and cannot overflow once `num_bits` reaches 8.
+    if num_bits < u8::BITS && byte >> num_bits != 0 {
+        Err(format!(
+            "Overflow detected, cannot fit {} in {} bits.",
+            byte, num_bits
+        ))
+    } else {
+        e.bits(num_bits as i64, byte);
+        Ok(())
+    }
+}
+
+/// Writes `tag` as a one-element list through `Encoder::encode_list_with`.
+pub fn encode_constant(tag: u8, e: &mut Encoder) -> Result<(), String> {
+    e.encode_list_with(encode_constant_tag, [tag].to_vec())
+}
+
+/// Writes `tag` as a constant tag of `CONST_TAG_WIDTH` bits.
+pub fn encode_constant_tag(tag: u8, e: &mut Encoder) -> Result<(), String> {
+    safe_encode_bits(CONST_TAG_WIDTH, tag, e)
+}
+
+#[cfg(test)]
+mod test {
+    use super::{Constant, Program, Term};
+
+    #[test]
+    fn flat_encode_integer() {
+        let program = Program {
+            version: (11, 22, 33),
+            term: Term::Constant(Constant::Integer(11)),
+        };
+
+        let bytes = program.flat().unwrap();
+
+        assert_eq!(
+            bytes,
+            vec![0b00001011, 0b00010110, 0b00100001, 0b01001000, 0b00000101, 0b10000001]
+        )
+    }
+}
diff --git a/crates/uplc/src/lib.rs b/crates/uplc/src/lib.rs
index 54249879..e5258e37 100644
--- a/crates/uplc/src/lib.rs
+++ b/crates/uplc/src/lib.rs
@@ -1,5 +1,6 @@
 pub mod ast;
 pub mod builtins;
+mod flat;
 pub mod parser;
 
 #[macro_use]