feat: builtin encoding
Co-authored-by: rvcas <x@rvcas.dev>
This commit is contained in:
		
							parent
							
								
									c01469ea51
								
							
						
					
					
						commit
						41487733f7
					
				|  | @ -1,13 +1,13 @@ | |||
| pub enum Filler { | ||||
|     FillerStart(Box<Filler>), | ||||
|     // FillerStart(Box<Filler>),
 | ||||
|     FillerEnd, | ||||
| } | ||||
| 
 | ||||
| impl Filler { | ||||
|     pub fn len(&self) -> usize { | ||||
|         match self { | ||||
|             Filler::FillerStart(f) => f.len() + 1, | ||||
|             Filler::FillerEnd => 1, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| // impl Filler {
 | ||||
| //     pub fn len(&self) -> usize {
 | ||||
| //         match self {
 | ||||
| //             Filler::FillerStart(f) => f.len() + 1,
 | ||||
| //             Filler::FillerEnd => 1,
 | ||||
| //         }
 | ||||
| //     }
 | ||||
| // }
 | ||||
|  |  | |||
|  | @ -1,7 +1,7 @@ | |||
| mod encode; | ||||
| mod encoder; | ||||
| mod filler; | ||||
| mod zigzag; | ||||
| pub mod zigzag; | ||||
| 
 | ||||
| pub mod en { | ||||
|     pub use super::encode::*; | ||||
|  |  | |||
|  | @ -1,33 +1,11 @@ | |||
| use anyhow::anyhow; | ||||
| use flat::en::{Encode, Encoder}; | ||||
| 
 | ||||
| use crate::builtins::DefaultFunction; | ||||
| 
 | ||||
| const TERM_TAG_WIDTH: u32 = 4; | ||||
| const CONST_TAG_WIDTH: u32 = 4; | ||||
| 
 | ||||
| #[derive(Debug, Clone)] | ||||
| pub struct Program { | ||||
|     pub version: (usize, usize, usize), | ||||
|     pub term: Term, | ||||
| } | ||||
| 
 | ||||
| impl Program { | ||||
|     pub fn flat(&self) -> anyhow::Result<Vec<u8>> { | ||||
|         let bytes = flat::encode(self.clone()).map_err(|err| anyhow!("{}", err))?; | ||||
| 
 | ||||
|         Ok(bytes) | ||||
|     } | ||||
| 
 | ||||
|     pub fn flat_hex(&self) -> anyhow::Result<String> { | ||||
|         let bytes = self.flat()?; | ||||
| 
 | ||||
|         let hex = hex::encode(&bytes); | ||||
| 
 | ||||
|         Ok(hex) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone)] | ||||
| pub enum Term { | ||||
|     // tag: 0
 | ||||
|  | @ -54,22 +32,6 @@ pub enum Term { | |||
|     Builtin(DefaultFunction), | ||||
| } | ||||
| 
 | ||||
| pub fn encode_term_tag(tag: u8, e: &mut Encoder) -> Result<(), String> { | ||||
|     safe_encode_bits(TERM_TAG_WIDTH, tag, e) | ||||
| } | ||||
| 
 | ||||
| pub fn safe_encode_bits(num_bits: u32, byte: u8, e: &mut Encoder) -> Result<(), String> { | ||||
|     if 2_u8.pow(num_bits) < byte { | ||||
|         Err(format!( | ||||
|             "Overflow detected, cannot fit {} in {} bits.", | ||||
|             byte, num_bits | ||||
|         )) | ||||
|     } else { | ||||
|         e.bits(num_bits as i64, byte); | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[derive(Debug, Clone)] | ||||
| pub enum Constant { | ||||
|     // TODO: figure out the right size for this
 | ||||
|  | @ -86,132 +48,3 @@ pub enum Constant { | |||
|     // tag: 5
 | ||||
|     Bool(bool), | ||||
| } | ||||
| 
 | ||||
| pub fn encode_constant(tag: u8, e: &mut Encoder) -> Result<(), String> { | ||||
|     e.encode_list_with(encode_constant_tag, [tag].to_vec()) | ||||
| } | ||||
| 
 | ||||
| pub fn encode_constant_tag(tag: u8, e: &mut Encoder) -> Result<(), String> { | ||||
|     safe_encode_bits(CONST_TAG_WIDTH, tag, e) | ||||
| } | ||||
| 
 | ||||
| impl Encode for Program { | ||||
|     fn encode(&self, e: &mut Encoder) -> Result<(), String> { | ||||
|         let (major, minor, patch) = self.version; | ||||
| 
 | ||||
|         major.encode(e)?; | ||||
|         minor.encode(e)?; | ||||
|         patch.encode(e)?; | ||||
| 
 | ||||
|         self.term.encode(e)?; | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Encode for Term { | ||||
|     fn encode(&self, e: &mut Encoder) -> Result<(), String> { | ||||
|         // still need annotation but here we have the term tags
 | ||||
|         match self { | ||||
|             Term::Var(name) => { | ||||
|                 encode_term_tag(0, e)?; | ||||
|                 name.encode(e)?; | ||||
|             } | ||||
|             Term::Delay(term) => { | ||||
|                 encode_term_tag(1, e)?; | ||||
|                 term.encode(e)?; | ||||
|             } | ||||
|             Term::Lambda { | ||||
|                 parameter_name, | ||||
|                 body, | ||||
|             } => { | ||||
|                 encode_term_tag(2, e)?; | ||||
|                 // need to create encoding for Binder
 | ||||
|                 todo!(); | ||||
|             } | ||||
|             Term::Apply { function, argument } => { | ||||
|                 encode_term_tag(3, e)?; | ||||
|                 function.encode(e)?; | ||||
|                 argument.encode(e)?; | ||||
|             } | ||||
| 
 | ||||
|             Term::Constant(constant) => { | ||||
|                 encode_term_tag(4, e)?; | ||||
|                 constant.encode(e)?; | ||||
|             } | ||||
| 
 | ||||
|             Term::Force(term) => { | ||||
|                 encode_term_tag(5, e)?; | ||||
|                 term.encode(e)?; | ||||
|             } | ||||
| 
 | ||||
|             Term::Error => { | ||||
|                 encode_term_tag(6, e)?; | ||||
|                 todo!() | ||||
|             } | ||||
|             Term::Builtin(b) => { | ||||
|                 encode_term_tag(7, e)?; | ||||
|                 // implement encode for builtins
 | ||||
|                 todo!() | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Encode for &Constant { | ||||
|     fn encode(&self, e: &mut Encoder) -> Result<(), String> { | ||||
|         match self { | ||||
|             Constant::Integer(i) => { | ||||
|                 encode_constant(0, e)?; | ||||
|                 i.encode(e)?; | ||||
|             } | ||||
|             Constant::ByteString(bytes) => { | ||||
|                 encode_constant(1, e)?; | ||||
|                 bytes.encode(e)?; | ||||
|             } | ||||
|             Constant::String(s) => { | ||||
|                 encode_constant(2, e)?; | ||||
|                 s.as_bytes().encode(e)?; | ||||
|             } | ||||
|             // there is no char constant tag
 | ||||
|             Constant::Char(c) => { | ||||
|                 c.encode(e)?; | ||||
| 
 | ||||
|                 let mut b = [0; 4]; | ||||
| 
 | ||||
|                 let s = c.encode_utf8(&mut b); | ||||
| 
 | ||||
|                 s.as_bytes().encode(e)?; | ||||
|             } | ||||
|             Constant::Unit => encode_constant(3, e)?, | ||||
|             Constant::Bool(b) => { | ||||
|                 encode_constant(4, e)?; | ||||
|                 b.encode(e)?; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use super::{Constant, Program, Term}; | ||||
| 
 | ||||
|     #[test] | ||||
|     fn flat_encode_integer() { | ||||
|         let program = Program { | ||||
|             version: (11, 22, 33), | ||||
|             term: Term::Constant(Constant::Integer(11)), | ||||
|         }; | ||||
| 
 | ||||
|         let bytes = program.flat().unwrap(); | ||||
| 
 | ||||
|         assert_eq!( | ||||
|             bytes, | ||||
|             vec![0b00001011, 0b00010110, 0b00100001, 0b01001000, 0b00000101, 0b10000001] | ||||
|         ) | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -1,79 +1,80 @@ | |||
| use strum_macros::EnumString; | ||||
| 
 | ||||
| #[repr(u8)] | ||||
| #[allow(non_camel_case_types)] | ||||
| #[derive(Debug, Clone, EnumString)] | ||||
| #[strum(serialize_all = "camelCase")] | ||||
| pub enum DefaultFunction { | ||||
|     // Integer functions
 | ||||
|     AddInteger, | ||||
|     SubtractInteger, | ||||
|     MultiplyInteger, | ||||
|     DivideInteger, | ||||
|     QuotientInteger, | ||||
|     RemainderInteger, | ||||
|     ModInteger, | ||||
|     EqualsInteger, | ||||
|     LessThanInteger, | ||||
|     LessThanEqualsInteger, | ||||
|     AddInteger = 0, | ||||
|     SubtractInteger = 1, | ||||
|     MultiplyInteger = 2, | ||||
|     DivideInteger = 3, | ||||
|     QuotientInteger = 4, | ||||
|     RemainderInteger = 5, | ||||
|     ModInteger = 6, | ||||
|     EqualsInteger = 7, | ||||
|     LessThanInteger = 8, | ||||
|     LessThanEqualsInteger = 9, | ||||
|     // ByteString functions
 | ||||
|     AppendByteString, | ||||
|     ConsByteString, | ||||
|     SliceByteString, | ||||
|     LengthOfByteString, | ||||
|     IndexByteString, | ||||
|     EqualsByteString, | ||||
|     LessThanByteString, | ||||
|     LessThanEqualsByteString, | ||||
|     AppendByteString = 10, | ||||
|     ConsByteString = 11, | ||||
|     SliceByteString = 12, | ||||
|     LengthOfByteString = 13, | ||||
|     IndexByteString = 14, | ||||
|     EqualsByteString = 15, | ||||
|     LessThanByteString = 16, | ||||
|     LessThanEqualsByteString = 17, | ||||
|     // Cryptography and hash functions
 | ||||
|     #[strum(serialize = "sha2_256")] | ||||
|     Sha2_256, | ||||
|     Sha3_256, | ||||
|     Blake2b_256, | ||||
|     VerifySignature, | ||||
|     VerifyEcdsaSecp256k1Signature, | ||||
|     VerifySchnorrSecp256k1Signature, | ||||
|     Sha2_256 = 18, | ||||
|     Sha3_256 = 19, | ||||
|     Blake2b_256 = 20, | ||||
|     VerifySignature = 21, | ||||
|     VerifyEcdsaSecp256k1Signature = 22, | ||||
|     VerifySchnorrSecp256k1Signature = 23, | ||||
|     // String functions
 | ||||
|     AppendString, | ||||
|     EqualsString, | ||||
|     EncodeUtf8, | ||||
|     DecodeUtf8, | ||||
|     AppendString = 24, | ||||
|     EqualsString = 25, | ||||
|     EncodeUtf8 = 26, | ||||
|     DecodeUtf8 = 27, | ||||
|     // Bool function
 | ||||
|     IfThenElse, | ||||
|     IfThenElse = 28, | ||||
|     // Unit function
 | ||||
|     ChooseUnit, | ||||
|     ChooseUnit = 29, | ||||
|     // Tracing function
 | ||||
|     Trace, | ||||
|     Trace = 30, | ||||
|     // Pairs functions
 | ||||
|     FstPair, | ||||
|     SndPair, | ||||
|     FstPair = 31, | ||||
|     SndPair = 32, | ||||
|     // List functions
 | ||||
|     ChooseList, | ||||
|     MkCons, | ||||
|     HeadList, | ||||
|     TailList, | ||||
|     NullList, | ||||
|     ChooseList = 33, | ||||
|     MkCons = 34, | ||||
|     HeadList = 35, | ||||
|     TailList = 36, | ||||
|     NullList = 37, | ||||
|     // Data functions
 | ||||
|     // It is convenient to have a "choosing" function for a data type that has more than two
 | ||||
|     // constructors to get pattern matching over it and we may end up having multiple such data
 | ||||
|     // types, hence we include the name of the data type as a suffix.
 | ||||
|     ChooseData, | ||||
|     ConstrData, | ||||
|     MapData, | ||||
|     ListData, | ||||
|     IData, | ||||
|     BData, | ||||
|     UnConstrData, | ||||
|     UnMapData, | ||||
|     UnListData, | ||||
|     UnIData, | ||||
|     UnBData, | ||||
|     EqualsData, | ||||
|     SerialiseData, | ||||
|     ChooseData = 38, | ||||
|     ConstrData = 39, | ||||
|     MapData = 40, | ||||
|     ListData = 41, | ||||
|     IData = 42, | ||||
|     BData = 43, | ||||
|     UnConstrData = 44, | ||||
|     UnMapData = 45, | ||||
|     UnListData = 46, | ||||
|     UnIData = 47, | ||||
|     UnBData = 48, | ||||
|     EqualsData = 49, | ||||
|     SerialiseData = 50, | ||||
|     // Misc constructors
 | ||||
|     // Constructors that we need for constructing e.g. Data. Polymorphic builtin
 | ||||
|     // constructors are often problematic (See note [Representable built-in
 | ||||
|     // functions over polymorphic built-in types])
 | ||||
|     MkPairData, | ||||
|     MkNilData, | ||||
|     MkNilPairData, | ||||
|     MkPairData = 51, | ||||
|     MkNilData = 52, | ||||
|     MkNilPairData = 53, | ||||
| } | ||||
|  |  | |||
|  | @ -0,0 +1,181 @@ | |||
| use anyhow::anyhow; | ||||
| 
 | ||||
| use flat::en::{Encode, Encoder}; | ||||
| 
 | ||||
| use crate::{ | ||||
|     ast::{Constant, Program, Term}, | ||||
|     builtins::DefaultFunction, | ||||
| }; | ||||
| 
 | ||||
| const BUILTIN_TAG_WIDTH: u32 = 7; | ||||
| const CONST_TAG_WIDTH: u32 = 4; | ||||
| const TERM_TAG_WIDTH: u32 = 4; | ||||
| 
 | ||||
| impl Program { | ||||
|     pub fn flat(&self) -> anyhow::Result<Vec<u8>> { | ||||
|         let bytes = flat::encode(self.clone()).map_err(|err| anyhow!("{}", err))?; | ||||
| 
 | ||||
|         Ok(bytes) | ||||
|     } | ||||
| 
 | ||||
|     pub fn flat_hex(&self) -> anyhow::Result<String> { | ||||
|         let bytes = self.flat()?; | ||||
| 
 | ||||
|         let hex = hex::encode(&bytes); | ||||
| 
 | ||||
|         Ok(hex) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Encode for Program { | ||||
|     fn encode(&self, e: &mut Encoder) -> Result<(), String> { | ||||
|         let (major, minor, patch) = self.version; | ||||
| 
 | ||||
|         major.encode(e)?; | ||||
|         minor.encode(e)?; | ||||
|         patch.encode(e)?; | ||||
| 
 | ||||
|         self.term.encode(e)?; | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Encode for Term { | ||||
|     fn encode(&self, e: &mut Encoder) -> Result<(), String> { | ||||
|         // still need annotation but here we have the term tags
 | ||||
|         match self { | ||||
|             Term::Var(name) => { | ||||
|                 encode_term_tag(0, e)?; | ||||
|                 name.encode(e)?; | ||||
|             } | ||||
|             Term::Delay(term) => { | ||||
|                 encode_term_tag(1, e)?; | ||||
|                 term.encode(e)?; | ||||
|             } | ||||
|             Term::Lambda { | ||||
|                 parameter_name: _, | ||||
|                 body: _, | ||||
|             } => { | ||||
|                 encode_term_tag(2, e)?; | ||||
|                 // need to create encoding for Binder
 | ||||
|                 todo!(); | ||||
|             } | ||||
|             Term::Apply { function, argument } => { | ||||
|                 encode_term_tag(3, e)?; | ||||
|                 function.encode(e)?; | ||||
|                 argument.encode(e)?; | ||||
|             } | ||||
| 
 | ||||
|             Term::Constant(constant) => { | ||||
|                 encode_term_tag(4, e)?; | ||||
|                 constant.encode(e)?; | ||||
|             } | ||||
| 
 | ||||
|             Term::Force(term) => { | ||||
|                 encode_term_tag(5, e)?; | ||||
|                 term.encode(e)?; | ||||
|             } | ||||
| 
 | ||||
|             Term::Error => { | ||||
|                 encode_term_tag(6, e)?; | ||||
|                 todo!() | ||||
|             } | ||||
|             Term::Builtin(builtin) => { | ||||
|                 encode_term_tag(7, e)?; | ||||
| 
 | ||||
|                 builtin.encode(e)?; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Encode for &Constant { | ||||
|     fn encode(&self, e: &mut Encoder) -> Result<(), String> { | ||||
|         match self { | ||||
|             Constant::Integer(i) => { | ||||
|                 encode_constant(0, e)?; | ||||
|                 i.encode(e)?; | ||||
|             } | ||||
|             Constant::ByteString(bytes) => { | ||||
|                 encode_constant(1, e)?; | ||||
|                 bytes.encode(e)?; | ||||
|             } | ||||
|             Constant::String(s) => { | ||||
|                 encode_constant(2, e)?; | ||||
|                 s.as_bytes().encode(e)?; | ||||
|             } | ||||
|             // there is no char constant tag
 | ||||
|             Constant::Char(c) => { | ||||
|                 c.encode(e)?; | ||||
| 
 | ||||
|                 let mut b = [0; 4]; | ||||
| 
 | ||||
|                 let s = c.encode_utf8(&mut b); | ||||
| 
 | ||||
|                 s.as_bytes().encode(e)?; | ||||
|             } | ||||
|             Constant::Unit => encode_constant(3, e)?, | ||||
|             Constant::Bool(b) => { | ||||
|                 encode_constant(4, e)?; | ||||
|                 b.encode(e)?; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| impl Encode for DefaultFunction { | ||||
|     fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> { | ||||
|         e.bits(BUILTIN_TAG_WIDTH as i64, self.clone() as u8); | ||||
| 
 | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| fn encode_term_tag(tag: u8, e: &mut Encoder) -> Result<(), String> { | ||||
|     safe_encode_bits(TERM_TAG_WIDTH, tag, e) | ||||
| } | ||||
| 
 | ||||
| fn safe_encode_bits(num_bits: u32, byte: u8, e: &mut Encoder) -> Result<(), String> { | ||||
|     if 2_u8.pow(num_bits) < byte { | ||||
|         Err(format!( | ||||
|             "Overflow detected, cannot fit {} in {} bits.", | ||||
|             byte, num_bits | ||||
|         )) | ||||
|     } else { | ||||
|         e.bits(num_bits as i64, byte); | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| pub fn encode_constant(tag: u8, e: &mut Encoder) -> Result<(), String> { | ||||
|     e.encode_list_with(encode_constant_tag, [tag].to_vec()) | ||||
| } | ||||
| 
 | ||||
| pub fn encode_constant_tag(tag: u8, e: &mut Encoder) -> Result<(), String> { | ||||
|     safe_encode_bits(CONST_TAG_WIDTH, tag, e) | ||||
| } | ||||
| 
 | ||||
#[cfg(test)]
mod test {
    use super::{Constant, Program, Term};

    /// Pins the exact flat bytes of version 11.22.33 wrapping the integer constant 11.
    #[test]
    fn flat_encode_integer() {
        let expected: Vec<u8> = vec![
            0b00001011, 0b00010110, 0b00100001, 0b01001000, 0b00000101, 0b10000001,
        ];

        let encoded = Program {
            version: (11, 22, 33),
            term: Term::Constant(Constant::Integer(11)),
        }
        .flat()
        .unwrap();

        assert_eq!(encoded, expected)
    }
}
|  | @ -1,5 +1,6 @@ | |||
| pub mod ast; | ||||
| pub mod builtins; | ||||
| mod flat; | ||||
| pub mod parser; | ||||
| 
 | ||||
| #[macro_use] | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Kasey White
						Kasey White