feat: flat tweaks and string interning

Co-authored-by: rvcas <x@rvcas.dev>
This commit is contained in:
Kasey White 2022-05-31 22:57:52 -04:00
parent 31e7d63d9a
commit 581b8fc85d
7 changed files with 105 additions and 41 deletions

View File

@ -44,7 +44,7 @@ impl Decode<'_> for char {
impl Decode<'_> for String { impl Decode<'_> for String {
fn decode(d: &mut Decoder) -> Result<Self, String> { fn decode(d: &mut Decoder) -> Result<Self, String> {
d.string() d.utf8()
} }
} }

View File

@ -51,6 +51,11 @@ impl<'b> Decoder<'b> {
Ok(s) Ok(s)
} }
/// Decodes a byte vector and converts it into a `String`.
///
/// # Errors
///
/// Returns `Err` when the underlying `Vec<u8>` decode fails, or when the
/// decoded bytes are not valid UTF-8. The latter used to panic through
/// `unwrap()`; malformed input is now reported to the caller instead.
pub fn utf8(&mut self) -> Result<String, String> {
    let raw = Vec::<u8>::decode(self)?;
    String::from_utf8(raw).map_err(|e| format!("Invalid UTF-8 in decoded string: {}", e))
}
pub fn filler(&mut self) -> Result<(), String> { pub fn filler(&mut self) -> Result<(), String> {
while self.zero()? {} while self.zero()? {}
Ok(()) Ok(())

View File

@ -46,7 +46,7 @@ impl Encode for char {
impl Encode for &str { impl Encode for &str {
fn encode(&self, e: &mut Encoder) -> Result<(), String> { fn encode(&self, e: &mut Encoder) -> Result<(), String> {
e.string(*self)?; e.utf8(self)?;
Ok(()) Ok(())
} }
@ -54,7 +54,7 @@ impl Encode for &str {
impl Encode for String { impl Encode for String {
fn encode(&self, e: &mut Encoder) -> Result<(), String> { fn encode(&self, e: &mut Encoder) -> Result<(), String> {
e.string(self)?; e.utf8(self)?;
Ok(()) Ok(())
} }

View File

@ -75,7 +75,7 @@ impl Encoder {
self.word(c as usize); self.word(c as usize);
Ok(self) Ok(self)
} }
// TODO: Do we need this?
pub fn string(&mut self, s: &str) -> Result<&mut Self, String> { pub fn string(&mut self, s: &str) -> Result<&mut Self, String> {
for i in s.chars() { for i in s.chars() {
self.one(); self.one();
@ -87,6 +87,10 @@ impl Encoder {
Ok(self) Ok(self)
} }
/// Writes `s` by passing its UTF-8 byte representation to `bytes`.
///
/// The result of `bytes` is returned unchanged.
pub fn utf8(&mut self, s: &str) -> Result<&mut Self, String> {
    let raw: &[u8] = s.as_bytes();
    self.bytes(raw)
}
fn zero(&mut self) { fn zero(&mut self) {
if self.used_bits == 7 { if self.used_bits == 7 {
self.next_word(); self.next_word();

View File

@ -9,12 +9,12 @@ pub struct Program {
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Term { pub enum Term {
// tag: 0 // tag: 0
Var(String), Var(Name),
// tag: 1 // tag: 1
Delay(Box<Term>), Delay(Box<Term>),
// tag: 2 // tag: 2
Lambda { Lambda {
parameter_name: String, parameter_name: Name,
body: Box<Term>, body: Box<Term>,
}, },
// tag: 3 // tag: 3
@ -47,3 +47,9 @@ pub enum Constant {
// tag: 5 // tag: 5
Bool(bool), Bool(bool),
} }
/// An identifier as carried by `Term::Var` and by `Term::Lambda`'s
/// `parameter_name`: the identifier text together with a numeric `unique`.
#[derive(Debug, Clone, PartialEq)]
pub struct Name {
// The identifier text.
pub text: String,
// Numeric id paired with `text`.
// NOTE(review): presumably the value produced by the parser's interner — confirm against callers.
pub unique: isize,
}

View File

@ -7,7 +7,7 @@ use flat::{
}; };
use crate::{ use crate::{
ast::{Constant, Program, Term}, ast::{Constant, Name, Program, Term},
builtins::DefaultFunction, builtins::DefaultFunction,
}; };
@ -57,7 +57,6 @@ impl<'b> Decode<'b> for Program {
impl Encode for Term { impl Encode for Term {
fn encode(&self, e: &mut Encoder) -> Result<(), String> { fn encode(&self, e: &mut Encoder) -> Result<(), String> {
// still need annotation but here we have the term tags
match self { match self {
Term::Var(name) => { Term::Var(name) => {
encode_term_tag(0, e)?; encode_term_tag(0, e)?;
@ -68,12 +67,12 @@ impl Encode for Term {
term.encode(e)?; term.encode(e)?;
} }
Term::Lambda { Term::Lambda {
parameter_name: _, parameter_name,
body: _, body,
} => { } => {
encode_term_tag(2, e)?; encode_term_tag(2, e)?;
// need to create encoding for Binder parameter_name.encode(e)?;
todo!(); body.encode(e)?;
} }
Term::Apply { function, argument } => { Term::Apply { function, argument } => {
encode_term_tag(3, e)?; encode_term_tag(3, e)?;
@ -108,9 +107,12 @@ impl Encode for Term {
impl<'b> Decode<'b> for Term { impl<'b> Decode<'b> for Term {
fn decode(d: &mut Decoder) -> Result<Self, String> { fn decode(d: &mut Decoder) -> Result<Self, String> {
match decode_term_tag(d)? { match decode_term_tag(d)? {
0 => Ok(Term::Var(String::decode(d)?)), 0 => Ok(Term::Var(Name::decode(d)?)),
1 => Ok(Term::Delay(Box::new(Term::decode(d)?))), 1 => Ok(Term::Delay(Box::new(Term::decode(d)?))),
2 => todo!(), 2 => Ok(Term::Lambda {
parameter_name: Name::decode(d)?,
body: Box::new(Term::decode(d)?),
}),
3 => Ok(Term::Apply { 3 => Ok(Term::Apply {
function: Box::new(Term::decode(d)?), function: Box::new(Term::decode(d)?),
argument: Box::new(Term::decode(d)?), argument: Box::new(Term::decode(d)?),
@ -138,7 +140,7 @@ impl Encode for &Constant {
} }
Constant::String(s) => { Constant::String(s) => {
encode_constant(2, e)?; encode_constant(2, e)?;
s.as_bytes().encode(e)?; s.encode(e)?;
} }
// there is no char constant tag // there is no char constant tag
Constant::Char(c) => { Constant::Char(c) => {
@ -166,9 +168,7 @@ impl<'b> Decode<'b> for Constant {
match decode_constant(d)? { match decode_constant(d)? {
0 => Ok(Constant::Integer(isize::decode(d)?)), 0 => Ok(Constant::Integer(isize::decode(d)?)),
1 => Ok(Constant::ByteString(Vec::<u8>::decode(d)?)), 1 => Ok(Constant::ByteString(Vec::<u8>::decode(d)?)),
2 => Ok(Constant::String( 2 => Ok(Constant::String(String::decode(d)?)),
String::from_utf8(Vec::<u8>::decode(d)?).unwrap(),
)),
3 => Ok(Constant::Unit), 3 => Ok(Constant::Unit),
4 => Ok(Constant::Bool(bool::decode(d)?)), 4 => Ok(Constant::Bool(bool::decode(d)?)),
x => Err(format!("Unknown constant constructor tag: {}", x)), x => Err(format!("Unknown constant constructor tag: {}", x)),
@ -176,6 +176,24 @@ impl<'b> Decode<'b> for Constant {
} }
} }
/// Encoding of a `Name`: the `text` field is written first, then `unique`.
impl Encode for Name {
    /// Encodes both fields in order, stopping at the first error.
    fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> {
        self.text
            .encode(e)
            .and_then(|()| self.unique.encode(e))
    }
}
/// Decoding of a `Name`: reads the `text` field first, then `unique`.
impl<'b> Decode<'b> for Name {
    /// Decodes both fields in order, returning the first error encountered.
    fn decode(d: &mut Decoder) -> Result<Self, String> {
        let text = String::decode(d)?;
        let unique = isize::decode(d)?;
        Ok(Name { text, unique })
    }
}
impl Encode for DefaultFunction { impl Encode for DefaultFunction {
fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> { fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> {
e.bits(BUILTIN_TAG_WIDTH as i64, self.clone() as u8); e.bits(BUILTIN_TAG_WIDTH as i64, self.clone() as u8);

View File

@ -1,4 +1,4 @@
use std::str::FromStr; use std::{collections::HashMap, str::FromStr};
use combine::{ use combine::{
attempt, between, choice, many1, attempt, between, choice, many1,
@ -9,12 +9,38 @@ use combine::{
}; };
use crate::{ use crate::{
ast::{Constant, Program, Term}, ast::{Constant, Name, Program, Term},
builtins::DefaultFunction, builtins::DefaultFunction,
}; };
/// Parser-side bookkeeping that maps identifier text to a numeric id.
struct ParserState {
    /// Ids that have already been assigned, keyed by identifier text.
    identifiers: HashMap<String, isize>,
    /// The id the next previously-unseen identifier will receive.
    current_unique: isize,
}

impl ParserState {
    /// Creates an empty state; the first id assigned will be `0`.
    fn new() -> Self {
        Self {
            identifiers: HashMap::new(),
            current_unique: 0,
        }
    }

    /// Returns the id associated with `text`.
    ///
    /// A `text` seen before yields the id it was given the first time. A new
    /// `text` is recorded with the current counter value, and the counter is
    /// then advanced by one.
    fn intern(&mut self, text: String) -> isize {
        // Borrow the counter separately so the closure can bump it while the
        // map entry is held.
        let next_id = &mut self.current_unique;
        *self.identifiers.entry(text).or_insert_with(|| {
            let assigned = *next_id;
            *next_id += 1;
            assigned
        })
    }
}
pub fn program(src: &str) -> anyhow::Result<Program> { pub fn program(src: &str) -> anyhow::Result<Program> {
let mut parser = program_(); let mut state = ParserState::new();
let mut parser = program_(&mut state);
let result = parser.easy_parse(position::Stream::new(src.trim())); let result = parser.easy_parse(position::Stream::new(src.trim()));
@ -24,13 +50,13 @@ pub fn program(src: &str) -> anyhow::Result<Program> {
} }
} }
fn program_<Input>() -> impl Parser<Input, Output = Program> fn program_<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Program>
where where
Input: Stream<Token = char>, Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{ {
let prog = string("program").with(skip_many1(space())).with( let prog = string("program").with(skip_many1(space())).with(
(version(), skip_many1(space()), term().skip(spaces())) (version(), skip_many1(space()), term(state).skip(spaces()))
.map(|(version, _, term)| Program { version, term }), .map(|(version, _, term)| Program { version, term }),
); );
@ -60,31 +86,31 @@ where
) )
} }
fn term<Input>() -> impl Parser<Input, Output = Term> fn term<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where where
Input: Stream<Token = char>, Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{ {
choice(( choice((
attempt(delay()), attempt(delay(state)),
attempt(lambda()), attempt(lambda(state)),
attempt(apply()), attempt(apply(state)),
attempt(constant()), attempt(constant()),
attempt(force()), attempt(force(state)),
attempt(error()), attempt(error()),
attempt(builtin()), attempt(builtin()),
)) ))
} }
parser! { parser! {
fn term_[I]()(I) -> Term fn term_[I](state: &mut ParserState)(I) -> Term
where [I: Stream<Token = char>] where [I: Stream<Token = char>]
{ {
term() term(state)
} }
} }
fn delay<Input>() -> impl Parser<Input, Output = Term> fn delay<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where where
Input: Stream<Token = char>, Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
@ -94,12 +120,12 @@ where
token(')'), token(')'),
string("delay") string("delay")
.with(skip_many1(space())) .with(skip_many1(space()))
.with(term_()) .with(term_(state))
.map(|term| Term::Delay(Box::new(term))), .map(|term| Term::Delay(Box::new(term))),
) )
} }
fn force<Input>() -> impl Parser<Input, Output = Term> fn force<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where where
Input: Stream<Token = char>, Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
@ -109,12 +135,12 @@ where
token(')'), token(')'),
string("force") string("force")
.with(skip_many1(space())) .with(skip_many1(space()))
.with(term_()) .with(term_(state))
.map(|term| Term::Force(Box::new(term))), .map(|term| Term::Force(Box::new(term))),
) )
} }
fn lambda<Input>() -> impl Parser<Input, Output = Term> fn lambda<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where where
Input: Stream<Token = char>, Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
@ -124,15 +150,18 @@ where
token(')'), token(')'),
string("lam") string("lam")
.with(skip_many1(space())) .with(skip_many1(space()))
.with((many1(alpha_num()), skip_many1(space()), term_())) .with((many1(alpha_num()), skip_many1(space()), term_(state)))
.map(|(parameter_name, _, term)| Term::Lambda { .map(|(parameter_name, _, term)| Term::Lambda {
parameter_name, parameter_name: Name {
text: parameter_name,
unique: state.intern(parameter_name),
},
body: Box::new(term), body: Box::new(term),
}), }),
) )
} }
fn apply<Input>() -> impl Parser<Input, Output = Term> fn apply<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where where
Input: Stream<Token = char>, Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
@ -140,9 +169,11 @@ where
between( between(
token('['), token('['),
token(']'), token(']'),
(term_().skip(skip_many1(space())), term_()).map(|(function, argument)| Term::Apply { (term_(state).skip(skip_many1(space())), term_(state)).map(|(function, argument)| {
function: Box::new(function), Term::Apply {
argument: Box::new(argument), function: Box::new(function),
argument: Box::new(argument),
}
}), }),
) )
} }