feat: flat tweaks and string interning

Co-authored-by: rvcas <x@rvcas.dev>
This commit is contained in:
Kasey White 2022-05-31 22:57:52 -04:00
parent 31e7d63d9a
commit 581b8fc85d
7 changed files with 105 additions and 41 deletions

View File

@ -44,7 +44,7 @@ impl Decode<'_> for char {
// Decoding an owned `String` is delegated to a single method call on the `Decoder`.
// NOTE(review): this listing carries two candidate calls back to back (`d.string()`
// and `d.utf8()`); presumably the first is the previous body and the second its
// replacement — confirm which one is live before relying on this block.
impl Decode<'_> for String {
fn decode(d: &mut Decoder) -> Result<Self, String> {
d.string()
d.utf8()
}
}

View File

@ -51,6 +51,11 @@ impl<'b> Decoder<'b> {
Ok(s)
}
/// Decodes a byte array via `Vec::<u8>::decode` and converts it into a `String`.
///
/// # Errors
///
/// Propagates any error produced while decoding the bytes, and returns an
/// error (instead of panicking) when the decoded bytes are not valid UTF-8.
pub fn utf8(&mut self) -> Result<String, String> {
    let bytes = Vec::<u8>::decode(self)?;
    // The input is externally supplied data, so invalid UTF-8 is a recoverable
    // decode failure rather than a broken invariant.
    String::from_utf8(bytes).map_err(|err| format!("invalid UTF-8 in decoded string: {}", err))
}
pub fn filler(&mut self) -> Result<(), String> {
while self.zero()? {}
Ok(())

View File

@ -46,7 +46,7 @@ impl Encode for char {
impl Encode for &str {
fn encode(&self, e: &mut Encoder) -> Result<(), String> {
e.string(*self)?;
e.utf8(self)?;
Ok(())
}
@ -54,7 +54,7 @@ impl Encode for &str {
impl Encode for String {
fn encode(&self, e: &mut Encoder) -> Result<(), String> {
e.string(self)?;
e.utf8(self)?;
Ok(())
}

View File

@ -75,7 +75,7 @@ impl Encoder {
self.word(c as usize);
Ok(self)
}
// TODO: Do we need this?
pub fn string(&mut self, s: &str) -> Result<&mut Self, String> {
for i in s.chars() {
self.one();
@ -87,6 +87,10 @@ impl Encoder {
Ok(self)
}
/// Encodes `s` by handing its UTF-8 byte representation to `bytes`,
/// returning whatever `bytes` returns.
pub fn utf8(&mut self, s: &str) -> Result<&mut Self, String> {
    let raw = s.as_bytes();
    self.bytes(raw)
}
fn zero(&mut self) {
if self.used_bits == 7 {
self.next_word();

View File

@ -9,12 +9,12 @@ pub struct Program {
#[derive(Debug, Clone, PartialEq)]
pub enum Term {
// tag: 0
Var(String),
Var(Name),
// tag: 1
Delay(Box<Term>),
// tag: 2
Lambda {
parameter_name: String,
parameter_name: Name,
body: Box<Term>,
},
// tag: 3
@ -47,3 +47,9 @@ pub enum Constant {
// tag: 5
Bool(bool),
}
/// An identifier together with the integer assigned to it.
///
/// `Eq` and `Hash` are derived (both fields support them) so a `Name` can be
/// used directly as a key in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Name {
    /// The identifier's text.
    pub text: String,
    /// The integer paired with `text`.
    pub unique: isize,
}

View File

@ -7,7 +7,7 @@ use flat::{
};
use crate::{
ast::{Constant, Program, Term},
ast::{Constant, Name, Program, Term},
builtins::DefaultFunction,
};
@ -57,7 +57,6 @@ impl<'b> Decode<'b> for Program {
impl Encode for Term {
fn encode(&self, e: &mut Encoder) -> Result<(), String> {
// still need annotation but here we have the term tags
match self {
Term::Var(name) => {
encode_term_tag(0, e)?;
@ -68,12 +67,12 @@ impl Encode for Term {
term.encode(e)?;
}
Term::Lambda {
parameter_name: _,
body: _,
parameter_name,
body,
} => {
encode_term_tag(2, e)?;
// need to create encoding for Binder
todo!();
parameter_name.encode(e)?;
body.encode(e)?;
}
Term::Apply { function, argument } => {
encode_term_tag(3, e)?;
@ -108,9 +107,12 @@ impl Encode for Term {
impl<'b> Decode<'b> for Term {
fn decode(d: &mut Decoder) -> Result<Self, String> {
match decode_term_tag(d)? {
0 => Ok(Term::Var(String::decode(d)?)),
0 => Ok(Term::Var(Name::decode(d)?)),
1 => Ok(Term::Delay(Box::new(Term::decode(d)?))),
2 => todo!(),
2 => Ok(Term::Lambda {
parameter_name: Name::decode(d)?,
body: Box::new(Term::decode(d)?),
}),
3 => Ok(Term::Apply {
function: Box::new(Term::decode(d)?),
argument: Box::new(Term::decode(d)?),
@ -138,7 +140,7 @@ impl Encode for &Constant {
}
Constant::String(s) => {
encode_constant(2, e)?;
s.as_bytes().encode(e)?;
s.encode(e)?;
}
// there is no char constant tag
Constant::Char(c) => {
@ -166,9 +168,7 @@ impl<'b> Decode<'b> for Constant {
match decode_constant(d)? {
0 => Ok(Constant::Integer(isize::decode(d)?)),
1 => Ok(Constant::ByteString(Vec::<u8>::decode(d)?)),
2 => Ok(Constant::String(
String::from_utf8(Vec::<u8>::decode(d)?).unwrap(),
)),
2 => Ok(Constant::String(String::decode(d)?)),
3 => Ok(Constant::Unit),
4 => Ok(Constant::Bool(bool::decode(d)?)),
x => Err(format!("Unknown constant constructor tag: {}", x)),
@ -176,6 +176,24 @@ impl<'b> Decode<'b> for Constant {
}
}
impl Encode for Name {
    /// Writes the name's `text` first and its `unique` second, stopping at the
    /// first field whose encoding fails.
    fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> {
        self.text.encode(e)?;
        // The last field's result is the result of the whole encoding.
        self.unique.encode(e)
    }
}
impl<'b> Decode<'b> for Name {
    /// Reads a `String` and then an `isize` from the decoder, in that order,
    /// and assembles them into a `Name`.
    fn decode(d: &mut Decoder) -> Result<Self, String> {
        // Field order matters: the text is read before the unique.
        let text = String::decode(d)?;
        let unique = isize::decode(d)?;
        Ok(Name { text, unique })
    }
}
impl Encode for DefaultFunction {
fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> {
e.bits(BUILTIN_TAG_WIDTH as i64, self.clone() as u8);

View File

@ -1,4 +1,4 @@
use std::str::FromStr;
use std::{collections::HashMap, str::FromStr};
use combine::{
attempt, between, choice, many1,
@ -9,12 +9,38 @@ use combine::{
};
use crate::{
ast::{Constant, Program, Term},
ast::{Constant, Name, Program, Term},
builtins::DefaultFunction,
};
/// Interning table: maps each identifier's text to the integer ("unique")
/// assigned to it.
struct ParserState {
    /// Identifier text mapped to the unique previously handed out for it.
    identifiers: HashMap<String, isize>,
    /// The unique that the next previously-unseen identifier will receive.
    current_unique: isize,
}

impl ParserState {
    /// Creates an empty table; the first interned identifier receives `0`.
    fn new() -> Self {
        ParserState {
            identifiers: HashMap::new(),
            current_unique: 0,
        }
    }

    /// Returns the unique associated with `text`.
    ///
    /// A text seen before yields the unique it was given the first time; a new
    /// text is recorded with the current counter value, and the counter is then
    /// advanced by one.
    fn intern(&mut self, text: String) -> isize {
        // Borrow the counter on its own so the closure does not have to capture
        // all of `self` while `self.identifiers` is mutably borrowed.
        let next_unique = &mut self.current_unique;

        // `entry` performs a single hash lookup for both the hit and miss paths.
        *self.identifiers.entry(text).or_insert_with(|| {
            let unique = *next_unique;
            *next_unique += 1;
            unique
        })
    }
}
pub fn program(src: &str) -> anyhow::Result<Program> {
let mut parser = program_();
let mut state = ParserState::new();
let mut parser = program_(&mut state);
let result = parser.easy_parse(position::Stream::new(src.trim()));
@ -24,13 +50,13 @@ pub fn program(src: &str) -> anyhow::Result<Program> {
}
}
fn program_<Input>() -> impl Parser<Input, Output = Program>
fn program_<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Program>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
let prog = string("program").with(skip_many1(space())).with(
(version(), skip_many1(space()), term().skip(spaces()))
(version(), skip_many1(space()), term(state).skip(spaces()))
.map(|(version, _, term)| Program { version, term }),
);
@ -60,31 +86,31 @@ where
)
}
// Builds the parser for a single term as a `choice` over the individual term
// forms (delay, lambda, apply, constant, force, error, builtin), each of which
// is wrapped in `attempt`.
// NOTE(review): this listing carries two variants of the signature and of the
// delay/lambda/apply/force calls — one without arguments and one that threads
// `state: &mut ParserState` through. Presumably the `state` variant is the
// replacement; confirm which set is live.
fn term<Input>() -> impl Parser<Input, Output = Term>
fn term<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
{
choice((
attempt(delay()),
attempt(lambda()),
attempt(apply()),
attempt(delay(state)),
attempt(lambda(state)),
attempt(apply(state)),
attempt(constant()),
attempt(force()),
attempt(force(state)),
attempt(error()),
attempt(builtin()),
))
}
// `term_` is declared through the `parser!` macro and its body only forwards to
// `term`; the delay/force/lambda/apply parsers in this file call `term_` for
// their nested terms.
// NOTE(review): two variants of the declaration and of the forwarding call are
// listed here (with and without `state: &mut ParserState`) — confirm which is live.
parser! {
fn term_[I]()(I) -> Term
fn term_[I](state: &mut ParserState)(I) -> Term
where [I: Stream<Token = char>]
{
term()
term(state)
}
}
fn delay<Input>() -> impl Parser<Input, Output = Term>
fn delay<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
@ -94,12 +120,12 @@ where
token(')'),
string("delay")
.with(skip_many1(space()))
.with(term_())
.with(term_(state))
.map(|term| Term::Delay(Box::new(term))),
)
}
fn force<Input>() -> impl Parser<Input, Output = Term>
fn force<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
@ -109,12 +135,12 @@ where
token(')'),
string("force")
.with(skip_many1(space()))
.with(term_())
.with(term_(state))
.map(|term| Term::Force(Box::new(term))),
)
}
fn lambda<Input>() -> impl Parser<Input, Output = Term>
fn lambda<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
@ -124,15 +150,18 @@ where
token(')'),
string("lam")
.with(skip_many1(space()))
.with((many1(alpha_num()), skip_many1(space()), term_()))
.with((many1(alpha_num()), skip_many1(space()), term_(state)))
.map(|(parameter_name, _, term)| Term::Lambda {
parameter_name,
parameter_name: Name {
text: parameter_name,
unique: state.intern(parameter_name),
},
body: Box::new(term),
}),
)
}
fn apply<Input>() -> impl Parser<Input, Output = Term>
fn apply<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
where
Input: Stream<Token = char>,
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
@ -140,9 +169,11 @@ where
between(
token('['),
token(']'),
(term_().skip(skip_many1(space())), term_()).map(|(function, argument)| Term::Apply {
function: Box::new(function),
argument: Box::new(argument),
(term_(state).skip(skip_many1(space())), term_(state)).map(|(function, argument)| {
Term::Apply {
function: Box::new(function),
argument: Box::new(argument),
}
}),
)
}