feat: flat tweaks and string interning
Co-authored-by: rvcas <x@rvcas.dev>
This commit is contained in:
parent
31e7d63d9a
commit
581b8fc85d
|
@ -44,7 +44,7 @@ impl Decode<'_> for char {
|
||||||
|
|
||||||
impl Decode<'_> for String {
|
impl Decode<'_> for String {
|
||||||
fn decode(d: &mut Decoder) -> Result<Self, String> {
|
fn decode(d: &mut Decoder) -> Result<Self, String> {
|
||||||
d.string()
|
d.utf8()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -51,6 +51,11 @@ impl<'b> Decoder<'b> {
|
||||||
Ok(s)
|
Ok(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn utf8(&mut self) -> Result<String, String> {
|
||||||
|
// TODO: Better Error Handling
|
||||||
|
Ok(String::from_utf8(Vec::<u8>::decode(self)?).unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn filler(&mut self) -> Result<(), String> {
|
pub fn filler(&mut self) -> Result<(), String> {
|
||||||
while self.zero()? {}
|
while self.zero()? {}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -46,7 +46,7 @@ impl Encode for char {
|
||||||
|
|
||||||
impl Encode for &str {
|
impl Encode for &str {
|
||||||
fn encode(&self, e: &mut Encoder) -> Result<(), String> {
|
fn encode(&self, e: &mut Encoder) -> Result<(), String> {
|
||||||
e.string(*self)?;
|
e.utf8(self)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,7 @@ impl Encode for &str {
|
||||||
|
|
||||||
impl Encode for String {
|
impl Encode for String {
|
||||||
fn encode(&self, e: &mut Encoder) -> Result<(), String> {
|
fn encode(&self, e: &mut Encoder) -> Result<(), String> {
|
||||||
e.string(self)?;
|
e.utf8(self)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,7 +75,7 @@ impl Encoder {
|
||||||
self.word(c as usize);
|
self.word(c as usize);
|
||||||
Ok(self)
|
Ok(self)
|
||||||
}
|
}
|
||||||
|
// TODO: Do we need this?
|
||||||
pub fn string(&mut self, s: &str) -> Result<&mut Self, String> {
|
pub fn string(&mut self, s: &str) -> Result<&mut Self, String> {
|
||||||
for i in s.chars() {
|
for i in s.chars() {
|
||||||
self.one();
|
self.one();
|
||||||
|
@ -87,6 +87,10 @@ impl Encoder {
|
||||||
Ok(self)
|
Ok(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn utf8(&mut self, s: &str) -> Result<&mut Self, String> {
|
||||||
|
self.bytes(s.as_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
fn zero(&mut self) {
|
fn zero(&mut self) {
|
||||||
if self.used_bits == 7 {
|
if self.used_bits == 7 {
|
||||||
self.next_word();
|
self.next_word();
|
||||||
|
|
|
@ -9,12 +9,12 @@ pub struct Program {
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub enum Term {
|
pub enum Term {
|
||||||
// tag: 0
|
// tag: 0
|
||||||
Var(String),
|
Var(Name),
|
||||||
// tag: 1
|
// tag: 1
|
||||||
Delay(Box<Term>),
|
Delay(Box<Term>),
|
||||||
// tag: 2
|
// tag: 2
|
||||||
Lambda {
|
Lambda {
|
||||||
parameter_name: String,
|
parameter_name: Name,
|
||||||
body: Box<Term>,
|
body: Box<Term>,
|
||||||
},
|
},
|
||||||
// tag: 3
|
// tag: 3
|
||||||
|
@ -47,3 +47,9 @@ pub enum Constant {
|
||||||
// tag: 5
|
// tag: 5
|
||||||
Bool(bool),
|
Bool(bool),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub struct Name {
|
||||||
|
pub text: String,
|
||||||
|
pub unique: isize,
|
||||||
|
}
|
||||||
|
|
|
@ -7,7 +7,7 @@ use flat::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
ast::{Constant, Program, Term},
|
ast::{Constant, Name, Program, Term},
|
||||||
builtins::DefaultFunction,
|
builtins::DefaultFunction,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -57,7 +57,6 @@ impl<'b> Decode<'b> for Program {
|
||||||
|
|
||||||
impl Encode for Term {
|
impl Encode for Term {
|
||||||
fn encode(&self, e: &mut Encoder) -> Result<(), String> {
|
fn encode(&self, e: &mut Encoder) -> Result<(), String> {
|
||||||
// still need annotation but here we have the term tags
|
|
||||||
match self {
|
match self {
|
||||||
Term::Var(name) => {
|
Term::Var(name) => {
|
||||||
encode_term_tag(0, e)?;
|
encode_term_tag(0, e)?;
|
||||||
|
@ -68,12 +67,12 @@ impl Encode for Term {
|
||||||
term.encode(e)?;
|
term.encode(e)?;
|
||||||
}
|
}
|
||||||
Term::Lambda {
|
Term::Lambda {
|
||||||
parameter_name: _,
|
parameter_name,
|
||||||
body: _,
|
body,
|
||||||
} => {
|
} => {
|
||||||
encode_term_tag(2, e)?;
|
encode_term_tag(2, e)?;
|
||||||
// need to create encoding for Binder
|
parameter_name.encode(e)?;
|
||||||
todo!();
|
body.encode(e)?;
|
||||||
}
|
}
|
||||||
Term::Apply { function, argument } => {
|
Term::Apply { function, argument } => {
|
||||||
encode_term_tag(3, e)?;
|
encode_term_tag(3, e)?;
|
||||||
|
@ -108,9 +107,12 @@ impl Encode for Term {
|
||||||
impl<'b> Decode<'b> for Term {
|
impl<'b> Decode<'b> for Term {
|
||||||
fn decode(d: &mut Decoder) -> Result<Self, String> {
|
fn decode(d: &mut Decoder) -> Result<Self, String> {
|
||||||
match decode_term_tag(d)? {
|
match decode_term_tag(d)? {
|
||||||
0 => Ok(Term::Var(String::decode(d)?)),
|
0 => Ok(Term::Var(Name::decode(d)?)),
|
||||||
1 => Ok(Term::Delay(Box::new(Term::decode(d)?))),
|
1 => Ok(Term::Delay(Box::new(Term::decode(d)?))),
|
||||||
2 => todo!(),
|
2 => Ok(Term::Lambda {
|
||||||
|
parameter_name: Name::decode(d)?,
|
||||||
|
body: Box::new(Term::decode(d)?),
|
||||||
|
}),
|
||||||
3 => Ok(Term::Apply {
|
3 => Ok(Term::Apply {
|
||||||
function: Box::new(Term::decode(d)?),
|
function: Box::new(Term::decode(d)?),
|
||||||
argument: Box::new(Term::decode(d)?),
|
argument: Box::new(Term::decode(d)?),
|
||||||
|
@ -138,7 +140,7 @@ impl Encode for &Constant {
|
||||||
}
|
}
|
||||||
Constant::String(s) => {
|
Constant::String(s) => {
|
||||||
encode_constant(2, e)?;
|
encode_constant(2, e)?;
|
||||||
s.as_bytes().encode(e)?;
|
s.encode(e)?;
|
||||||
}
|
}
|
||||||
// there is no char constant tag
|
// there is no char constant tag
|
||||||
Constant::Char(c) => {
|
Constant::Char(c) => {
|
||||||
|
@ -166,9 +168,7 @@ impl<'b> Decode<'b> for Constant {
|
||||||
match decode_constant(d)? {
|
match decode_constant(d)? {
|
||||||
0 => Ok(Constant::Integer(isize::decode(d)?)),
|
0 => Ok(Constant::Integer(isize::decode(d)?)),
|
||||||
1 => Ok(Constant::ByteString(Vec::<u8>::decode(d)?)),
|
1 => Ok(Constant::ByteString(Vec::<u8>::decode(d)?)),
|
||||||
2 => Ok(Constant::String(
|
2 => Ok(Constant::String(String::decode(d)?)),
|
||||||
String::from_utf8(Vec::<u8>::decode(d)?).unwrap(),
|
|
||||||
)),
|
|
||||||
3 => Ok(Constant::Unit),
|
3 => Ok(Constant::Unit),
|
||||||
4 => Ok(Constant::Bool(bool::decode(d)?)),
|
4 => Ok(Constant::Bool(bool::decode(d)?)),
|
||||||
x => Err(format!("Unknown constant constructor tag: {}", x)),
|
x => Err(format!("Unknown constant constructor tag: {}", x)),
|
||||||
|
@ -176,6 +176,24 @@ impl<'b> Decode<'b> for Constant {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Encode for Name {
|
||||||
|
fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> {
|
||||||
|
self.text.encode(e)?;
|
||||||
|
self.unique.encode(e)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'b> Decode<'b> for Name {
|
||||||
|
fn decode(d: &mut Decoder) -> Result<Self, String> {
|
||||||
|
Ok(Name {
|
||||||
|
text: String::decode(d)?,
|
||||||
|
unique: isize::decode(d)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Encode for DefaultFunction {
|
impl Encode for DefaultFunction {
|
||||||
fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> {
|
fn encode(&self, e: &mut flat::en::Encoder) -> Result<(), String> {
|
||||||
e.bits(BUILTIN_TAG_WIDTH as i64, self.clone() as u8);
|
e.bits(BUILTIN_TAG_WIDTH as i64, self.clone() as u8);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use std::str::FromStr;
|
use std::{collections::HashMap, str::FromStr};
|
||||||
|
|
||||||
use combine::{
|
use combine::{
|
||||||
attempt, between, choice, many1,
|
attempt, between, choice, many1,
|
||||||
|
@ -9,12 +9,38 @@ use combine::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
ast::{Constant, Program, Term},
|
ast::{Constant, Name, Program, Term},
|
||||||
builtins::DefaultFunction,
|
builtins::DefaultFunction,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ParserState {
|
||||||
|
identifiers: HashMap<String, isize>,
|
||||||
|
current_unique: isize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ParserState {
|
||||||
|
fn new() -> Self {
|
||||||
|
ParserState {
|
||||||
|
identifiers: HashMap::new(),
|
||||||
|
current_unique: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn intern(&mut self, text: String) -> isize {
|
||||||
|
if let Some(u) = self.identifiers.get(&text) {
|
||||||
|
*u
|
||||||
|
} else {
|
||||||
|
let unique = self.current_unique;
|
||||||
|
self.identifiers.insert(text, unique);
|
||||||
|
self.current_unique += 1;
|
||||||
|
unique
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn program(src: &str) -> anyhow::Result<Program> {
|
pub fn program(src: &str) -> anyhow::Result<Program> {
|
||||||
let mut parser = program_();
|
let mut state = ParserState::new();
|
||||||
|
let mut parser = program_(&mut state);
|
||||||
|
|
||||||
let result = parser.easy_parse(position::Stream::new(src.trim()));
|
let result = parser.easy_parse(position::Stream::new(src.trim()));
|
||||||
|
|
||||||
|
@ -24,13 +50,13 @@ pub fn program(src: &str) -> anyhow::Result<Program> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn program_<Input>() -> impl Parser<Input, Output = Program>
|
fn program_<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Program>
|
||||||
where
|
where
|
||||||
Input: Stream<Token = char>,
|
Input: Stream<Token = char>,
|
||||||
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
||||||
{
|
{
|
||||||
let prog = string("program").with(skip_many1(space())).with(
|
let prog = string("program").with(skip_many1(space())).with(
|
||||||
(version(), skip_many1(space()), term().skip(spaces()))
|
(version(), skip_many1(space()), term(state).skip(spaces()))
|
||||||
.map(|(version, _, term)| Program { version, term }),
|
.map(|(version, _, term)| Program { version, term }),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -60,31 +86,31 @@ where
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn term<Input>() -> impl Parser<Input, Output = Term>
|
fn term<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
|
||||||
where
|
where
|
||||||
Input: Stream<Token = char>,
|
Input: Stream<Token = char>,
|
||||||
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
||||||
{
|
{
|
||||||
choice((
|
choice((
|
||||||
attempt(delay()),
|
attempt(delay(state)),
|
||||||
attempt(lambda()),
|
attempt(lambda(state)),
|
||||||
attempt(apply()),
|
attempt(apply(state)),
|
||||||
attempt(constant()),
|
attempt(constant()),
|
||||||
attempt(force()),
|
attempt(force(state)),
|
||||||
attempt(error()),
|
attempt(error()),
|
||||||
attempt(builtin()),
|
attempt(builtin()),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
parser! {
|
parser! {
|
||||||
fn term_[I]()(I) -> Term
|
fn term_[I](state: &mut ParserState)(I) -> Term
|
||||||
where [I: Stream<Token = char>]
|
where [I: Stream<Token = char>]
|
||||||
{
|
{
|
||||||
term()
|
term(state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn delay<Input>() -> impl Parser<Input, Output = Term>
|
fn delay<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
|
||||||
where
|
where
|
||||||
Input: Stream<Token = char>,
|
Input: Stream<Token = char>,
|
||||||
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
||||||
|
@ -94,12 +120,12 @@ where
|
||||||
token(')'),
|
token(')'),
|
||||||
string("delay")
|
string("delay")
|
||||||
.with(skip_many1(space()))
|
.with(skip_many1(space()))
|
||||||
.with(term_())
|
.with(term_(state))
|
||||||
.map(|term| Term::Delay(Box::new(term))),
|
.map(|term| Term::Delay(Box::new(term))),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn force<Input>() -> impl Parser<Input, Output = Term>
|
fn force<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
|
||||||
where
|
where
|
||||||
Input: Stream<Token = char>,
|
Input: Stream<Token = char>,
|
||||||
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
||||||
|
@ -109,12 +135,12 @@ where
|
||||||
token(')'),
|
token(')'),
|
||||||
string("force")
|
string("force")
|
||||||
.with(skip_many1(space()))
|
.with(skip_many1(space()))
|
||||||
.with(term_())
|
.with(term_(state))
|
||||||
.map(|term| Term::Force(Box::new(term))),
|
.map(|term| Term::Force(Box::new(term))),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lambda<Input>() -> impl Parser<Input, Output = Term>
|
fn lambda<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
|
||||||
where
|
where
|
||||||
Input: Stream<Token = char>,
|
Input: Stream<Token = char>,
|
||||||
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
||||||
|
@ -124,15 +150,18 @@ where
|
||||||
token(')'),
|
token(')'),
|
||||||
string("lam")
|
string("lam")
|
||||||
.with(skip_many1(space()))
|
.with(skip_many1(space()))
|
||||||
.with((many1(alpha_num()), skip_many1(space()), term_()))
|
.with((many1(alpha_num()), skip_many1(space()), term_(state)))
|
||||||
.map(|(parameter_name, _, term)| Term::Lambda {
|
.map(|(parameter_name, _, term)| Term::Lambda {
|
||||||
parameter_name,
|
parameter_name: Name {
|
||||||
|
text: parameter_name,
|
||||||
|
unique: state.intern(parameter_name),
|
||||||
|
},
|
||||||
body: Box::new(term),
|
body: Box::new(term),
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn apply<Input>() -> impl Parser<Input, Output = Term>
|
fn apply<Input>(state: &mut ParserState) -> impl Parser<Input, Output = Term>
|
||||||
where
|
where
|
||||||
Input: Stream<Token = char>,
|
Input: Stream<Token = char>,
|
||||||
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
|
||||||
|
@ -140,9 +169,11 @@ where
|
||||||
between(
|
between(
|
||||||
token('['),
|
token('['),
|
||||||
token(']'),
|
token(']'),
|
||||||
(term_().skip(skip_many1(space())), term_()).map(|(function, argument)| Term::Apply {
|
(term_(state).skip(skip_many1(space())), term_(state)).map(|(function, argument)| {
|
||||||
function: Box::new(function),
|
Term::Apply {
|
||||||
argument: Box::new(argument),
|
function: Box::new(function),
|
||||||
|
argument: Box::new(argument),
|
||||||
|
}
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue