From 1d6809661c1a81efb66239f073a85d4c058a641d Mon Sep 17 00:00:00 2001 From: rvcas Date: Fri, 12 Aug 2022 19:44:34 -0400 Subject: [PATCH] feat: lexer --- Cargo.lock | 7 + crates/cli/src/args.rs | 47 +++- crates/cli/src/main.rs | 28 +++ crates/lang/Cargo.toml | 1 + crates/lang/src/ast.rs | 492 ++++++++++++++++++++++++++++++++++++-- crates/lang/src/build.rs | 4 + crates/lang/src/error.rs | 109 +++++++++ crates/lang/src/expr.rs | 269 +++++++++++++++++++++ crates/lang/src/lexer.rs | 163 +++++++++++++ crates/lang/src/lib.rs | 16 +- crates/lang/src/parser.rs | 7 + crates/lang/src/tipo.rs | 159 ++++++++++++ crates/lang/src/token.rs | 149 ++++++++++++ 13 files changed, 1414 insertions(+), 37 deletions(-) create mode 100644 crates/lang/src/build.rs create mode 100644 crates/lang/src/error.rs create mode 100644 crates/lang/src/expr.rs create mode 100644 crates/lang/src/lexer.rs create mode 100644 crates/lang/src/parser.rs create mode 100644 crates/lang/src/tipo.rs create mode 100644 crates/lang/src/token.rs diff --git a/Cargo.lock b/Cargo.lock index 43f9a7f0..dd4f828a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,6 +59,7 @@ dependencies = [ "chumsky", "internment", "miette", + "vec1", ] [[package]] @@ -885,6 +886,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "vec1" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc1631c774f0f9570797191e01247cbefde789eebfbf128074cb934115a6133" + [[package]] name = "version_check" version = "0.9.4" diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index fcbe306c..8e60cbf7 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -7,6 +7,15 @@ use clap::{Parser, Subcommand}; #[clap(version, about, long_about = None)] #[clap(propagate_version = true)] pub enum Args { + /// Build an aiken project + Build, + /// Start a development server + Dev, + /// Create a new aiken project + New { + /// Project name + name: PathBuf, + }, /// A subcommand for working with Untyped Plutus Core #[clap(subcommand)] Uplc(UplcCommand), @@ -15,33 +24,49 @@ pub enum Args { /// Commands for working with Untyped Plutus Core #[derive(Subcommand)] pub enum UplcCommand { + /// Evaluate an Untyped Plutus Core program + Eval { + /// Handle input as flat bytes + #[clap(short, long)] + flat: bool, + + /// File to load and evaluate + input: PathBuf, + }, /// Encode textual Untyped Plutus Core to flat bytes Flat { + /// Textual Untyped Plutus Core file input: PathBuf, - #[clap(short, long)] - print: bool, + + /// Output file name #[clap(short, long)] out: Option, - }, - /// Decode flat bytes to textual Untyped Plutus Core - Unflat { - input: PathBuf, + + /// Print output instead of saving to file #[clap(short, long)] print: bool, - #[clap(short, long)] - out: Option, }, /// Format an Untyped Plutus Core program Fmt { + /// Textual Untyped Plutus Core file input: PathBuf, + + /// Print output instead of saving to file #[clap(short, long)] print: bool, }, - /// Evaluate an Untyped Plutus Core program - Eval { + /// Decode flat bytes to textual Untyped Plutus Core + Unflat { + /// Flat encoded Untyped Plutus Core file input: PathBuf, + + /// Output file name #[clap(short, long)] - flat: bool, + out: Option, + + /// Print output instead of saving to file + #[clap(short, long)] + print: bool, }, } diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 4bc26704..bd973694 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -14,6 +14,31 @@ fn main() -> anyhow::Result<()> { let args = Args::default(); match args { + Args::Build => { + // 1. load and parse modules + // * lib - contains modules, types, and functions + // * contracts - contains validators + // * scripts - contains native scripts dsl + // 2. type check everything + // 3. generate uplc and policy/address if relevant + todo!() + } + + Args::Dev => { + // launch a development server + // this should allow people to test + // their contracts over http + todo!() + } + + Args::New { name } => { + if !name.exists() { + fs::create_dir_all(name.join("lib"))?; + fs::create_dir_all(name.join("policies"))?; + fs::create_dir_all(name.join("scripts"))?; + } + } + Args::Uplc(uplc) => match uplc { UplcCommand::Flat { input, print, out } => { let code = std::fs::read_to_string(&input)?; @@ -48,6 +73,7 @@ fn main() -> anyhow::Result<()> { fs::write(&out_name, &bytes)?; } } + UplcCommand::Fmt { input, print } => { let code = std::fs::read_to_string(&input)?; @@ -61,6 +87,7 @@ fn main() -> anyhow::Result<()> { fs::write(&input, pretty)?; } } + UplcCommand::Unflat { input, print, out } => { let bytes = std::fs::read(&input)?; @@ -82,6 +109,7 @@ fn main() -> anyhow::Result<()> { fs::write(&out_name, pretty)?; } } + UplcCommand::Eval { input, flat } => { let program = if flat { let bytes = std::fs::read(&input)?; diff --git a/crates/lang/Cargo.toml b/crates/lang/Cargo.toml index d2f2d709..d04d8465 100644 --- a/crates/lang/Cargo.toml +++ b/crates/lang/Cargo.toml @@ -9,3 +9,4 @@ edition = "2021" chumsky = "0.8.0" internment = "0.7.0" miette = "5.2.0" +vec1 = "1.8.0" diff --git a/crates/lang/src/ast.rs b/crates/lang/src/ast.rs index c8f8c0e4..d4eae330 100644 --- a/crates/lang/src/ast.rs +++ b/crates/lang/src/ast.rs @@ -1,47 +1,505 @@ -pub struct Module { - pub name: Vec, - pub docs: Vec, - pub is_script: bool, - pub is_lib: bool, - pub is_policy: bool, +use std::{collections::HashMap, fmt, ops::Range, sync::Arc}; + +use internment::Intern; + +use crate::{ + expr::{TypedExpr, UntypedExpr}, + tipo::{self, PatternConstructor, Type, ValueConstructor}, +}; + +pub type TypedModule = Module; +pub type UntypedModule = Module<(), UntypedDefinition>; + +pub enum ModuleKind { + Contract, + Lib, + Script, } -pub enum Definition { +pub struct Module { + pub name: Vec, + pub docs: Vec, + pub type_info: Info, + pub definitons: Vec, + pub kind: ModuleKind, +} + +pub type TypedDefinition = Definition, TypedExpr, String, String>; +pub type UntypedDefinition = Definition<(), UntypedExpr, (), ()>; + +pub enum Definition { Fn { - arguments: Vec, + location: Span, + arguments: Vec>>, body: Expr, doc: Option, name: String, public: bool, - return_annotation: Option<()>, - return_type: (), + return_annotation: Option, + return_type: T, }, TypeAlias { + location: Span, alias: String, - annotation: (), + annotation: Annotation, doc: Option, parameters: Vec, public: bool, - tipo: (), + tipo: T, }, DataType { - constructors: Vec<()>, + location: Span, + constructors: Vec>, doc: Option, name: String, opaque: bool, parameters: Vec, public: bool, - typed_parameters: Vec<()>, + typed_parameters: Vec, }, Use { module: Vec, as_name: Option, - // unqualified: Vec, - // package: PackageName, + unqualified: Vec, + package: PackageName, + }, + + ModuleConstant { + doc: Option, + location: Span, + public: bool, + name: String, + annotation: Option, + value: Box>, + tipo: T, }, } -pub enum Expr {} +pub type TypedConstant = Constant, String>; +pub type UntypedConstant = Constant<(), ()>; + +pub enum Constant { + Int { + location: Span, + value: String, + }, + + String { + location: Span, + value: String, + }, + + Pair { + location: Span, + elements: Vec, + }, + + List { + location: Span, + elements: Vec, + tipo: T, + }, + + Record { + location: Span, + module: Option, + name: String, + args: Vec>, + tag: RecordTag, + tipo: T, + field_map: Option, + }, + + ByteString { + location: Span, + // segments: Vec>, + }, + + Var { + location: Span, + module: Option, + name: String, + constructor: Option>, + tipo: T, + }, +} + +pub struct CallArg { + pub label: Option, + pub location: Span, + pub value: A, +} + +pub struct FieldMap { + pub arity: usize, + pub fields: HashMap, +} + +pub struct RecordConstructor { + pub location: Span, + pub name: String, + pub arguments: Vec>, + pub documentation: Option, +} + +pub struct RecordConstructorArg { + pub label: Option, + // ast + pub annotation: Annotation, + pub location: Span, + pub tipo: T, + pub doc: Option, +} + +pub struct Arg { + pub names: ArgName, + pub location: Span, + pub annotation: Option, + pub tipo: T, +} + +pub enum ArgName { + Discard { name: String }, + LabeledDiscard { label: String, name: String }, + Named { name: String }, + NamedLabeled { name: String, label: String }, +} + +pub struct UnqualifiedImport { + pub location: Span, + pub name: String, + pub as_name: Option, + pub layer: Layer, +} + +// TypeAst +pub enum Annotation { + Constructor { + location: Span, + module: Option, + name: String, + arguments: Vec, + }, + + Fn { + location: Span, + arguments: Vec, + ret: Box, + }, + + Var { + location: Span, + name: String, + }, + + Tuple { + location: Span, + elems: Vec, + }, + + Hole { + location: Span, + name: String, + }, +} + +pub enum Layer { + Value, + Type, +} + +impl Default for Layer { + fn default() -> Self { + Layer::Value + } +} + +pub enum BinOp { + // Boolean logic + And, + Or, + + // Equality + Eq, + NotEq, + + // Order comparison + LtInt, + LtEqInt, + GtEqInt, + GtInt, + + // Maths + AddInt, + SubInt, + MultInt, + DivInt, + ModInt, +} + +pub enum Pattern { + Int { + location: Span, + value: String, + }, + + Float { + location: Span, + value: String, + }, + + String { + location: Span, + value: String, + }, + + /// The creation of a variable. + /// e.g. `assert [this_is_a_var, .._] = x` + Var { + location: Span, + name: String, + }, + + /// A reference to a variable in a bit string. This is always a variable + /// being used rather than a new variable being assigned. + VarUsage { + location: Span, + name: String, + tipo: Type, + }, + + /// A name given to a sub-pattern using the `as` keyword. + /// e.g. `assert #(1, [_, _] as the_list) = x` + Assign { + name: String, + location: Span, + pattern: Box, + }, + + /// A pattern that binds to any value but does not assign a variable. + /// Always starts with an underscore. + Discard { + name: String, + location: Span, + }, + + List { + location: Span, + elements: Vec, + tail: Option>, + }, + + /// The constructor for a custom type. Starts with an uppercase letter. + Constructor { + location: Span, + name: String, + arguments: Vec>, + module: Option, + constructor: Constructor, + with_spread: bool, + tipo: Type, + }, + + Tuple { + location: Span, + elems: Vec, + }, +} + +pub enum AssignmentKind { + Let, + Assert, +} + +pub type MultiPattern = Vec>; + +pub type UntypedMultiPattern = MultiPattern<(), ()>; +pub type TypedMultiPattern = MultiPattern>; + +pub type TypedClause = Clause, String>; + +pub type UntypedClause = Clause; + +pub struct Clause { + pub location: Span, + pub pattern: MultiPattern, + pub alternative_patterns: Vec>, + pub guard: Option>, + pub then: Expr, +} + +pub enum ClauseGuard { + Equals { + location: Span, + left: Box, + right: Box, + }, + + NotEquals { + location: Span, + left: Box, + right: Box, + }, + + GtInt { + location: Span, + left: Box, + right: Box, + }, + + GtEqInt { + location: Span, + left: Box, + right: Box, + }, + + LtInt { + location: Span, + left: Box, + right: Box, + }, + + LtEqInt { + location: Span, + left: Box, + right: Box, + }, + + Or { + location: Span, + left: Box, + right: Box, + }, + + And { + location: Span, + left: Box, + right: Box, + }, + + Var { + location: Span, + tipo: Type, + name: String, + }, + + TupleIndex { + location: Span, + index: u64, + tipo: Type, + tuple: Box, + }, + + Constant(Constant), +} + +pub struct TypedRecordUpdateArg { + pub label: String, + pub location: Span, + pub value: TypedExpr, + pub index: usize, +} + +pub struct UntypedRecordUpdateArg { + pub label: String, + // pub location: SrcSpan, + pub value: UntypedExpr, +} + +pub struct RecordUpdateSpread { + pub base: Box, + pub location: Span, +} + +pub enum TodoKind { + Keyword, + EmptyFunction, +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct SrcId(Intern>); + +impl SrcId { + #[cfg(test)] + pub fn empty() -> Self { + SrcId(Intern::new(Vec::new())) + } +} + +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct Span { + pub src: SrcId, + pub start: usize, + pub end: usize, +} + +impl Span { + #[cfg(test)] + pub fn empty() -> Self { + use chumsky::Span; + + Self::new(SrcId::empty(), 0..0) + } + + pub fn src(&self) -> SrcId { + self.src + } + + pub fn range(&self) -> Range { + use chumsky::Span; + + self.start()..self.end() + } + + pub fn union(self, other: Self) -> Self { + use chumsky::Span; + + assert_eq!( + self.src, other.src, + "attempted to union spans with different sources" + ); + + Self { + start: self.start().min(other.start()), + end: self.end().max(other.end()), + ..self + } + } +} + +impl fmt::Debug for Span { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}:{:?}", self.src, self.range()) + } +} + +impl chumsky::Span for Span { + type Context = SrcId; + + type Offset = usize; + + fn new(context: Self::Context, range: Range) -> Self { + assert!(range.start <= range.end); + + Self { + src: context, + start: range.start, + end: range.end, + } + } + + fn context(&self) -> Self::Context { + self.src + } + + fn start(&self) -> Self::Offset { + self.start + } + + fn end(&self) -> Self::Offset { + self.end + } +} diff --git a/crates/lang/src/build.rs b/crates/lang/src/build.rs new file mode 100644 index 00000000..9070f46a --- /dev/null +++ b/crates/lang/src/build.rs @@ -0,0 +1,4 @@ +pub enum Origin { + Src, + Test, +} diff --git a/crates/lang/src/error.rs b/crates/lang/src/error.rs new file mode 100644 index 00000000..3c441615 --- /dev/null +++ b/crates/lang/src/error.rs @@ -0,0 +1,109 @@ +use std::{collections::HashSet, fmt}; + +use crate::{ast::Span, token::Token}; + +#[derive(Debug)] +pub struct ParseError { + kind: ErrorKind, + span: Span, + while_parsing: Option<(Span, &'static str)>, + expected: HashSet, + label: Option<&'static str>, +} + +impl ParseError { + pub fn merge(mut self, other: Self) -> Self { + // TODO: Use HashSet + for expected in other.expected.into_iter() { + self.expected.insert(expected); + } + self + } +} + +impl PartialEq for ParseError { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind && self.span == other.span && self.label == other.label + } +} + +impl> chumsky::Error for ParseError { + type Span = Span; + + type Label = &'static str; + + fn expected_input_found>>( + span: Self::Span, + expected: Iter, + found: Option, + ) -> Self { + Self { + kind: found + .map(Into::into) + .map(ErrorKind::Unexpected) + .unwrap_or(ErrorKind::UnexpectedEnd), + span, + while_parsing: None, + expected: expected + .into_iter() + .map(|x| x.map(Into::into).unwrap_or(Pattern::End)) + .collect(), + label: None, + } + } + + fn with_label(mut self, label: Self::Label) -> Self { + self.label.get_or_insert(label); + self + } + + fn merge(self, other: Self) -> Self { + ParseError::merge(self, other) + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum ErrorKind { + UnexpectedEnd, + Unexpected(Pattern), + Unclosed { + start: Pattern, + before_span: Span, + before: Option, + }, + NoEndBranch, +} + +#[derive(Debug, PartialEq, Eq, Hash)] +pub enum Pattern { + Char(char), + Token(Token), + Literal, + TypeIdent, + TermIdent, + End, +} + +impl From for Pattern { + fn from(c: char) -> Self { + Self::Char(c) + } +} +impl From for Pattern { + fn from(tok: Token) -> Self { + Self::Token(tok) + } +} + +impl fmt::Display for Pattern { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Pattern::Token(token) => write!(f, "{}", token), + Pattern::Char(c) => write!(f, "{:?}", c), + Pattern::Literal => write!(f, "literal"), + Pattern::TypeIdent => write!(f, "type name"), + Pattern::TermIdent => write!(f, "identifier"), + Pattern::End => write!(f, "end of input"), + } + } +} diff --git a/crates/lang/src/expr.rs b/crates/lang/src/expr.rs new file mode 100644 index 00000000..e3fd064e --- /dev/null +++ b/crates/lang/src/expr.rs @@ -0,0 +1,269 @@ +use std::sync::Arc; + +use vec1::Vec1; + +use crate::{ + ast::{ + Annotation, Arg, AssignmentKind, BinOp, CallArg, Clause, Pattern, RecordUpdateSpread, Span, + TodoKind, TypedRecordUpdateArg, UntypedRecordUpdateArg, + }, + tipo::{ModuleValueConstructor, PatternConstructor, Type, ValueConstructor}, +}; + +pub enum TypedExpr { + Int { + location: Span, + tipo: Arc, + value: String, + }, + + Float { + location: Span, + tipo: Arc, + value: String, + }, + + String { + location: Span, + tipo: Arc, + value: String, + }, + + Sequence { + location: Span, + expressions: Vec, + }, + + /// A chain of pipe expressions. + /// By this point the type checker has expanded it into a series of + /// assignments and function calls, but we still have a Pipeline AST node as + /// even though it is identical to `Sequence` we want to use different + /// locations when showing it in error messages, etc. + Pipeline { + location: Span, + expressions: Vec, + }, + + Var { + location: Span, + constructor: ValueConstructor, + name: String, + }, + + Fn { + location: Span, + tipo: Arc, + is_capture: bool, + args: Vec>>, + body: Box, + return_annotation: Option, + }, + + List { + location: Span, + tipo: Arc, + elements: Vec, + tail: Option>, + }, + + Call { + location: Span, + tipo: Arc, + fun: Box, + args: Vec>, + }, + + BinOp { + location: Span, + tipo: Arc, + name: BinOp, + left: Box, + right: Box, + }, + + Assignment { + location: Span, + tipo: Arc, + value: Box, + pattern: Pattern>, + kind: AssignmentKind, + }, + + Try { + location: Span, + tipo: Arc, + value: Box, + then: Box, + pattern: Pattern>, + }, + + When { + location: Span, + tipo: Arc, + subjects: Vec, + clauses: Vec, String>>, + }, + + RecordAccess { + location: Span, + tipo: Arc, + label: String, + index: u64, + record: Box, + }, + + ModuleSelect { + location: Span, + tipo: Arc, + label: String, + module_name: String, + module_alias: String, + constructor: ModuleValueConstructor, + }, + + Tuple { + location: Span, + tipo: Arc, + elems: Vec, + }, + + TupleIndex { + location: Span, + tipo: Arc, + index: u64, + tuple: Box, + }, + + Todo { + location: Span, + label: Option, + tipo: Arc, + }, + + RecordUpdate { + location: Span, + tipo: Arc, + spread: Box, + args: Vec, + }, + + Negate { + location: Span, + value: Box, + }, +} + +pub enum UntypedExpr { + Int { + location: Span, + value: String, + }, + + Float { + location: Span, + value: String, + }, + + String { + location: Span, + value: String, + }, + + Sequence { + location: Span, + expressions: Vec, + }, + + Var { + location: Span, + name: String, + }, + + Fn { + location: Span, + is_capture: bool, + arguments: Vec>, + body: Box, + return_annotation: Option, + }, + + List { + location: Span, + elements: Vec, + tail: Option>, + }, + + Call { + location: Span, + fun: Box, + arguments: Vec>, + }, + + BinOp { + location: Span, + name: BinOp, + left: Box, + right: Box, + }, + + PipeLine { + expressions: Vec1, + }, + + Assignment { + location: Span, + value: Box, + pattern: Pattern<(), ()>, + kind: AssignmentKind, + annotation: Option, + }, + + Try { + location: Span, + value: Box, + pattern: Pattern<(), ()>, + then: Box, + annotation: Option, + }, + + Case { + location: Span, + subjects: Vec, + clauses: Vec>, + }, + + FieldAccess { + location: Span, + label: String, + container: Box, + }, + + Tuple { + location: Span, + elems: Vec, + }, + + TupleIndex { + location: Span, + index: u64, + tuple: Box, + }, + + Todo { + kind: TodoKind, + location: Span, + label: Option, + }, + + RecordUpdate { + location: Span, + constructor: Box, + spread: RecordUpdateSpread, + arguments: Vec, + }, + + Negate { + location: Span, + value: Box, + }, +} diff --git a/crates/lang/src/lexer.rs b/crates/lang/src/lexer.rs new file mode 100644 index 00000000..958538ba --- /dev/null +++ b/crates/lang/src/lexer.rs @@ -0,0 +1,163 @@ +use chumsky::prelude::*; +use internment::Intern; + +use crate::{ast::Span, error::ParseError, token::Token}; + +pub fn lexer() -> impl Parser, Error = ParseError> { + let int = text::int(10) + .map(Intern::new) + .map(|value| Token::Int { value }); + + let op = choice(( + just("==").to(Token::EqualEqual), + just('=').to(Token::Equal), + just("..").to(Token::Dot), + just('.').to(Token::Dot), + just("!=").to(Token::NotEqual), + just('!').to(Token::Bang), + just("<=").to(Token::LessEqual), + just('<').to(Token::Less), + just(">=").to(Token::GreaterEqual), + just('>').to(Token::Greater), + just('+').to(Token::Plus), + just("->").to(Token::RArrow), + just('-').to(Token::Minus), + just('*').to(Token::Star), + just('/').to(Token::Slash), + just('%').to(Token::Percent), + just("|>").to(Token::Pipe), + )); + + let grouping = choice(( + just('(').to(Token::LeftParen), + just(')').to(Token::RightParen), + just('[').to(Token::LeftSquare), + just(']').to(Token::RightSquare), + just('{').to(Token::LeftBrace), + just('}').to(Token::RightBrace), + )); + + let escape = just('\\').ignore_then( + just('\\') + .or(just('/')) + .or(just('"')) + .or(just('b').to('\x08')) + .or(just('f').to('\x0C')) + .or(just('n').to('\n')) + .or(just('r').to('\r')) + .or(just('t').to('\t')), + ); + + let string = just('"') + .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated()) + .then_ignore(just('"')) + .collect::() + .map(Intern::new) + .map(|value| Token::String { value }) + .labelled("string"); + + let keyword = text::ident().map(|s: String| match s.as_str() { + "as" => Token::As, + "assert" => Token::Assert, + "const" => Token::Const, + "fn" => Token::Fn, + "if" => Token::If, + "is" => Token::Is, + "let" => Token::Let, + "opaque" => Token::Opaque, + "pub" => Token::Pub, + "use" => Token::Use, + "todo" => Token::Todo, + "try" => Token::Try, + "type" => Token::Type, + "when" => Token::When, + _ => { + if s.chars().next().map_or(false, |c| c.is_uppercase()) { + Token::UpName { + // TODO: do not allow _ in upname + name: Intern::new(s), + } + } else if s.starts_with('_') { + Token::DiscardName { + // TODO: do not allow uppercase letters in discard name + name: Intern::new(s), + } + } else { + Token::Name { + // TODO: do not allow uppercase letters in name + name: Intern::new(s), + } + } + } + }); + + let token = choice((keyword, int, op, grouping, string)) + .or(any().map(Token::Error).validate(|t, span, emit| { + emit(ParseError::expected_input_found(span, None, Some(t))); + t + })) + .map_with_span(move |token, span| (token, span)) + .padded() + .recover_with(skip_then_retry_until([])); + + let comments = just("//") + .then_ignore( + just('(') + .ignore_then(take_until(just(")#")).ignored()) + .or(none_of('\n').ignored().repeated().ignored()), + ) + .padded() + .ignored() + .repeated(); + + token + .padded_by(comments) + .repeated() + .padded() + .then_ignore(end()) +} + +#[cfg(test)] +mod tests { + use chumsky::prelude::*; + use internment::Intern; + + use crate::{ + ast::{Span, SrcId}, + lexer, + token::Token, + }; + + #[test] + fn simple() { + let code = "pub type |> >=\n{ Thing _na_thing name"; + let len = code.chars().count(); + + let span = |i| Span::new(SrcId::empty(), i..i + 1); + + assert_eq!( + lexer::lexer() + .parse(chumsky::Stream::from_iter( + span(len), + code.chars().enumerate().map(|(i, c)| (c, span(i))), + )) + .map(|tokens| tokens.into_iter().map(|(tok, _)| tok).collect::>()), + Ok(vec![ + Token::Pub, + Token::Type, + Token::Pipe, + Token::GreaterEqual, + Token::LeftBrace, + Token::UpName { + name: Intern::new("Thing".to_string()) + }, + Token::DiscardName { + name: Intern::new("_na_thing".to_string()) + }, + Token::Name { + name: Intern::new("name".to_string()) + } + ]), + ); + } +} diff --git a/crates/lang/src/lib.rs b/crates/lang/src/lib.rs index 660c3de9..49da21d2 100644 --- a/crates/lang/src/lib.rs +++ b/crates/lang/src/lib.rs @@ -1,10 +1,8 @@ pub mod ast; - -#[cfg(test)] -mod tests { - #[test] - fn it_works() { - let result = 2 + 2; - assert_eq!(result, 4); - } -} +pub mod build; +pub mod error; +pub mod expr; +pub mod lexer; +pub mod parser; +pub mod tipo; +pub mod token; diff --git a/crates/lang/src/parser.rs b/crates/lang/src/parser.rs new file mode 100644 index 00000000..e2dcf2f6 --- /dev/null +++ b/crates/lang/src/parser.rs @@ -0,0 +1,7 @@ +use chumsky::prelude::*; + +use crate::{ast, error::ParseError, token::Token}; + +pub fn module_parser() -> impl Parser { + let imports = just(Token::Use).ignore_then(); +} diff --git a/crates/lang/src/tipo.rs b/crates/lang/src/tipo.rs new file mode 100644 index 00000000..85299bae --- /dev/null +++ b/crates/lang/src/tipo.rs @@ -0,0 +1,159 @@ +use std::{cell::RefCell, collections::HashMap, sync::Arc}; + +use crate::{ + ast::{Constant, FieldMap, Span, TypedConstant}, + build::Origin, +}; + +pub enum Type { + /// A nominal (named) type such as `Int`, `Float`, or a programmer defined + /// custom type such as `Person`. The type can take other types as + /// arguments (aka "generics" or "parametric polymorphism"). + /// + /// If the type is defined in the Gleam prelude the `module` field will be + /// empty, otherwise it will contain the name of the module that + /// defines the type. + /// + App { + public: bool, + module: Vec, + name: String, + args: Vec>, + }, + + /// The type of a function. It takes arguments and returns a value. + /// + Fn { + args: Vec>, + retrn: Arc, + }, + + /// A type variable. See the contained `TypeVar` enum for more information. + /// + Var { tipo: Arc> }, + + /// A tuple is an ordered collection of 0 or more values, each of which + /// can have a different type, so the `tuple` type is the sum of all the + /// contained types. + /// + Tuple { elems: Vec> }, +} + +pub enum TypeVar { + /// Unbound is an unbound variable. It is one specific type but we don't + /// know what yet in the inference process. It has a unique id which can be used to + /// identify if two unbound variable Rust values are the same Gleam type variable + /// instance or not. + /// + Unbound { id: u64 }, + /// Link is type variable where it was an unbound variable but we worked out + /// that it is some other type and now we point to that one. + /// + Link { tipo: Arc }, + /// A Generic variable stands in for any possible type and cannot be + /// specialised to any one type + /// + /// # Example + /// + /// ```gleam + /// type Cat(a) { + /// Cat(name: a) + /// } + /// // a is TypeVar::Generic + /// ``` + /// + Generic { id: u64 }, +} + +pub struct ValueConstructor { + pub public: bool, + pub variant: ValueConstructorVariant, + pub tipo: Arc, +} + +pub enum ValueConstructorVariant { + /// A locally defined variable or function parameter + LocalVariable { location: Span }, + + /// A module constant + ModuleConstant { + location: Span, + module: String, + literal: Constant, String>, + }, + + /// A function belonging to the module + ModuleFn { + name: String, + field_map: Option, + module: Vec, + arity: usize, + location: Span, + }, + + /// A constructor for a custom type + Record { + name: String, + arity: usize, + field_map: Option, + location: Span, + module: String, + }, +} + +pub struct Module { + pub name: Vec, + pub origin: Origin, + pub package: String, + pub types: HashMap, + pub types_constructors: HashMap>, + pub values: HashMap, + pub accessors: HashMap, +} + +pub struct TypeConstructor { + pub public: bool, + pub origin: Span, + pub module: Vec, + pub parameters: Vec>, + pub typ: Arc, +} + +pub struct AccessorsMap { + pub public: bool, + pub tipo: Arc, + pub accessors: HashMap, +} + +pub struct RecordAccessor { + // TODO: smaller int. Doesn't need to be this big + pub index: u64, + pub label: String, + pub tipo: Arc, +} + +pub enum PatternConstructor { + Record { + name: String, + field_map: Option, + }, +} + +pub enum ModuleValueConstructor { + Record { + name: String, + arity: usize, + type_: Arc, + field_map: Option, + location: Span, + }, + + Fn { + location: Span, + }, + + Constant { + literal: TypedConstant, + location: Span, + }, +} diff --git a/crates/lang/src/token.rs b/crates/lang/src/token.rs new file mode 100644 index 00000000..3b2d947b --- /dev/null +++ b/crates/lang/src/token.rs @@ -0,0 +1,149 @@ +use std::fmt; + +use internment::Intern; + +#[derive(Copy, Clone, Debug, PartialEq, Hash, Eq)] +pub enum Token { + Error(char), + Name { name: Intern }, + UpName { name: Intern }, + DiscardName { name: Intern }, + Int { value: Intern }, + String { value: Intern }, + // Groupings + LeftParen, // ( + RightParen, // ) + LeftSquare, // [ + RightSquare, // } + LeftBrace, // { + RightBrace, // } + // Int Operators + Plus, + Minus, + Star, + Slash, + Less, + Greater, + LessEqual, + GreaterEqual, + Percent, + // ByteString Operators + PlusDot, // '+.' + MinusDot, // '-.' + StarDot, // '*.' + SlashDot, // '/.' + LessDot, // '<.' + GreaterDot, // '>.' + LessEqualDot, // '<=.' + GreaterEqualDot, // '>=.' + // Other Punctuation + Colon, + Comma, + Hash, // '#' + Bang, // '!' + Equal, + EqualEqual, // '==' + NotEqual, // '!=' + Vbar, // '|' + VbarVbar, // '||' + AmperAmper, // '&&' + Pipe, // '|>' + Dot, // '.' + RArrow, // '->' + DotDot, // '..' + EndOfFile, + // Extra + CommentNormal, + CommentDoc, + CommentModule, + EmptyLine, + // Keywords (alphabetically): + As, + Assert, + Const, + Fn, + If, + Is, + Let, + Opaque, + Pub, + Use, + Todo, + Try, + Type, + When, +} + +impl fmt::Display for Token { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + Token::Error(c) => { + write!(f, "\"{}\"", c)?; + + return Ok(()); + } + Token::Name { name } => &**name, + Token::UpName { name } => &**name, + Token::DiscardName { name } => &**name, + Token::Int { value } => &**value, + Token::String { value } => &**value, + Token::LeftParen => "(", + Token::RightParen => ")", + Token::LeftSquare => "[", + Token::RightSquare => "]", + Token::LeftBrace => "{", + Token::RightBrace => "}", + Token::Plus => "+", + Token::Minus => "-", + Token::Star => "*", + Token::Slash => "/", + Token::Less => "<", + Token::Greater => ">", + Token::LessEqual => "<=", + Token::GreaterEqual => ">=", + Token::Percent => "%", + Token::PlusDot => "+.", + Token::MinusDot => "-.", + Token::StarDot => "*.", + Token::SlashDot => "/.", + Token::LessDot => "<.", + Token::GreaterDot => ">.", + Token::LessEqualDot => "<=.", + Token::GreaterEqualDot => ">=.", + Token::Colon => ":", + Token::Comma => ",", + Token::Hash => "#", + Token::Bang => "!", + Token::Equal => "=", + Token::EqualEqual => "==", + Token::NotEqual => "!=", + Token::Vbar => "|", + Token::VbarVbar => "||", + Token::AmperAmper => "&&", + Token::Pipe => "|>", + Token::Dot => ".", + Token::RArrow => "->", + Token::DotDot => "..", + Token::EndOfFile => "EOF", + Token::CommentNormal => "//", + Token::CommentDoc => "///", + Token::CommentModule => "////", + Token::EmptyLine => "EMPTYLINE", + Token::As => "as", + Token::Assert => "assert", + Token::When => "when", + Token::Is => "is", + Token::Const => "const", + Token::Fn => "fn", + Token::If => "if", + Token::Use => "import", + Token::Let => "let", + Token::Opaque => "opaque", + Token::Pub => "pub", + Token::Todo => "todo", + Token::Try => "try", + Token::Type => "type", + }; + write!(f, "\"{}\"", s) + } +}