From bf7cdfba73d11c75d6472f4e5e256704c1ce1f63 Mon Sep 17 00:00:00 2001 From: KtorZ Date: Wed, 21 Dec 2022 19:24:27 +0100 Subject: [PATCH] Implement parser & type-checker for tuple indexes. ```aiken fn foo() { let tuple = #(1, 2, 3, 4) tuple.1st + tuple.2nd + tuple.3rd + tuple.4th } ``` --- Cargo.lock | 20 +++++ crates/aiken-lang/Cargo.toml | 1 + crates/aiken-lang/src/air.rs | 9 +-- crates/aiken-lang/src/expr.rs | 44 +++++------ crates/aiken-lang/src/format.rs | 9 +++ crates/aiken-lang/src/parser.rs | 30 ++++++-- crates/aiken-lang/src/parser/error.rs | 19 ++++- crates/aiken-lang/src/parser/lexer.rs | 23 +++++- crates/aiken-lang/src/parser/token.rs | 6 ++ crates/aiken-lang/src/tests/parser.rs | 103 ++++++++++++++++++++++++++ crates/aiken-lang/src/tipo/error.rs | 16 ++++ crates/aiken-lang/src/tipo/expr.rs | 45 +++++++++-- crates/aiken-lang/src/uplc.rs | 4 + 13 files changed, 287 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea7e6236..5b459652 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,6 +79,7 @@ dependencies = [ "indoc", "itertools", "miette", + "ordinal", "pretty_assertions", "strum", "thiserror", @@ -1125,6 +1126,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.15" @@ -1204,6 +1215,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "ordinal" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c80c1530f46e9d8985706d7deb80b83172b250538902f607dea6cd6028851083" +dependencies = [ + "num-integer", +] + [[package]] name = "os_str_bytes" version = "6.3.0" diff --git a/crates/aiken-lang/Cargo.toml b/crates/aiken-lang/Cargo.toml index a3648b27..93351d58 100644 --- a/crates/aiken-lang/Cargo.toml +++ b/crates/aiken-lang/Cargo.toml @@ -15,6 +15,7 @@ chumsky = "0.8.0" indexmap = "1.9.1" itertools = "0.10.5" miette = "5.2.0" +ordinal = "0.3.2" strum = "0.24.1" thiserror = "1.0.37" uplc = { path = '../uplc', version = "0.0.25" } diff --git a/crates/aiken-lang/src/air.rs b/crates/aiken-lang/src/air.rs index 10189f40..b41118b0 100644 --- a/crates/aiken-lang/src/air.rs +++ b/crates/aiken-lang/src/air.rs @@ -191,11 +191,10 @@ pub enum Air { }, // TupleIndex { - // scope: Vec, - // - // tipo: Arc, - // index: u64, - // tuple: Box, + // scope: Vec, + // tipo: Arc, + // index: u64, + // tuple: Box, // }, Todo { scope: Vec, diff --git a/crates/aiken-lang/src/expr.rs b/crates/aiken-lang/src/expr.rs index 1ea26268..e420b7be 100644 --- a/crates/aiken-lang/src/expr.rs +++ b/crates/aiken-lang/src/expr.rs @@ -135,12 +135,13 @@ pub enum TypedExpr { elems: Vec, }, - // TupleIndex { - // location: Span, - // tipo: Arc, - // index: u64, - // tuple: Box, - // }, + TupleIndex { + location: Span, + tipo: Arc, + index: usize, + tuple: Box, + }, + Todo { location: Span, label: Option, @@ -165,7 +166,7 @@ impl TypedExpr { match self { Self::Negate { .. } => bool(), Self::Var { constructor, .. } => constructor.tipo.clone(), - Self::Trace {then, ..} => then.tipo(), + Self::Trace { then, .. } => then.tipo(), Self::Fn { tipo, .. } | Self::Int { tipo, .. } | Self::Todo { tipo, .. } @@ -177,7 +178,7 @@ impl TypedExpr { | Self::Tuple { tipo, .. } | Self::String { tipo, .. } | Self::ByteArray { tipo, .. } - // | Self::TupleIndex { tipo, .. } + | Self::TupleIndex { tipo, .. } | Self::Assignment { tipo, .. } | Self::ModuleSelect { tipo, .. } | Self::RecordAccess { tipo, .. } @@ -221,9 +222,9 @@ impl TypedExpr { | TypedExpr::Pipeline { .. } | TypedExpr::ByteArray { .. } | TypedExpr::Assignment { .. } - // | TypedExpr::TupleIndex { .. } + | TypedExpr::TupleIndex { .. } | TypedExpr::RecordAccess { .. } => None, - | TypedExpr::If { .. } => None, + TypedExpr::If { .. } => None, // TODO: test // TODO: definition @@ -261,15 +262,12 @@ impl TypedExpr { | Self::Pipeline { location, .. } | Self::ByteArray { location, .. } | Self::Assignment { location, .. } - // | Self::TupleIndex { location, .. } + | Self::TupleIndex { location, .. } | Self::ModuleSelect { location, .. } | Self::RecordAccess { location, .. } | Self::RecordUpdate { location, .. } => *location, - Self::If { - branches, - .. - } => branches.first().body.type_defining_location(), + Self::If { branches, .. } => branches.first().body.type_defining_location(), Self::Sequence { expressions, @@ -301,7 +299,7 @@ impl TypedExpr { | Self::Pipeline { location, .. } | Self::ByteArray { location, .. } | Self::Assignment { location, .. } - // | Self::TupleIndex { location, .. } + | Self::TupleIndex { location, .. } | Self::ModuleSelect { location, .. } | Self::RecordAccess { location, .. } | Self::RecordUpdate { location, .. } => *location, @@ -401,11 +399,13 @@ pub enum UntypedExpr { location: Span, elems: Vec, }, - // TupleIndex { - // location: Span, - // index: u64, - // tuple: Box, - // }, + + TupleIndex { + location: Span, + index: usize, + tuple: Box, + }, + Todo { kind: TodoKind, location: Span, @@ -490,7 +490,7 @@ impl UntypedExpr { | Self::Tuple { location, .. } | Self::String { location, .. } | Self::Assignment { location, .. } - // | Self::TupleIndex { location, .. } + | Self::TupleIndex { location, .. } | Self::FieldAccess { location, .. } | Self::RecordUpdate { location, .. } | Self::Negate { location, .. } diff --git a/crates/aiken-lang/src/format.rs b/crates/aiken-lang/src/format.rs index c9a05a2f..05826321 100644 --- a/crates/aiken-lang/src/format.rs +++ b/crates/aiken-lang/src/format.rs @@ -1,4 +1,5 @@ use itertools::Itertools; +use ordinal::Ordinal; use std::sync::Arc; use vec1::Vec1; @@ -796,6 +797,14 @@ impl<'comments> Formatter<'comments> { elems.iter().map(|e| (self.wrap_expr(e), false)), )) .group(), + + UntypedExpr::TupleIndex { index, tuple, .. } => { + let suffix = Ordinal(*index + 1).suffix().to_doc(); + self.expr(tuple) + .append(".".to_doc()) + .append((index + 1).to_doc()) + .append(suffix) + } }; commented(document, comments) } diff --git a/crates/aiken-lang/src/parser.rs b/crates/aiken-lang/src/parser.rs index 06802f9a..88eec7d4 100644 --- a/crates/aiken-lang/src/parser.rs +++ b/crates/aiken-lang/src/parser.rs @@ -1089,6 +1089,7 @@ pub fn expr_parser( enum Chain { Call(Vec, Span), FieldAccess(String, Span), + TupleIndex(usize, Span), } let field_access_parser = just(Token::Dot) @@ -1097,6 +1098,19 @@ pub fn expr_parser( }) .map_with_span(Chain::FieldAccess); + let tuple_index_parser = just(Token::Dot) + .ignore_then(select! { + Token::Ordinal { index } => index, + }) + .validate(|index, span, emit| { + if index < 1 { + emit(ParseError::invalid_tuple_index(span, index, None)); + Chain::TupleIndex(0, span) + } else { + Chain::TupleIndex(index as usize - 1, span) + } + }); + let call_parser = choice(( select! { Token::Name { name } => name } .then_ignore(just(Token::Colon)) @@ -1123,11 +1137,11 @@ pub fn expr_parser( .delimited_by(just(Token::LeftParen), just(Token::RightParen)) .map_with_span(Chain::Call); - let chain = choice((field_access_parser, call_parser)); + let chain = choice((tuple_index_parser, field_access_parser, call_parser)); let chained = expr_unit_parser .then(chain.repeated()) - .foldl(|e, chain| match chain { + .foldl(|expr, chain| match chain { Chain::Call(args, span) => { let mut holes = Vec::new(); @@ -1161,7 +1175,7 @@ pub fn expr_parser( let call = expr::UntypedExpr::Call { location: span, - fun: Box::new(e), + fun: Box::new(expr), arguments: args, }; @@ -1179,9 +1193,15 @@ pub fn expr_parser( } Chain::FieldAccess(label, span) => expr::UntypedExpr::FieldAccess { - location: e.location().union(span), + location: expr.location().union(span), label, - container: Box::new(e), + container: Box::new(expr), + }, + + Chain::TupleIndex(index, span) => expr::UntypedExpr::TupleIndex { + location: expr.location().union(span), + index, + tuple: Box::new(expr), }, }); diff --git a/crates/aiken-lang/src/parser/error.rs b/crates/aiken-lang/src/parser/error.rs index f9da4c52..0d93b280 100644 --- a/crates/aiken-lang/src/parser/error.rs +++ b/crates/aiken-lang/src/parser/error.rs @@ -24,6 +24,17 @@ impl ParseError { } self } + + pub fn invalid_tuple_index(span: Span, index: u32, suffix: Option) -> Self { + let hint = suffix.map(|suffix| format!("{index}{suffix}")); + Self { + kind: ErrorKind::InvalidTupleIndex { hint }, + span, + while_parsing: None, + expected: HashSet::new(), + label: None, + } + } } impl PartialEq for ParseError { @@ -69,20 +80,22 @@ impl> chumsky::Error for ParseError { #[derive(Debug, PartialEq, Eq, Diagnostic, thiserror::Error)] pub enum ErrorKind { - #[error("unexpected end")] + #[error("Unexpected end")] UnexpectedEnd, #[error("{0}")] #[diagnostic(help("{}", .0.help().unwrap_or_else(|| Box::new(""))))] Unexpected(Pattern), - #[error("unclosed {start}")] + #[error("Unclosed {start}")] Unclosed { start: Pattern, #[label] before_span: Span, before: Option, }, - #[error("no end branch")] + #[error("No end branch")] NoEndBranch, + #[error("Invalid tuple index{}", hint.as_ref().map(|s| format!("; did you mean '{s}' ?")).unwrap_or_default())] + InvalidTupleIndex { hint: Option }, } #[derive(Debug, PartialEq, Eq, Hash, Diagnostic, thiserror::Error)] diff --git a/crates/aiken-lang/src/parser/lexer.rs b/crates/aiken-lang/src/parser/lexer.rs index fd7eedbf..d218c2ea 100644 --- a/crates/aiken-lang/src/parser/lexer.rs +++ b/crates/aiken-lang/src/parser/lexer.rs @@ -2,6 +2,8 @@ use chumsky::prelude::*; use crate::ast::Span; +use ordinal::Ordinal; + use super::{error::ParseError, token::Token}; pub fn lexer() -> impl Parser, Error = ParseError> { @@ -17,6 +19,25 @@ pub fn lexer() -> impl Parser, Error = ParseError> { )) .map(|value| Token::Int { value }); + let ordinal = text::int(10) + .from_str() + .unwrapped() + .then_with(|index: u32| { + choice((just("st"), just("nd"), just("rd"), just("th"))) + .map(move |suffix| (index, suffix)) + }) + .validate(|(index, suffix), span, emit| { + let expected_suffix = Ordinal(index).suffix(); + if expected_suffix != suffix { + emit(ParseError::invalid_tuple_index( + span, + index, + Some(expected_suffix.to_string()), + )) + } + Token::Ordinal { index } + }); + let op = choice(( just("==").to(Token::EqualEqual), just('=').to(Token::Equal), @@ -132,7 +153,7 @@ pub fn lexer() -> impl Parser, Error = ParseError> { module_comments, doc_comments, comments, - choice((keyword, int, op, grouping, string)) + choice((ordinal, keyword, int, op, grouping, string)) .or(any().map(Token::Error).validate(|t, span, emit| { emit(ParseError::expected_input_found( span, diff --git a/crates/aiken-lang/src/parser/token.rs b/crates/aiken-lang/src/parser/token.rs index 76e9a6ac..5c681a06 100644 --- a/crates/aiken-lang/src/parser/token.rs +++ b/crates/aiken-lang/src/parser/token.rs @@ -4,6 +4,7 @@ use std::fmt; pub enum Token { Error(char), Name { name: String }, + Ordinal { index: u32 }, UpName { name: String }, DiscardName { name: String }, Int { value: String }, @@ -78,12 +79,17 @@ pub enum Token { impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let index_str; let s = match self { Token::Error(c) => { write!(f, "\"{}\"", c)?; return Ok(()); } Token::Name { name } => name, + Token::Ordinal { index } => { + index_str = index.to_string(); + &index_str[..] + } Token::UpName { name } => name, Token::DiscardName { name } => name, Token::Int { value } => value, diff --git a/crates/aiken-lang/src/tests/parser.rs b/crates/aiken-lang/src/tests/parser.rs index a25369f6..e167f277 100644 --- a/crates/aiken-lang/src/tests/parser.rs +++ b/crates/aiken-lang/src/tests/parser.rs @@ -1436,3 +1436,106 @@ fn record_create_unlabeled() { }), ) } + +#[test] +fn parse_tuple() { + let code = indoc! {r#" + fn foo() { + let tuple = #(1, 2, 3, 4) + tuple.1st + tuple.2nd + tuple.3rd + tuple.4th + } + "#}; + + assert_definition( + code, + ast::UntypedDefinition::Fn(Function { + arguments: vec![], + body: expr::UntypedExpr::Sequence { + location: Span::new((), 13..86), + expressions: vec![ + expr::UntypedExpr::Assignment { + location: Span::new((), 13..38), + value: Box::new(expr::UntypedExpr::Tuple { + location: Span::new((), 25..38), + elems: vec![ + expr::UntypedExpr::Int { + location: Span::new((), 27..28), + value: "1".to_string(), + }, + expr::UntypedExpr::Int { + location: Span::new((), 30..31), + value: "2".to_string(), + }, + expr::UntypedExpr::Int { + location: Span::new((), 33..34), + value: "3".to_string(), + }, + expr::UntypedExpr::Int { + location: Span::new((), 36..37), + value: "4".to_string(), + }, + ], + }), + pattern: ast::Pattern::Var { + location: Span::new((), 17..22), + name: "tuple".to_string(), + }, + kind: ast::AssignmentKind::Let, + annotation: None, + }, + expr::UntypedExpr::BinOp { + location: Span::new((), 41..86), + name: ast::BinOp::AddInt, + left: Box::new(expr::UntypedExpr::BinOp { + location: Span::new((), 41..74), + name: ast::BinOp::AddInt, + left: Box::new(expr::UntypedExpr::BinOp { + location: Span::new((), 41..62), + name: ast::BinOp::AddInt, + left: Box::new(expr::UntypedExpr::TupleIndex { + location: Span::new((), 41..50), + index: 0, + tuple: Box::new(expr::UntypedExpr::Var { + location: Span::new((), 41..46), + name: "tuple".to_string(), + }), + }), + right: Box::new(expr::UntypedExpr::TupleIndex { + location: Span::new((), 53..62), + index: 1, + tuple: Box::new(expr::UntypedExpr::Var { + location: Span::new((), 53..58), + name: "tuple".to_string(), + }), + }), + }), + right: Box::new(expr::UntypedExpr::TupleIndex { + location: Span::new((), 65..74), + index: 2, + tuple: Box::new(expr::UntypedExpr::Var { + location: Span::new((), 65..70), + name: "tuple".to_string(), + }), + }), + }), + right: Box::new(expr::UntypedExpr::TupleIndex { + location: Span::new((), 77..86), + index: 3, + tuple: Box::new(expr::UntypedExpr::Var { + location: Span::new((), 77..82), + name: "tuple".to_string(), + }), + }), + }, + ], + }, + doc: None, + location: Span::new((), 0..8), + name: "foo".to_string(), + public: false, + return_annotation: None, + return_type: (), + end_position: 87, + }), + ) +} diff --git a/crates/aiken-lang/src/tipo/error.rs b/crates/aiken-lang/src/tipo/error.rs index 40a230e2..24b21d80 100644 --- a/crates/aiken-lang/src/tipo/error.rs +++ b/crates/aiken-lang/src/tipo/error.rs @@ -1,5 +1,7 @@ use std::{collections::HashMap, sync::Arc}; +use ordinal::Ordinal; + use miette::Diagnostic; use crate::ast::{BinOp, Span, TodoKind}; @@ -283,6 +285,20 @@ pub enum Error { #[label] location: Span, }, + + #[error("Trying to access tuple elements on something else than a tuple\n")] + NotATuple { + #[label] + location: Span, + }, + + #[error("Trying to access the {} element of a {}-tuple\n", Ordinal(*index + 1).to_string(), size)] + TupleIndexOutOfBound { + #[label] + location: Span, + index: usize, + size: usize, + }, } impl Error { diff --git a/crates/aiken-lang/src/tipo/expr.rs b/crates/aiken-lang/src/tipo/expr.rs index 488cb004..ba35a1e7 100644 --- a/crates/aiken-lang/src/tipo/expr.rs +++ b/crates/aiken-lang/src/tipo/expr.rs @@ -321,12 +321,13 @@ impl<'a, 'b> ExprTyper<'a, 'b> { .. } => self.infer_field_access(*container, label, location), - // UntypedExpr::TupleIndex { - // location, - // index, - // tuple, - // .. - // } => self.infer_tuple_index(*tuple, index, location), + UntypedExpr::TupleIndex { + location, + index, + tuple, + .. + } => self.infer_tuple_index(*tuple, index, location), + UntypedExpr::ByteArray { location, bytes } => { Ok(self.infer_byte_array(bytes, location)) } @@ -1701,6 +1702,38 @@ impl<'a, 'b> ExprTyper<'a, 'b> { }) } + fn infer_tuple_index( + &mut self, + tuple: UntypedExpr, + index: usize, + location: Span, + ) -> Result { + let tuple = self.infer(tuple)?; + + let tipo = match *tuple.tipo() { + Type::Tuple { ref elems, .. } => { + let size = elems.len(); + if index >= size { + Err(Error::TupleIndexOutOfBound { + location, + index, + size, + }) + } else { + Ok(elems[index].clone()) + } + } + _ => Err(Error::NotATuple { location }), + }?; + + Ok(TypedExpr::TupleIndex { + location, + tipo, + index, + tuple: Box::new(tuple), + }) + } + fn infer_todo(&mut self, location: Span, kind: TodoKind, label: Option) -> TypedExpr { let tipo = self.new_unbound_var(); diff --git a/crates/aiken-lang/src/uplc.rs b/crates/aiken-lang/src/uplc.rs index aa1c6a16..a2c99997 100644 --- a/crates/aiken-lang/src/uplc.rs +++ b/crates/aiken-lang/src/uplc.rs @@ -532,6 +532,10 @@ impl<'a> CodeGenerator<'a> { self.build_ir(then, ir_stack, scope); } + + TypedExpr::TupleIndex { .. } => { + todo!("Tuple indexing not implementing yet"); + } } }