From 1d3315005c47e4fc49dfe2a97ba794f687d2d650 Mon Sep 17 00:00:00 2001 From: KtorZ Date: Thu, 30 Mar 2023 11:46:44 +0200 Subject: [PATCH] Support some single-character escape sequences in UPLC Fixes #472. This also partially addresses #195. However, as pointed out in one of the comments, there's no 'official rule' when it comes to what should be considered valid escape sequences. Haskell relies mostly on the AttoParsec library and Rust also has its own set of rules. This is in particular true for Unicode escape sequences, but there is a common middle ground for some usual single-character escapes such as \n or \\. So we now at least support these. For more complicated escape sequences, please refer to #195 for now and keep the discussion going there. --- Cargo.lock | 1 + crates/uplc/Cargo.toml | 1 + crates/uplc/src/flat.rs | 56 ++++++++++++++++++++++++++++++--------- crates/uplc/src/parser.rs | 12 ++++++++- crates/uplc/src/pretty.rs | 14 +++++++--- 5 files changed, 67 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2711ef84..7bcd6344 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2720,6 +2720,7 @@ dependencies = [ "flat-rs", "hex", "indexmap", + "indoc", "itertools", "k256", "miette", diff --git a/crates/uplc/Cargo.toml b/crates/uplc/Cargo.toml index 87e367e1..e729ac64 100644 --- a/crates/uplc/Cargo.toml +++ b/crates/uplc/Cargo.toml @@ -41,6 +41,7 @@ thiserror = "1.0.39" [dev-dependencies] hex = "0.4.3" +indoc = "2.0.1" proptest = "1.1.0" [features] diff --git a/crates/uplc/src/flat.rs b/crates/uplc/src/flat.rs index 65a74716..c4cf9eeb 100644 --- a/crates/uplc/src/flat.rs +++ b/crates/uplc/src/flat.rs @@ -1,5 +1,9 @@ -use std::{collections::VecDeque, fmt::Debug, rc::Rc}; - +use crate::{ + ast::{ + Constant, DeBruijn, FakeNamedDeBruijn, Name, NamedDeBruijn, Program, Term, Type, Unique, + }, + builtins::DefaultFunction, +}; use anyhow::anyhow; use flat_rs::{ de::{self, Decode, Decoder}, @@ -7,13 +11,7 @@ use flat_rs::{ Flat, }; use 
pallas_primitives::{babbage::PlutusData, Fragment}; - -use crate::{ - ast::{ - Constant, DeBruijn, FakeNamedDeBruijn, Name, NamedDeBruijn, Program, Term, Type, Unique, - }, - builtins::DefaultFunction, -}; +use std::{collections::VecDeque, fmt::Debug, rc::Rc}; const BUILTIN_TAG_WIDTH: u32 = 7; const CONST_TAG_WIDTH: u32 = 4; @@ -810,11 +808,13 @@ pub fn decode_constant_tag(d: &mut Decoder) -> Result { #[cfg(test)] mod test { - use flat_rs::Flat; - - use crate::ast::{Name, Type}; - use super::{Constant, Program, Term}; + use crate::{ + ast::{DeBruijn, Name, Type}, + parser, + }; + use flat_rs::Flat; + use indoc::indoc; #[test] fn flat_encode_integer() { @@ -961,4 +961,34 @@ mod test { assert_eq!(actual_program, expected_program) } + + #[test] + fn unflat_string_escape() { + let cbor = "490000004901015c0001"; + + let program = + Program::::from_hex(cbor, &mut Vec::new(), &mut Vec::new()).unwrap(); + + assert_eq!( + program.to_pretty().as_str(), + indoc! { r#" + (program + 0.0.0 + (con string "\\") + )"#} + ); + } + + #[test] + fn uplc_parser_string_escape() { + let source = indoc! { r#" + (program + 0.0.0 + (con string "\n\t\\\"\'\r") + )"#}; + + let program = parser::program(source).unwrap(); + + assert_eq!(program.to_pretty(), source); + } } diff --git a/crates/uplc/src/parser.rs b/crates/uplc/src/parser.rs index e14c8b73..a0ecfde0 100644 --- a/crates/uplc/src/parser.rs +++ b/crates/uplc/src/parser.rs @@ -168,7 +168,17 @@ peg::parser! 
{ = "#" i:ident()* { hex::decode(String::from_iter(i)).unwrap() } rule string() -> String - = "\"" s:[^ '"']* "\"" { String::from_iter(s) } + = "\"" s:character()* "\"" { String::from_iter(s) } + + rule character() -> char + = "\\n" { '\n' } // newline (line feed) + / "\\r" { '\r' } // carriage return + / "\\t" { '\t' } // horizontal tab + / "\\\"" { '\"' } // double quote + / "\\'" { '\'' } // single quote + / "\\\\" { '\\' } // backslash + / [ ^ '"' ] + / expected!("or any valid ascii character") rule data() -> PlutusData = "#" i:ident()* { diff --git a/crates/uplc/src/pretty.rs b/crates/uplc/src/pretty.rs index 6403dd03..227031a6 100644 --- a/crates/uplc/src/pretty.rs +++ b/crates/uplc/src/pretty.rs @@ -1,10 +1,10 @@ -use pretty::RcDoc; - use crate::{ ast::{Constant, Program, Term, Type}, flat::Binder, plutus_data_to_bytes, }; +use pretty::RcDoc; +use std::ascii::escape_default; impl<'a, T> Program where @@ -185,7 +185,15 @@ impl Constant { Constant::String(s) => RcDoc::text("string") .append(RcDoc::line()) .append(RcDoc::text("\"")) - .append(RcDoc::text(s)) + .append(RcDoc::text( + String::from_utf8( + s.as_bytes() + .iter() + .flat_map(|c| escape_default(*c).collect::>()) + .collect(), + ) + .unwrap(), + )) .append(RcDoc::text("\"")), Constant::Unit => RcDoc::text("unit") .append(RcDoc::line())