From 15cfb22c8fb78858cdcccf3b6a00c12317c913e9 Mon Sep 17 00:00:00 2001 From: Kasey White Date: Sat, 22 Oct 2022 18:35:16 -0400 Subject: [PATCH] more detailed parse errors when decoding with flat --- crates/flat/src/decode/error.rs | 2 + crates/uplc/src/flat.rs | 187 +++++++++++++++++++++++++++++++- crates/uplc/src/pretty.rs | 22 ++++ 3 files changed, 207 insertions(+), 4 deletions(-) diff --git a/crates/flat/src/decode/error.rs b/crates/flat/src/decode/error.rs index fcf90b97..f4de82a5 100644 --- a/crates/flat/src/decode/error.rs +++ b/crates/flat/src/decode/error.rs @@ -18,6 +18,8 @@ pub enum Error { DecodeChar(u32), #[error("{0}")] Message(String), + #[error("Parse error: So far we parsed {0} and we ran into error: {1}")] + ParseError(String, anyhow::Error), #[error(transparent)] Custom(#[from] anyhow::Error), } diff --git a/crates/uplc/src/flat.rs b/crates/uplc/src/flat.rs index 7a3008af..6c4045e9 100644 --- a/crates/uplc/src/flat.rs +++ b/crates/uplc/src/flat.rs @@ -1,5 +1,6 @@ use std::{collections::VecDeque, fmt::Debug, rc::Rc}; +use anyhow::anyhow; use flat_rs::{ de::{self, Decode, Decoder}, en::{self, Encode, Encoder}, @@ -109,10 +110,14 @@ where T: Binder<'b>, { fn decode(d: &mut Decoder) -> Result { + let mut state_log: Vec = vec![]; let version = (usize::decode(d)?, usize::decode(d)?, usize::decode(d)?); - let term = Term::decode(d)?; + let term_option = Term::decode_debug(d, &mut state_log); - Ok(Program { version, term }) + match term_option { + Ok(term) => Ok(Program { version, term }), + Err(error) => Err(de::Error::ParseError(state_log.join(""), anyhow!(error))), + } } } @@ -190,15 +195,189 @@ where 6 => Ok(Term::Error), 7 => Ok(Term::Builtin(DefaultFunction::decode(d)?)), x => Err(de::Error::Message(format!( - "Unknown term constructor tag: {} and buffer position is {} and buffer length is {}", + "Unknown term constructor tag: {}{} {:02X?} {} {} {} {}", x, - d.buffer.len() - d.pos, + ".\n\nHere are the buffer bytes (5 preceding) ", + d.buffer + .iter() + .skip(if d.pos - 5 > 0 { d.pos - 5 } else { 0 }) + .take(10), + "\n\nBuffer position is", + d.pos, + "and buffer length is", d.buffer.len() ))), } } } +impl<'b, T> Term +where + T: Binder<'b>, +{ + fn decode_debug(d: &mut Decoder, state_log: &mut Vec) -> Result, de::Error> { + match decode_term_tag(d)? { + 0 => { + state_log.push("(var ".to_string()); + let var_option = T::decode(d); + match var_option { + Ok(var) => { + state_log.push(format!("{})", var.text())); + Ok(Term::Var(var)) + } + Err(error) => { + state_log.push("parse error)".to_string()); + Err(error) + } + } + } + + 1 => { + state_log.push("(delay ".to_string()); + let term_option = Term::decode_debug(d, state_log); + match term_option { + Ok(term) => { + state_log.push(")".to_string()); + Ok(Term::Delay(Rc::new(term))) + } + Err(error) => { + state_log.push(")".to_string()); + Err(error) + } + } + } + 2 => { + state_log.push("(lam ".to_string()); + + let var_option = T::binder_decode(d); + match var_option { + Ok(var) => { + state_log.push(var.text().to_string()); + let term_option = Term::decode_debug(d, state_log); + match term_option { + Ok(term) => { + state_log.push(")".to_string()); + Ok(Term::Lambda { + parameter_name: var, + body: Rc::new(term), + }) + } + Err(error) => { + state_log.push(")".to_string()); + Err(error) + } + } + } + Err(error) => { + state_log.push(")".to_string()); + Err(error) + } + } + } + 3 => { + state_log.push("[ ".to_string()); + + let function_term_option = Term::decode_debug(d, state_log); + match function_term_option { + Ok(function) => { + state_log.push(" ".to_string()); + let arg_term_option = Term::decode_debug(d, state_log); + match arg_term_option { + Ok(argument) => { + state_log.push("]".to_string()); + Ok(Term::Apply { + function: Rc::new(function), + argument: Rc::new(argument), + }) + } + Err(error) => { + state_log.push("]".to_string()); + Err(error) + } + } + } + Err(error) => { + state_log.push(" not parsed]".to_string()); + Err(error) + } + } + } + // Need size limit for Constant + 4 => { + state_log.push("(con ".to_string()); + + let con_option = Constant::decode(d); + match con_option { + Ok(constant) => { + state_log.push(format!("{})", constant.to_pretty())); + Ok(Term::Constant(constant)) + } + Err(error) => { + state_log.push("parse error)".to_string()); + Err(error) + } + } + } + 5 => { + state_log.push("(force ".to_string()); + let term_option = Term::decode_debug(d, state_log); + match term_option { + Ok(term) => { + state_log.push(")".to_string()); + Ok(Term::Force(Rc::new(term))) + } + Err(error) => { + state_log.push(")".to_string()); + Err(error) + } + } + } + 6 => { + state_log.push("(error)".to_string()); + Ok(Term::Error) + } + 7 => { + state_log.push("(builtin ".to_string()); + + let builtin_option = DefaultFunction::decode(d); + match builtin_option { + Ok(builtin) => { + state_log.push(format!("{})", builtin)); + Ok(Term::Builtin(builtin)) + } + Err(error) => { + state_log.push("parse error)".to_string()); + Err(error) + } + } + } + x => { + state_log.push("parse error".to_string()); + + let buffer_slice: Vec = d + .buffer + .to_vec() + .iter() + .skip(if d.pos - 5 > 0 { d.pos - 5 } else { 0 }) + .take(10) + .cloned() + .collect(); + + Err(de::Error::Message(format!( + "Unknown term constructor tag: {}{} {:02X?} {} {} {} {}", + x, + ".\n\nHere are the buffer bytes (5 preceding) ", + buffer_slice, + "\n\nBuffer position is", + d.pos, + "and buffer length is", + d.buffer.len() + ))) + } + } + } +} + /// Integers are typically smaller so we save space /// by encoding them in 7 bits and this allows it to be byte alignment agnostic. /// Strings and bytestrings span multiple bytes so using bytestring is diff --git a/crates/uplc/src/pretty.rs b/crates/uplc/src/pretty.rs index a025cf55..b360805d 100644 --- a/crates/uplc/src/pretty.rs +++ b/crates/uplc/src/pretty.rs @@ -151,6 +151,28 @@ where } impl Constant { + pub fn to_pretty(&self) -> String { + let mut w = Vec::new(); + + self.to_doc().render(80, &mut w).unwrap(); + + String::from_utf8(w) + .unwrap() + .lines() + // This is a hack to deal with blank newlines + // that end up with a bunch of useless whitespace + // because of the nesting + .map(|l| { + if l.chars().all(|c| c.is_whitespace()) { + "".to_string() + } else { + l.to_string() + } + }) + .collect::>() + .join("\n") + } + fn to_doc(&self) -> RcDoc<()> { match self { Constant::Integer(i) => RcDoc::text("integer")