Emit a warning when detecting a hex string interpreted as UTF-8 bytes.

This will probably save people minutes/hours of puzzled debugging. This is only a warning because there may be cases where one does actually want to specify a hex-encoded bytearray. In that case, they can get rid of the warning by using the plain bytearray syntax (i.e. as an array of bytes).
This commit is contained in:
KtorZ 2023-02-18 11:36:45 +01:00
parent d72e13c7c8
commit 78770d14b7
No known key found for this signature in database
GPG Key ID: 33173CB6F77F4277
3 changed files with 74 additions and 15 deletions

View File

@ -305,3 +305,17 @@ fn trace_if_false_ko() {
Err((_, Error::CouldNotUnify { .. })) Err((_, Error::CouldNotUnify { .. }))
)) ))
} }
#[test]
fn utf8_hex_literal_warning() {
    // A 56-character hexadecimal digest written with plain double quotes,
    // i.e. as a UTF-8 literal rather than as a hex-decoded byte array.
    let source_code = r#"
pub const policy_id = "f43a62fdc3965df486de8a0d32fe800963589c41b38946602a0dc535"
"#;

    let (warnings, _) = check(parse(source_code)).unwrap();

    // Use `first()` rather than `warnings[0]`: if the type-checker emits no
    // warning at all, the test then fails with a readable assertion message
    // instead of an opaque index-out-of-bounds panic.
    assert!(
        matches!(
            warnings.first(),
            Some(Warning::Utf8ByteArrayIsValidHexString { .. })
        ),
        "expected the first warning to be Utf8ByteArrayIsValidHexString, got: {:?}",
        warnings
    );
}

View File

@ -1244,6 +1244,31 @@ pub enum Warning {
#[label("unused")] #[label("unused")]
location: Span, location: Span,
}, },
/// Raised for a double-quoted (UTF-8) `ByteArray` literal whose content is
/// itself a valid hexadecimal string, which usually means the `#` prefix that
/// decodes the hex string was forgotten.
///
/// * `location` - span of the offending literal, labelled in the report.
/// * `value` - the literal's text, echoed back in the help message as the
///   suggested `#"..."` replacement.
#[error(
    "I noticed a suspicious {type_ByteArray} UTF-8 literal which resembles a hash digest.",
    type_ByteArray = "ByteArray".bold().bright_blue()
)]
#[diagnostic(help("{}", formatdoc! {
    r#"When you specify a {type_ByteArray} literal using plain double-quotes, it's interpreted as an array of UTF-8 bytes. For example, the literal {literal_foo} is interpreted as the byte sequence {foo_bytes}.
       However here, you have specified a literal that resembles a hash digest encoded as an hexadecimal string. This is a common case, but you probably want to capture the raw bytes represented by this sequence, and not the hexadecimal sequence. Fear not! Aiken provides a convenient syntax for that: just prefix the literal with {symbol_hash}. This will decode the hexadecimal string for you and capture the non-encoded bytes as a {type_ByteArray}.
       {symbol_hash}{value}
    "#,
    type_ByteArray = "ByteArray".bold().bright_blue(),
    literal_foo = "\"foo\"".purple(),
    foo_bytes = "#[102, 111, 111]".purple(),
    // Must go through `format!`: a bare string literal is not interpolated,
    // so `"\"{value}\""` would print the placeholder text `"{value}"` verbatim
    // instead of the literal the user actually wrote.
    value = format!("\"{value}\"").purple(),
    symbol_hash = "#".purple(),
}))]
#[diagnostic(code("syntax::bytearray_literal_is_hex_string"))]
#[diagnostic(url("https://aiken-lang.org/language-tour/primitive-types#bytearray"))]
Utf8ByteArrayIsValidHexString {
    #[label("missing '#' to decode hex string")]
    location: Span,
    value: String,
},
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]

View File

@ -4,11 +4,11 @@ use vec1::Vec1;
use crate::{ use crate::{
ast::{ ast::{
Annotation, Arg, ArgName, AssignmentKind, BinOp, CallArg, Clause, ClauseGuard, Constant, Annotation, Arg, ArgName, AssignmentKind, BinOp, ByteArrayFormatPreference, CallArg,
IfBranch, RecordUpdateSpread, Span, TraceKind, Tracing, TypedArg, TypedCallArg, Clause, ClauseGuard, Constant, IfBranch, RecordUpdateSpread, Span, TraceKind, Tracing,
TypedClause, TypedClauseGuard, TypedIfBranch, TypedMultiPattern, TypedRecordUpdateArg, TypedArg, TypedCallArg, TypedClause, TypedClauseGuard, TypedIfBranch, TypedMultiPattern,
UnOp, UntypedArg, UntypedClause, UntypedClauseGuard, UntypedIfBranch, UntypedMultiPattern, TypedRecordUpdateArg, UnOp, UntypedArg, UntypedClause, UntypedClauseGuard, UntypedIfBranch,
UntypedPattern, UntypedRecordUpdateArg, UntypedMultiPattern, UntypedPattern, UntypedRecordUpdateArg,
}, },
builtins::{bool, byte_array, function, int, list, string, tuple}, builtins::{bool, byte_array, function, int, list, string, tuple},
expr::{TypedExpr, UntypedExpr}, expr::{TypedExpr, UntypedExpr},
@ -351,8 +351,10 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
} => self.infer_tuple_index(*tuple, index, location), } => self.infer_tuple_index(*tuple, index, location),
UntypedExpr::ByteArray { UntypedExpr::ByteArray {
location, bytes, .. bytes,
} => Ok(self.infer_byte_array(bytes, location)), preferred_format,
location,
} => self.infer_bytearray(bytes, preferred_format, location),
UntypedExpr::RecordUpdate { UntypedExpr::RecordUpdate {
location, location,
@ -373,12 +375,27 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
} }
} }
fn infer_byte_array(&mut self, bytes: Vec<u8>, location: Span) -> TypedExpr { fn infer_bytearray(
TypedExpr::ByteArray { &mut self,
bytes: Vec<u8>,
preferred_format: ByteArrayFormatPreference,
location: Span,
) -> Result<TypedExpr, Error> {
if let ByteArrayFormatPreference::Utf8String = preferred_format {
let value = String::from_utf8(bytes.clone()).unwrap();
let is_hex_string = hex::decode(&value).is_ok();
if bytes.len() >= 56 && is_hex_string {
self.environment
.warnings
.push(Warning::Utf8ByteArrayIsValidHexString { location, value });
}
}
Ok(TypedExpr::ByteArray {
location, location,
bytes, bytes,
tipo: byte_array(), tipo: byte_array(),
} })
} }
fn infer_trace_if_false( fn infer_trace_if_false(
@ -1357,11 +1374,14 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
location, location,
bytes, bytes,
preferred_format, preferred_format,
} => Ok(Constant::ByteArray { } => {
let _ = self.infer_bytearray(bytes.clone(), preferred_format, location)?;
Ok(Constant::ByteArray {
location, location,
bytes, bytes,
preferred_format, preferred_format,
}), })
}
}?; }?;
// Check type annotation is accurate. // Check type annotation is accurate.