Emit a warning when detecting a hex string interpreted as UTF-8 bytes.
This will probably save people minutes/hours of puzzled debugging. This is only a warning because there may be cases where one does actually want to specify a hex-encoded bytearray. In that case, they can get rid of the warning by using the plain bytearray syntax (i.e. as an array of bytes).
This commit is contained in:
parent
d72e13c7c8
commit
78770d14b7
|
@ -305,3 +305,17 @@ fn trace_if_false_ko() {
|
||||||
Err((_, Error::CouldNotUnify { .. }))
|
Err((_, Error::CouldNotUnify { .. }))
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn utf8_hex_literal_warning() {
|
||||||
|
let source_code = r#"
|
||||||
|
pub const policy_id = "f43a62fdc3965df486de8a0d32fe800963589c41b38946602a0dc535"
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let (warnings, _) = check(parse(source_code)).unwrap();
|
||||||
|
|
||||||
|
assert!(matches!(
|
||||||
|
warnings[0],
|
||||||
|
Warning::Utf8ByteArrayIsValidHexString { .. }
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
|
@ -1244,6 +1244,31 @@ pub enum Warning {
|
||||||
#[label("unused")]
|
#[label("unused")]
|
||||||
location: Span,
|
location: Span,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
#[error(
|
||||||
|
"I noticed a suspicious {type_ByteArray} UTF-8 literal which resembles a hash digest.",
|
||||||
|
type_ByteArray = "ByteArray".bold().bright_blue()
|
||||||
|
)]
|
||||||
|
#[diagnostic(help("{}", formatdoc! {
|
||||||
|
r#"When you specify a {type_ByteArray} literal using plain double-quotes, it's interpreted as an array of UTF-8 bytes. For example, the literal {literal_foo} is interpreted as the byte sequence {foo_bytes}.
|
||||||
|
|
||||||
|
However here, you have specified a literal that resembles a hash digest encoded as an hexadecimal string. This is a common case, but you probably want to capture the raw bytes represented by this sequence, and not the hexadecimal sequence. Fear not! Aiken provides a convenient syntax for that: just prefix the literal with {symbol_hash}. This will decode the hexadecimal string for you and capture the non-encoded bytes as a {type_ByteArray}.
|
||||||
|
|
||||||
|
╰─▶ {symbol_hash}{value}
|
||||||
|
"#,
|
||||||
|
type_ByteArray = "ByteArray".bold().bright_blue(),
|
||||||
|
literal_foo = "\"foo\"".purple(),
|
||||||
|
foo_bytes = "#[102, 111, 111]".purple(),
|
||||||
|
value = "\"{value}\"".purple(),
|
||||||
|
symbol_hash = "#".purple(),
|
||||||
|
}))]
|
||||||
|
#[diagnostic(code("syntax::bytearray_literal_is_hex_string"))]
|
||||||
|
#[diagnostic(url("https://aiken-lang.org/language-tour/primitive-types#bytearray"))]
|
||||||
|
Utf8ByteArrayIsValidHexString {
|
||||||
|
#[label("missing '#' to decode hex string")]
|
||||||
|
location: Span,
|
||||||
|
value: String,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
|
|
@ -4,11 +4,11 @@ use vec1::Vec1;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
ast::{
|
ast::{
|
||||||
Annotation, Arg, ArgName, AssignmentKind, BinOp, CallArg, Clause, ClauseGuard, Constant,
|
Annotation, Arg, ArgName, AssignmentKind, BinOp, ByteArrayFormatPreference, CallArg,
|
||||||
IfBranch, RecordUpdateSpread, Span, TraceKind, Tracing, TypedArg, TypedCallArg,
|
Clause, ClauseGuard, Constant, IfBranch, RecordUpdateSpread, Span, TraceKind, Tracing,
|
||||||
TypedClause, TypedClauseGuard, TypedIfBranch, TypedMultiPattern, TypedRecordUpdateArg,
|
TypedArg, TypedCallArg, TypedClause, TypedClauseGuard, TypedIfBranch, TypedMultiPattern,
|
||||||
UnOp, UntypedArg, UntypedClause, UntypedClauseGuard, UntypedIfBranch, UntypedMultiPattern,
|
TypedRecordUpdateArg, UnOp, UntypedArg, UntypedClause, UntypedClauseGuard, UntypedIfBranch,
|
||||||
UntypedPattern, UntypedRecordUpdateArg,
|
UntypedMultiPattern, UntypedPattern, UntypedRecordUpdateArg,
|
||||||
},
|
},
|
||||||
builtins::{bool, byte_array, function, int, list, string, tuple},
|
builtins::{bool, byte_array, function, int, list, string, tuple},
|
||||||
expr::{TypedExpr, UntypedExpr},
|
expr::{TypedExpr, UntypedExpr},
|
||||||
|
@ -351,8 +351,10 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
|
||||||
} => self.infer_tuple_index(*tuple, index, location),
|
} => self.infer_tuple_index(*tuple, index, location),
|
||||||
|
|
||||||
UntypedExpr::ByteArray {
|
UntypedExpr::ByteArray {
|
||||||
location, bytes, ..
|
bytes,
|
||||||
} => Ok(self.infer_byte_array(bytes, location)),
|
preferred_format,
|
||||||
|
location,
|
||||||
|
} => self.infer_bytearray(bytes, preferred_format, location),
|
||||||
|
|
||||||
UntypedExpr::RecordUpdate {
|
UntypedExpr::RecordUpdate {
|
||||||
location,
|
location,
|
||||||
|
@ -373,12 +375,27 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn infer_byte_array(&mut self, bytes: Vec<u8>, location: Span) -> TypedExpr {
|
fn infer_bytearray(
|
||||||
TypedExpr::ByteArray {
|
&mut self,
|
||||||
|
bytes: Vec<u8>,
|
||||||
|
preferred_format: ByteArrayFormatPreference,
|
||||||
|
location: Span,
|
||||||
|
) -> Result<TypedExpr, Error> {
|
||||||
|
if let ByteArrayFormatPreference::Utf8String = preferred_format {
|
||||||
|
let value = String::from_utf8(bytes.clone()).unwrap();
|
||||||
|
let is_hex_string = hex::decode(&value).is_ok();
|
||||||
|
if bytes.len() >= 56 && is_hex_string {
|
||||||
|
self.environment
|
||||||
|
.warnings
|
||||||
|
.push(Warning::Utf8ByteArrayIsValidHexString { location, value });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(TypedExpr::ByteArray {
|
||||||
location,
|
location,
|
||||||
bytes,
|
bytes,
|
||||||
tipo: byte_array(),
|
tipo: byte_array(),
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn infer_trace_if_false(
|
fn infer_trace_if_false(
|
||||||
|
@ -1357,11 +1374,14 @@ impl<'a, 'b> ExprTyper<'a, 'b> {
|
||||||
location,
|
location,
|
||||||
bytes,
|
bytes,
|
||||||
preferred_format,
|
preferred_format,
|
||||||
} => Ok(Constant::ByteArray {
|
} => {
|
||||||
|
let _ = self.infer_bytearray(bytes.clone(), preferred_format, location)?;
|
||||||
|
Ok(Constant::ByteArray {
|
||||||
location,
|
location,
|
||||||
bytes,
|
bytes,
|
||||||
preferred_format,
|
preferred_format,
|
||||||
}),
|
})
|
||||||
|
}
|
||||||
}?;
|
}?;
|
||||||
|
|
||||||
// Check type annotation is accurate.
|
// Check type annotation is accurate.
|
||||||
|
|
Loading…
Reference in New Issue