diff --git a/crates/aiken-lang/src/lib.rs b/crates/aiken-lang/src/lib.rs
index 3e7dc7cc..7c041339 100644
--- a/crates/aiken-lang/src/lib.rs
+++ b/crates/aiken-lang/src/lib.rs
@@ -10,6 +10,7 @@ pub mod expr;
 pub mod format;
 pub mod gen_uplc;
 pub mod levenshtein;
+pub mod line_numbers;
 pub mod parser;
 pub mod pretty;
 pub mod tipo;
diff --git a/crates/aiken-lang/src/line_numbers.rs b/crates/aiken-lang/src/line_numbers.rs
new file mode 100644
index 00000000..643cd972
--- /dev/null
+++ b/crates/aiken-lang/src/line_numbers.rs
@@ -0,0 +1,169 @@
+//! Mapping between byte offsets and line / column positions of a source text.
+
+use std::fmt::{self, Display};
+
+/// Index of the byte offsets at which each line of a source text starts.
+///
+/// Offsets and columns are expressed in bytes, not characters: a multi-byte
+/// UTF-8 character spans several columns.
+#[derive(Debug)]
+pub struct LineNumbers {
+    /// Byte offset of the first byte of every line, in ascending order. Left
+    /// empty for an empty source, so that no offset resolves to a position.
+    line_starts: Vec<usize>,
+    /// Total length of the source, in bytes.
+    length: usize,
+}
+
+/// A 1-indexed line and byte-based column.
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub struct LineColumn {
+    pub line: usize,
+    pub column: usize,
+}
+
+impl Display for LineColumn {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "L{};{}", self.line, self.column)
+    }
+}
+
+impl LineNumbers {
+    /// Record the byte offset of every line start of `src`.
+    pub fn new(src: &str) -> Self {
+        let length = src.len();
+
+        let line_starts = if length > 0 {
+            std::iter::once(0)
+                .chain(src.match_indices('\n').map(|(i, _)| i + 1))
+                .collect()
+        } else {
+            Vec::new()
+        };
+
+        Self {
+            line_starts,
+            length,
+        }
+    }
+
+    /// Get the 1-indexed line number for a byte index.
+    ///
+    /// Returns `None` when the source is empty or when `byte_index` lies past the
+    /// end of the source. The offset one past the last byte is still accepted,
+    /// because the exclusive end of a span may point there.
+    pub fn line_number(&self, byte_index: usize) -> Option<usize> {
+        if self.line_starts.is_empty() || byte_index > self.length {
+            return None;
+        }
+
+        Some(match self.line_starts.binary_search(&byte_index) {
+            // `byte_index` is exactly the start of the 0-indexed line `line`.
+            Ok(line) => line + 1,
+            // `byte_index` falls within the line preceding the insertion point,
+            // which is never 0 since the first line starts at offset 0.
+            Err(next_line) => next_line,
+        })
+    }
+
+    /// Get the 1-indexed line and column for a byte index.
+    ///
+    /// The column counts bytes from the start of the line. Returns `None` under
+    /// the same conditions as [`LineNumbers::line_number`].
+    pub fn line_and_column_number(&self, byte_index: usize) -> Option<LineColumn> {
+        let line = self.line_number(byte_index)?;
+        let column = byte_index - self.line_starts.get(line - 1).copied().unwrap_or_default() + 1;
+        Some(LineColumn { line, column })
+    }
+
+    /// Get the byte index for a 0-indexed line and a byte offset in that line.
+    ///
+    /// Returns the length of the source when `line` does not exist.
+    pub fn byte_index(&self, line: usize, character: usize) -> usize {
+        match self.line_starts.get(line) {
+            Some(line_index) => *line_index + character,
+            None => self.length,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use indoc::indoc;
+
+    fn assert_line_column(src: &str, ix: usize, lcol: Option<LineColumn>) {
+        let lines = LineNumbers::new(src);
+
+        println!("{lines:?}");
+
+        let byte = src
+            .as_bytes()
+            .get(ix)
+            .map(|b| {
+                if b.is_ascii() {
+                    char::from(*b).to_string()
+                } else {
+                    b.to_string()
+                }
+            })
+            .unwrap_or_else(|| "OUT-OF-BOUNDS".to_string());
+
+        assert_eq!(
+            lines.line_and_column_number(ix),
+            lcol,
+            "\n{src}\n--> at index {ix} ({byte})\n",
+        );
+    }
+
+    #[test]
+    fn out_of_range_byte_index() {
+        let src = indoc! { r#""# };
+        assert_line_column(src, 42, None);
+        assert_line_column(src, 0, None);
+    }
+
+    #[test]
+    fn basic() {
+        let src = indoc! { r#"
+            foo
+            bar
+        "# };
+
+        assert_line_column(src, 0, Some(LineColumn { line: 1, column: 1 }));
+        assert_line_column(src, 2, Some(LineColumn { line: 1, column: 3 }));
+        assert_line_column(src, 4, Some(LineColumn { line: 2, column: 1 }));
+    }
+
+    #[test]
+    fn unicode() {
+        let src = indoc! { r#"
+            💩
+            foo
+        "# };
+
+        assert_line_column(src, 0, Some(LineColumn { line: 1, column: 1 }));
+        assert_line_column(src, 2, Some(LineColumn { line: 1, column: 3 }));
+        assert_line_column(src, 5, Some(LineColumn { line: 2, column: 1 }));
+    }
+
+    #[test]
+    fn single_line_without_trailing_newline() {
+        let src = "ab";
+
+        assert_line_column(src, 0, Some(LineColumn { line: 1, column: 1 }));
+        assert_line_column(src, 1, Some(LineColumn { line: 1, column: 2 }));
+        assert_line_column(src, 2, Some(LineColumn { line: 1, column: 3 }));
+    }
+
+    #[test]
+    fn past_the_end() {
+        let src = indoc! { r#"
+            foo
+            bar
+        "# };
+
+        assert_line_column(src, 8, Some(LineColumn { line: 3, column: 1 }));
+        assert_line_column(src, 9, None);
+    }
+}
diff --git a/crates/aiken-lsp/src/edits.rs b/crates/aiken-lsp/src/edits.rs
index 2fc53741..0b035309 100644
--- a/crates/aiken-lsp/src/edits.rs
+++ b/crates/aiken-lsp/src/edits.rs
@@ -1,5 +1,8 @@
-use crate::{line_numbers::LineNumbers, utils::span_to_lsp_range};
-use aiken_lang::ast::{Definition, ModuleKind, Span, UntypedDefinition, Use};
+use crate::utils::span_to_lsp_range;
+use aiken_lang::{
+    ast::{Definition, ModuleKind, Span, UntypedDefinition, Use},
+    line_numbers::LineNumbers,
+};
 use aiken_project::module::CheckedModule;
 use itertools::Itertools;
 use std::fs;
diff --git a/crates/aiken-lsp/src/lib.rs b/crates/aiken-lsp/src/lib.rs
index b627081f..b1683aa6 100644
--- a/crates/aiken-lsp/src/lib.rs
+++ b/crates/aiken-lsp/src/lib.rs
@@ -7,7 +7,6 @@ use std::env;
 mod cast;
 mod edits;
 pub mod error;
-mod line_numbers;
 mod quickfix;
 pub mod server;
 mod utils;
diff --git a/crates/aiken-lsp/src/line_numbers.rs b/crates/aiken-lsp/src/line_numbers.rs
deleted file mode 100644
index 395c424c..00000000
--- a/crates/aiken-lsp/src/line_numbers.rs
+++ /dev/null
@@ -1,48 +0,0 @@
-#[allow(dead_code)]
-#[derive(Debug)]
-pub struct LineNumbers {
-    line_starts: Vec<usize>,
-    length: usize,
-}
-
-impl LineNumbers {
-    pub fn new(src: &str) -> Self {
-        Self {
-            length: src.len(),
-            line_starts: std::iter::once(0)
-                .chain(src.match_indices('\n').map(|(i, _)| i + 1))
-                .collect(),
-        }
-    }
-
-    /// Get the line number for a byte index
-    pub fn line_number(&self, byte_index: usize) -> usize {
-        self.line_starts
-            .binary_search(&byte_index)
-            .unwrap_or_else(|next_line| next_line - 1)
-            + 1
-    }
-
-    // TODO: handle unicode characters that may be more than 1 byte in width
-    pub fn line_and_column_number(&self, byte_index: usize) -> LineColumn {
-        let line = self.line_number(byte_index);
-        let column = byte_index - self.line_starts.get(line - 1).copied().unwrap_or_default() + 1;
-        LineColumn { line, column }
-    }
-
-    // TODO: handle unicode characters that may be more than 1 byte in width
-    /// 0 indexed line and character to byte index
-    #[allow(dead_code)]
-    pub fn byte_index(&self, line: usize, character: usize) -> usize {
-        match self.line_starts.get(line) {
-            Some(line_index) => *line_index + character,
-            None => self.length,
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct LineColumn {
-    pub line: usize,
-    pub column: usize,
-}
diff --git a/crates/aiken-lsp/src/server.rs b/crates/aiken-lsp/src/server.rs
index 7d02331e..f2b1300c 100644
--- a/crates/aiken-lsp/src/server.rs
+++ b/crates/aiken-lsp/src/server.rs
@@ -8,6 +8,7 @@ use std::{
 use aiken_lang::{
     ast::{Definition, Located, ModuleKind, Span, Use},
     error::ExtraData,
+    line_numbers::LineNumbers,
     parser,
     tipo::pretty::Printer,
 };
@@ -35,7 +36,6 @@ use miette::Diagnostic;
 use crate::{
     cast::{cast_notification, cast_request},
     error::Error as ServerError,
-    line_numbers::LineNumbers,
     quickfix,
     utils::{
         path_to_uri, span_to_lsp_range, text_edit_replace, uri_to_module_name,
diff --git a/crates/aiken-lsp/src/server/lsp_project.rs b/crates/aiken-lsp/src/server/lsp_project.rs
index b2c8ac70..1134689f 100644
--- a/crates/aiken-lsp/src/server/lsp_project.rs
+++ b/crates/aiken-lsp/src/server/lsp_project.rs
@@ -1,10 +1,8 @@
 use std::{collections::HashMap, path::PathBuf};
 
-use aiken_lang::ast::Tracing;
+use aiken_lang::{ast::Tracing, line_numbers::LineNumbers};
 use aiken_project::{config::Config, error::Error as ProjectError, module::CheckedModule, Project};
 
-use crate::line_numbers::LineNumbers;
-
 #[derive(Debug)]
 pub struct SourceInfo {
     /// The path to the source file from within the project root
diff --git a/crates/aiken-lsp/src/utils.rs b/crates/aiken-lsp/src/utils.rs
index 54411d1a..fd6d445a 100644
--- a/crates/aiken-lsp/src/utils.rs
+++ b/crates/aiken-lsp/src/utils.rs
@@ -1,11 +1,11 @@
 use std::path::{Path, PathBuf};
 
-use aiken_lang::ast::Span;
+use aiken_lang::{ast::Span, line_numbers::LineNumbers};
 use itertools::Itertools;
 use lsp_types::TextEdit;
 use urlencoding::decode;
 
-use crate::{error::Error, line_numbers::LineNumbers};
+use crate::error::Error;
 
 pub const COMPILING_PROGRESS_TOKEN: &str = "compiling-aiken";
 pub const CREATE_COMPILING_PROGRESS_TOKEN: &str = "create-compiling-progress-token";
@@ -37,8 +37,12 @@ pub fn path_to_uri(path: PathBuf) -> Result {
 }
 
 pub fn span_to_lsp_range(location: Span, line_numbers: &LineNumbers) -> lsp_types::Range {
-    let start = line_numbers.line_and_column_number(location.start);
-    let end = line_numbers.line_and_column_number(location.end);
+    let start = line_numbers
+        .line_and_column_number(location.start)
+        .expect("Spans are within bounds");
+    let end = line_numbers
+        .line_and_column_number(location.end)
+        .expect("Spans are within bounds");
 
     lsp_types::Range {
         start: lsp_types::Position {