Add function to calculate the Levenshtein distance of two strings
Will be useful to make import or usage suggestions.
This commit is contained in:
parent
666761efef
commit
70b1ec4324
|
@ -0,0 +1,42 @@
|
|||
use std::cmp;
|
||||
|
||||
/// Calculate Levenshtein distance for two UTF-8 encoded strings.
///
/// Returns the minimum number of edits needed to transform `source` into
/// `target`. Edits are counted in Unicode scalar values (`char`s), not bytes,
/// so a multi-byte character such as `é` counts as a single edit.
///
/// Levenshtein distance accepts three edit operations: insertion, deletion,
/// and substitution.
///
/// If either string is empty, the distance is the number of `char`s in the
/// other one.
///
/// References:
///
/// - [Levenshtein distance in Cargo][1]
/// - [Ilia Schelokov: Optimizing loop heavy Rust code][2]
///
/// [1]: https://github.com/rust-lang/cargo/blob/7d7fe6797ad07f313706380d251796702272b150/src/cargo/util/lev_distance.rs
/// [2]: https://thaumant.me/optimizing-loop-heavy-rust/
pub fn distance(source: &str, target: &str) -> usize {
    // Shortcuts for empty inputs. These must count `char`s rather than use
    // `str::len()` (a byte length) to stay consistent with the char-based
    // comparison performed below.
    if source.is_empty() {
        return target.chars().count();
    }
    if target.is_empty() {
        return source.chars().count();
    }

    // Decode the target once instead of re-decoding its UTF-8 on every
    // iteration of the outer loop.
    let target_chars: Vec<char> = target.chars().collect();

    // Single-row dynamic programming table. Before processing any source
    // character, `distances[j]` is the cost of building the first `j` target
    // characters from an empty prefix, i.e. `j` insertions.
    let mut distances: Vec<usize> = (0..=target_chars.len()).collect();

    for (i, ch1) in source.chars().enumerate() {
        // `sub` holds the previous row's value diagonally up-left of the cell
        // being computed, which is the base cost for a substitution.
        let mut sub = i;
        // Turning the first `i + 1` source characters into an empty prefix
        // takes `i + 1` deletions.
        distances[0] = sub + 1;

        for (j, &ch2) in target_chars.iter().enumerate() {
            let dist = cmp::min(
                // Insertion (left neighbour) or deletion (upper neighbour).
                cmp::min(distances[j], distances[j + 1]) + 1,
                // Substitution, which is free when the characters match.
                sub + usize::from(ch1 != ch2),
            );

            // Save the previous row's value before overwriting it; it becomes
            // the diagonal for the next column.
            sub = distances[j + 1];
            distances[j + 1] = dist;
        }
    }

    // The last cell holds the distance between the two full strings.
    distances[target_chars.len()]
}
|
|
@ -9,6 +9,7 @@ pub mod builder;
|
|||
pub mod builtins;
|
||||
pub mod expr;
|
||||
pub mod format;
|
||||
pub mod levenshtein;
|
||||
pub mod parser;
|
||||
pub mod pretty;
|
||||
pub mod tipo;
|
||||
|
|
Loading…
Reference in New Issue