diff --git a/src/db/stream.rs b/src/db/stream.rs index b047bb8..0daa6ea 100644 --- a/src/db/stream.rs +++ b/src/db/stream.rs @@ -86,7 +86,7 @@ impl<'a> Stream<'a> { let path = util::to_lowercase(path); let mut path = path.as_str(); - let (idx, end) = match rfind_component_match(path, keywords_last) { + let (idx, _end) = match rfind_component_match(path, keywords_last) { Some((idx, end)) => { if path[end..].contains(path::is_separator) { return false; @@ -181,22 +181,26 @@ fn rfind_component_match(path: &str, keyword: &str) -> Option<(usize, usize)> { return None; } - // Favor exact substring matches; fall back to fuzzy subsequence within a - // single component. Keywords that contain a path separator are matched - // literally to preserve existing slash semantics. - if !keyword.contains(path::is_separator) { - if let Some(idx) = path.rfind(keyword) { - return Some((idx, idx + keyword.len())); - } - } else if let Some(idx) = path.rfind(keyword) { + if keyword.contains(path::is_separator) { + return path.rfind(keyword).map(|idx| (idx, idx + keyword.len())); + } + + if let Some(idx) = path.rfind(keyword) { return Some((idx, idx + keyword.len())); } - // Fuzzy: rightmost component where keyword is a subsequence. + let keyword_len = keyword.chars().count(); + + // Fuzzy: rightmost component where keyword is a subsequence, or edit + // distance 1 (equal-length typo) within a single component. for (component_start, component) in rsplit_components_with_indices(path) { if let Some((start, end)) = subsequence_bounds(component, keyword) { return Some((component_start + start, component_start + end)); } + + if keyword_len == component.chars().count() && edit_distance_leq1(component, keyword) { + return Some((component_start, component_start + component.len())); + } } None @@ -245,6 +249,71 @@ fn subsequence_bounds(haystack: &str, needle: &str) -> Option<(usize, usize)> { None } +fn edit_distance_leq1(a: &str, b: &str) -> bool { + if a == b { + return true; + } + + let a_chars: Vec<_> = a.chars().collect(); + let b_chars: Vec<_> = b.chars().collect(); + let a_len = a_chars.len(); + let b_len = b_chars.len(); + + if (a_len as isize - b_len as isize).abs() > 1 { + return false; + } + + if a_len == b_len { + // Single substitution? + let mut diffs = 0; + for (ac, bc) in a_chars.iter().zip(b_chars.iter()) { + if ac != bc { + diffs += 1; + if diffs > 1 { + break; + } + } + } + if diffs == 1 { + return true; + } + + // Single adjacent transposition? + for i in 0..a_len - 1 { + if a_chars[i] != b_chars[i] { + return i + 1 < a_len + && a_chars[i] == b_chars[i + 1] + && a_chars[i + 1] == b_chars[i] + && a_chars[i + 2..] == b_chars[i + 2..] + && a_chars[..i] == b_chars[..i]; + } + } + + return false; + } + + // Lengths differ by exactly 1: check single insertion/deletion. + let (short, long) = if a_len < b_len { (&a_chars, &b_chars) } else { (&b_chars, &a_chars) }; + let mut i = 0; + let mut j = 0; + let mut edits = 0; + + while i < short.len() && j < long.len() { + if short[i] == long[j] { + i += 1; + j += 1; + } else { + edits += 1; + if edits > 1 { + return false; + } + j += 1; // skip one char in longer string + } + } + + true +} + #[cfg(test)] mod tests { use std::path::PathBuf; @@ -276,6 +345,8 @@ mod tests { // Fuzzy subsequence within component #[case(&["docs"], "/home/Documents", true)] #[case(&["dcmts"], "/home/Documents", true)] + // Typo tolerance (edit distance 1) + #[case(&["doucments"], "/home/Documents", true)] fn query(#[case] keywords: &[&str], #[case] path: &str, #[case] is_match: bool) { let db = &mut Database::new(PathBuf::new(), Vec::new(), |_| Vec::new(), false); let options = StreamOptions::new(0).with_keywords(keywords.iter());