Add typo-tolerant fuzzy matching

2025-11-17 17:34:35 +09:00 · 2025-11-17 17:34:35 +09:00 · 97f9d8a61d
parent 11d7dfa28d
commit 97f9d8a61d
1 changed files with 81 additions and 10 deletions
--- a/src/db/stream.rs
+++ b/src/db/stream.rs
@ -86,7 +86,7 @@ impl<'a> Stream<'a> {
        let path = util::to_lowercase(path);
        let mut path = path.as_str();
-        let (idx, end) = match rfind_component_match(path, keywords_last) {
+        let (idx, _end) = match rfind_component_match(path, keywords_last) {
            Some((idx, end)) => {
                if path[end..].contains(path::is_separator) {
                    return false;
@ -181,22 +181,26 @@ fn rfind_component_match(path: &str, keyword: &str) -> Option<(usize, usize)> {
        return None;
    }
-    // Favor exact substring matches; fall back to fuzzy subsequence within a
+    if keyword.contains(path::is_separator) {
-    // single component. Keywords that contain a path separator are matched
+        return path.rfind(keyword).map(|idx| (idx, idx + keyword.len()));
-    // literally to preserve existing slash semantics.
+    }
-    if !keyword.contains(path::is_separator) {
+
-        if let Some(idx) = path.rfind(keyword) {
+    if let Some(idx) = path.rfind(keyword) {
            return Some((idx, idx + keyword.len()));
        }
    } else if let Some(idx) = path.rfind(keyword) {
        return Some((idx, idx + keyword.len()));
    }
-    // Fuzzy: rightmost component where keyword is a subsequence.
+    let keyword_len = keyword.chars().count();
    // Fuzzy: rightmost component where keyword is a subsequence, or edit
    // distance 1 (equal-length typo) within a single component.
    for (component_start, component) in rsplit_components_with_indices(path) {
        if let Some((start, end)) = subsequence_bounds(component, keyword) {
            return Some((component_start + start, component_start + end));
        }
        if keyword_len == component.chars().count() && edit_distance_leq1(component, keyword) {
            return Some((component_start, component_start + component.len()));
        }
    }
    None
@ -245,6 +249,71 @@ fn subsequence_bounds(haystack: &str, needle: &str) -> Option<(usize, usize)> {
    None
 }
 /// Reports whether `a` and `b` are at most one edit apart.
 ///
 /// A single edit is one of: replacing one character, inserting or deleting
 /// one character, or swapping two adjacent characters. The comparison works
 /// on `char`s rather than bytes, so a multi-byte character counts as one unit.
 /// Identical strings (including two empty strings) yield `true`.
 fn edit_distance_leq1(a: &str, b: &str) -> bool {
    if a == b {
        return true;
    }

    let lhs: Vec<char> = a.chars().collect();
    let rhs: Vec<char> = b.chars().collect();

    match lhs.len().abs_diff(rhs.len()) {
        0 => {
            // Same length: inspect the positions where the strings disagree.
            // Only the first three matter, so the iterator is pulled lazily.
            let mut mismatches = lhs
                .iter()
                .zip(rhs.iter())
                .enumerate()
                .filter_map(|(pos, (x, y))| (x != y).then_some(pos));

            match (mismatches.next(), mismatches.next(), mismatches.next()) {
                // Exactly one differing position: a single substitution.
                (Some(_), None, _) => true,
                // Exactly two differing positions: accept only when they are
                // neighbours holding each other's characters (transposition).
                (Some(first), Some(second), None) => {
                    second == first + 1 && lhs[first] == rhs[second] && lhs[second] == rhs[first]
                }
                // Three or more differences can never be a single edit.
                _ => false,
            }
        }
        1 => {
            // One string has exactly one extra character: after the shared
            // prefix, dropping one character from the longer string must
            // leave the two remainders identical.
            let (shorter, longer) = if lhs.len() < rhs.len() { (&lhs, &rhs) } else { (&rhs, &lhs) };
            let shared = shorter.iter().zip(longer.iter()).take_while(|(x, y)| x == y).count();
            shorter[shared..] == longer[shared + 1..]
        }
        // Lengths differ by two or more characters.
        _ => false,
    }
 }
 #[cfg(test)]
 mod tests {
    use std::path::PathBuf;
@ -276,6 +345,8 @@ mod tests {
    // Fuzzy subsequence within component
    #[case(&["docs"], "/home/Documents", true)]
    #[case(&["dcmts"], "/home/Documents", true)]
    // Typo tolerance (edit distance 1)
    #[case(&["doucments"], "/home/Documents", true)]
    fn query(#[case] keywords: &[&str], #[case] path: &str, #[case] is_match: bool) {
        let db = &mut Database::new(PathBuf::new(), Vec::new(), |_| Vec::new(), false);
        let options = StreamOptions::new(0).with_keywords(keywords.iter());