From efd615b910563ca66c8595ca23ebd4fbbe56a685 Mon Sep 17 00:00:00 2001 From: Ajeet D'Souza <98ajeet@gmail.com> Date: Fri, 15 May 2020 19:40:54 +0530 Subject: [PATCH] Optimize query algorithm --- Cargo.lock | 7 +++++++ Cargo.toml | 1 + src/db.rs | 40 +++++++++------------------------------- src/dir.rs | 4 ++-- src/subcommand/query.rs | 36 +++++++++++++++++++++++++++--------- 5 files changed, 46 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7cdb83e..f5ac3fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,6 +141,11 @@ name = "dunce" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "float-ord" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "getrandom" version = "0.1.14" @@ -438,6 +443,7 @@ dependencies = [ "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", "uuid 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -463,6 +469,7 @@ dependencies = [ "checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" "checksum dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" "checksum dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0ad6bf6a88548d1126045c413548df1453d9be094a8ab9fd59bf1fdd338da4f" +"checksum float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bad48618fdb549078c333a7a8528acb57af271d0433bdecd523eb620628364e" "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" "checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" "checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4" diff --git a/Cargo.toml b/Cargo.toml index 9c79762..e47927a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ bincode = "1.2.1" clap = "2.33.0" dirs = "2.0.2" dunce = "1.0.0" +float-ord = "0.2.0" serde = { version = "1.0.106", features = ["derive"] } structopt = "0.3.12" uuid = { version = "0.8.1", features = ["v4"] } diff --git a/src/db.rs b/src/db.rs index 0c5422a..ffbc941 100644 --- a/src/db.rs +++ b/src/db.rs @@ -4,7 +4,6 @@ use anyhow::{bail, Context, Result}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use std::cmp::Ordering; use std::fs::{self, File, OpenOptions}; use std::io::{self, BufRead, BufReader, Write}; use std::path::{Path, PathBuf}; @@ -15,9 +14,9 @@ pub use i32 as DBVersion; struct DbVersion(u32); pub struct Db { + pub dirs: Vec, + pub modified: bool, data_dir: PathBuf, - dirs: Vec, - modified: bool, } impl Db { @@ -34,9 +33,9 @@ impl Db { Ok(buffer) => buffer, Err(e) if e.kind() == io::ErrorKind::NotFound => { return Ok(Db { - data_dir, - modified: false, dirs: Vec::new(), + modified: false, + data_dir, }) } Err(e) => { @@ -48,9 +47,9 @@ impl Db { if buffer.is_empty() { return Ok(Db { - data_dir, - modified: false, dirs: Vec::new(), + modified: false, + data_dir, }); } @@ -87,9 +86,9 @@ impl Db { }; Ok(Db { - data_dir, - modified: false, dirs, + modified: false, + data_dir, }) } @@ -271,27 +270,6 @@ impl Db { Ok(()) } - pub fn query(&mut self, keywords: &[String], now: Epoch) -> Option<&Dir> { - let (idx, dir, _) = self - .dirs - .iter() - .enumerate() - .filter(|(_, dir)| dir.is_match(&keywords)) - .map(|(idx, dir)| (idx, dir, dir.get_frecency(now))) - .max_by(|(_, _, frecency1), (_, _, frecency2)| { - frecency1.partial_cmp(frecency2).unwrap_or(Ordering::Equal) - })?; - - if dir.is_dir() { - // FIXME: change this to Some(dir) once the MIR borrow checker comes to stable Rust - Some(&self.dirs[idx]) - } else { - self.dirs.swap_remove(idx); - self.modified = true; - self.query(keywords, now) - } - } - pub fn query_many<'a>(&'a mut self, keywords: &'a [String]) -> impl Iterator { self.query_all() .iter() @@ -300,7 +278,7 @@ impl Db { pub fn query_all(&mut self) -> &[Dir] { let orig_len = self.dirs.len(); - self.dirs.retain(Dir::is_dir); + self.dirs.retain(Dir::is_valid); if orig_len != self.dirs.len() { self.modified = true; diff --git a/src/dir.rs b/src/dir.rs index b07cd53..dc03447 100644 --- a/src/dir.rs +++ b/src/dir.rs @@ -13,8 +13,8 @@ pub struct Dir { } impl Dir { - pub fn is_dir(&self) -> bool { - self.path.is_dir() + pub fn is_valid(&self) -> bool { + self.rank.is_finite() && self.rank >= 1.0 && self.path.is_dir() } #[cfg(unix)] diff --git a/src/subcommand/query.rs b/src/subcommand/query.rs index 30d04a2..49e7366 100644 --- a/src/subcommand/query.rs +++ b/src/subcommand/query.rs @@ -1,6 +1,8 @@ +use crate::db::Db; use crate::util; use anyhow::{bail, Result}; +use float_ord::FloatOrd; use structopt::StructOpt; use std::io::{self, Write}; @@ -19,7 +21,8 @@ impl Query { let path_opt = if self.interactive { self.query_interactive()? } else { - self.query()? + let mut db = util::get_db()?; + self.query(&mut db)? }; match path_opt { @@ -35,7 +38,8 @@ impl Query { Ok(()) } - fn query(&self) -> Result>> { + fn query(&self, db: &mut Db) -> Result>> { + // if the input is already a valid path, simply return it if let [path] = self.keywords.as_slice() { if Path::new(path).is_dir() { return Ok(Some(path.as_bytes().to_vec())); @@ -50,14 +54,28 @@ impl Query { .map(|keyword| keyword.to_lowercase()) .collect::>(); - let path_opt = util::get_db()?.query(&keywords, now).map(|dir| { - // `path_to_bytes` is guaranteed to succeed here since - // the path has already been queried successfully - let path_bytes = util::path_to_bytes(&dir.path).unwrap(); - path_bytes.to_vec() - }); + db.dirs + .sort_unstable_by_key(|dir| FloatOrd(dir.get_frecency(now))); - Ok(path_opt) + // Iterating in reverse order ensures that the directory indices do not + // change as we remove them. + for idx in (0..db.dirs.len()).rev() { + let dir = &db.dirs[idx]; + if !dir.is_match(&keywords) { + continue; + } + + if !dir.is_valid() { + db.dirs.swap_remove(idx); + db.modified = true; + continue; + } + + let path = util::path_to_bytes(&dir.path)?.to_vec(); + return Ok(Some(path)); + } + + Ok(None) } fn query_interactive(&self) -> Result>> {