Optimize query algorithm

This commit is contained in:
Ajeet D'Souza 2020-05-15 19:40:54 +05:30
parent 08cf7dfa86
commit efd615b910
5 changed files with 46 additions and 42 deletions

7
Cargo.lock generated
View File

@ -141,6 +141,11 @@ name = "dunce"
version = "1.0.0" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "float-ord"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.1.14" version = "0.1.14"
@ -438,6 +443,7 @@ dependencies = [
"clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)",
"dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", "structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "uuid 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -463,6 +469,7 @@ dependencies = [
"checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" "checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3"
"checksum dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" "checksum dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b"
"checksum dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0ad6bf6a88548d1126045c413548df1453d9be094a8ab9fd59bf1fdd338da4f" "checksum dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0ad6bf6a88548d1126045c413548df1453d9be094a8ab9fd59bf1fdd338da4f"
"checksum float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bad48618fdb549078c333a7a8528acb57af271d0433bdecd523eb620628364e"
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" "checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
"checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4" "checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"

View File

@ -18,6 +18,7 @@ bincode = "1.2.1"
clap = "2.33.0" clap = "2.33.0"
dirs = "2.0.2" dirs = "2.0.2"
dunce = "1.0.0" dunce = "1.0.0"
float-ord = "0.2.0"
serde = { version = "1.0.106", features = ["derive"] } serde = { version = "1.0.106", features = ["derive"] }
structopt = "0.3.12" structopt = "0.3.12"
uuid = { version = "0.8.1", features = ["v4"] } uuid = { version = "0.8.1", features = ["v4"] }

View File

@ -4,7 +4,6 @@ use anyhow::{bail, Context, Result};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use uuid::Uuid; use uuid::Uuid;
use std::cmp::Ordering;
use std::fs::{self, File, OpenOptions}; use std::fs::{self, File, OpenOptions};
use std::io::{self, BufRead, BufReader, Write}; use std::io::{self, BufRead, BufReader, Write};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
@ -15,9 +14,9 @@ pub use i32 as DBVersion;
struct DbVersion(u32); struct DbVersion(u32);
pub struct Db { pub struct Db {
pub dirs: Vec<Dir>,
pub modified: bool,
data_dir: PathBuf, data_dir: PathBuf,
dirs: Vec<Dir>,
modified: bool,
} }
impl Db { impl Db {
@ -34,9 +33,9 @@ impl Db {
Ok(buffer) => buffer, Ok(buffer) => buffer,
Err(e) if e.kind() == io::ErrorKind::NotFound => { Err(e) if e.kind() == io::ErrorKind::NotFound => {
return Ok(Db { return Ok(Db {
data_dir,
modified: false,
dirs: Vec::new(), dirs: Vec::new(),
modified: false,
data_dir,
}) })
} }
Err(e) => { Err(e) => {
@ -48,9 +47,9 @@ impl Db {
if buffer.is_empty() { if buffer.is_empty() {
return Ok(Db { return Ok(Db {
data_dir,
modified: false,
dirs: Vec::new(), dirs: Vec::new(),
modified: false,
data_dir,
}); });
} }
@ -87,9 +86,9 @@ impl Db {
}; };
Ok(Db { Ok(Db {
data_dir,
modified: false,
dirs, dirs,
modified: false,
data_dir,
}) })
} }
@ -271,27 +270,6 @@ impl Db {
Ok(()) Ok(())
} }
pub fn query(&mut self, keywords: &[String], now: Epoch) -> Option<&Dir> {
let (idx, dir, _) = self
.dirs
.iter()
.enumerate()
.filter(|(_, dir)| dir.is_match(&keywords))
.map(|(idx, dir)| (idx, dir, dir.get_frecency(now)))
.max_by(|(_, _, frecency1), (_, _, frecency2)| {
frecency1.partial_cmp(frecency2).unwrap_or(Ordering::Equal)
})?;
if dir.is_dir() {
// FIXME: change this to Some(dir) once the MIR borrow checker comes to stable Rust
Some(&self.dirs[idx])
} else {
self.dirs.swap_remove(idx);
self.modified = true;
self.query(keywords, now)
}
}
pub fn query_many<'a>(&'a mut self, keywords: &'a [String]) -> impl Iterator<Item = &'a Dir> { pub fn query_many<'a>(&'a mut self, keywords: &'a [String]) -> impl Iterator<Item = &'a Dir> {
self.query_all() self.query_all()
.iter() .iter()
@ -300,7 +278,7 @@ impl Db {
pub fn query_all(&mut self) -> &[Dir] { pub fn query_all(&mut self) -> &[Dir] {
let orig_len = self.dirs.len(); let orig_len = self.dirs.len();
self.dirs.retain(Dir::is_dir); self.dirs.retain(Dir::is_valid);
if orig_len != self.dirs.len() { if orig_len != self.dirs.len() {
self.modified = true; self.modified = true;

View File

@ -13,8 +13,8 @@ pub struct Dir {
} }
impl Dir { impl Dir {
pub fn is_dir(&self) -> bool { pub fn is_valid(&self) -> bool {
self.path.is_dir() self.rank.is_finite() && self.rank >= 1.0 && self.path.is_dir()
} }
#[cfg(unix)] #[cfg(unix)]

View File

@ -1,6 +1,8 @@
use crate::db::Db;
use crate::util; use crate::util;
use anyhow::{bail, Result}; use anyhow::{bail, Result};
use float_ord::FloatOrd;
use structopt::StructOpt; use structopt::StructOpt;
use std::io::{self, Write}; use std::io::{self, Write};
@ -19,7 +21,8 @@ impl Query {
let path_opt = if self.interactive { let path_opt = if self.interactive {
self.query_interactive()? self.query_interactive()?
} else { } else {
self.query()? let mut db = util::get_db()?;
self.query(&mut db)?
}; };
match path_opt { match path_opt {
@ -35,7 +38,8 @@ impl Query {
Ok(()) Ok(())
} }
fn query(&self) -> Result<Option<Vec<u8>>> { fn query(&self, db: &mut Db) -> Result<Option<Vec<u8>>> {
// If exactly one keyword was given and it is already an existing directory, return it as-is.
if let [path] = self.keywords.as_slice() { if let [path] = self.keywords.as_slice() {
if Path::new(path).is_dir() { if Path::new(path).is_dir() {
return Ok(Some(path.as_bytes().to_vec())); return Ok(Some(path.as_bytes().to_vec()));
@ -50,14 +54,28 @@ impl Query {
.map(|keyword| keyword.to_lowercase()) .map(|keyword| keyword.to_lowercase())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let path_opt = util::get_db()?.query(&keywords, now).map(|dir| { db.dirs
// `path_to_bytes` is guaranteed to succeed here since .sort_unstable_by_key(|dir| FloatOrd(dir.get_frecency(now)));
// the path has already been queried successfully
let path_bytes = util::path_to_bytes(&dir.path).unwrap();
path_bytes.to_vec()
});
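Ok(path_opt) // The sort above is ascending, so iterating in reverse visits the highest
// frecency first. `swap_remove` only moves the last element into the vacated
// slot, and every element after `idx` has already been visited, so the
// indices still to be visited do not change as we remove entries.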
for idx in (0..db.dirs.len()).rev() {
let dir = &db.dirs[idx];
if !dir.is_match(&keywords) {
continue;
}
if !dir.is_valid() {
db.dirs.swap_remove(idx);
db.modified = true;
continue;
}
let path = util::path_to_bytes(&dir.path)?.to_vec();
return Ok(Some(path));
}
Ok(None)
} }
fn query_interactive(&self) -> Result<Option<Vec<u8>>> { fn query_interactive(&self) -> Result<Option<Vec<u8>>> {