From 6c1da6ec96692b5515b775626c9329f4227f4295 Mon Sep 17 00:00:00 2001 From: Ajeet D'Souza <98ajeet@gmail.com> Date: Sun, 11 Dec 2022 21:13:05 +0530 Subject: [PATCH] Rename --- CHANGELOG.md | 3 +- src/cmd/add.rs | 4 +- src/cmd/edit.rs | 54 ++++--- src/cmd/import.rs | 14 +- src/cmd/query.rs | 2 +- src/cmd/remove.rs | 4 +- src/config.rs | 2 +- src/db/dir.rs | 130 +---------------- src/db/mod.rs | 293 ++++++++++++++++++++++++++++++------- src/db2/dir.rs | 160 +++++++++++++++++++++ src/db2/mod.rs | 75 ++++++++++ src/{db => db2}/stream.rs | 9 +- src/main.rs | 2 +- src/store/mod.rs | 296 -------------------------------------- src/util.rs | 92 +++++++++++- 15 files changed, 614 insertions(+), 526 deletions(-) create mode 100644 src/db2/dir.rs create mode 100644 src/db2/mod.rs rename src/{db => db2}/stream.rs (94%) delete mode 100644 src/store/mod.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index f103d62..abfcee7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,8 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Zsh: don't hide output from `chpwd` hooks. - Nushell: upgrade minimum supported version to v0.73.0. - Zsh: fix extra space in interactive completions when no match is found. -- Fzf: `` now cycles through completions. -- Fzf: enable colors in preview when possible on macOS / BSD. +- Fzf: various improvements. 
### Removed diff --git a/src/cmd/add.rs b/src/cmd/add.rs index 48a448f..b7ba404 100644 --- a/src/cmd/add.rs +++ b/src/cmd/add.rs @@ -3,7 +3,7 @@ use std::path::Path; use anyhow::{bail, Result}; use crate::cmd::{Add, Run}; -use crate::store::Store; +use crate::db::Database; use crate::{config, util}; impl Run for Add { @@ -16,7 +16,7 @@ impl Run for Add { let max_age = config::maxage()?; let now = util::current_time()?; - let mut db = Store::open()?; + let mut db = Database::open()?; for path in &self.paths { let path = if config::resolve_symlinks() { util::canonicalize } else { util::resolve_path }(path)?; diff --git a/src/cmd/edit.rs b/src/cmd/edit.rs index 674b01e..2b93a5c 100644 --- a/src/cmd/edit.rs +++ b/src/cmd/edit.rs @@ -1,39 +1,35 @@ use std::io::{self, Write}; -use std::process::Command; use anyhow::Result; use crate::cmd::{Edit, EditCommand, Run}; -use crate::store::{Epoch, Store}; -use crate::util; +use crate::db::{Database, Epoch}; +use crate::error::BrokenPipeHandler; +use crate::util::{self, Fz}; impl Run for Edit { fn run(&self) -> Result<()> { let now = util::current_time()?; - let db = &mut Store::open()?; + let db = &mut Database::open()?; match &self.cmd { - Some(EditCommand::Decrement { path }) => { - db.add(path, -1.0, now); + Some(cmd) => { + match cmd { + EditCommand::Decrement { path } => db.add(path, -1.0, now), + EditCommand::Delete { path } => { + db.remove(path); + } + EditCommand::Increment { path } => db.add(path, 1.0, now), + EditCommand::Reload => {} + } db.save()?; - print_dirs(db, now); + print_dirs(db, now) } - Some(EditCommand::Delete { path }) => { - db.remove(path); - db.save()?; - print_dirs(db, now); - } - Some(EditCommand::Increment { path }) => { - db.add(path, 1.0, now); - db.save()?; - print_dirs(db, now); - } - Some(EditCommand::Reload) => print_dirs(db, now), None => { db.sort_by_score(now); db.save()?; - let mut fzf = Command::new("fzf"); + let mut fzf = Fz::new()?; fzf.args([ // Search mode "--delimiter=\t", @@ 
-54,18 +50,18 @@ enter:abort", "--keep-right", // Layout "--border=rounded", - "--border-label= zoxide-edit ", + "--border-label= zoxide-edit ", "--header=\ ctrl-r:reload \tctrl-w:delete ctrl-a:increment\tctrl-d:decrement -SCORE\tPATH", + SCORE\tPATH", "--info=inline", "--layout=reverse", - "--padding=1", + "--padding=1,0,0,0", // Display "--color=label:bold", - "--tabstop=2", + "--tabstop=1", // Scripting "--read0", ]) @@ -85,18 +81,18 @@ SCORE\tPATH", fzf.args([PREVIEW_ARG, "--preview-window=down,30%"]).env("SHELL", "sh"); } - let mut fzf = fzf.spawn().unwrap(); - fzf.wait().unwrap(); + let mut fzf = fzf.spawn()?; + fzf.wait() } } - - Ok(()) } } -fn print_dirs(db: &Store, now: Epoch) { +fn print_dirs(db: &Database, now: Epoch) -> Result<()> { let stdout = &mut io::stdout().lock(); for dir in db.dirs().iter().rev() { - write!(stdout, "{:>5}\t{}\x00", dir.score(now), &dir.path).unwrap(); + let score = dir.score(now).clamp(0.0, 9999.0); + write!(stdout, "{:>6.1}\t{}\x00", score, &dir.path).pipe_exit("fzf")?; } + Ok(()) } diff --git a/src/cmd/import.rs b/src/cmd/import.rs index 8771d5f..3a2c206 100644 --- a/src/cmd/import.rs +++ b/src/cmd/import.rs @@ -3,14 +3,14 @@ use std::fs; use anyhow::{bail, Context, Result}; use crate::cmd::{Import, ImportFrom, Run}; -use crate::store::Store; +use crate::db::Database; impl Run for Import { fn run(&self) -> Result<()> { let buffer = fs::read_to_string(&self.path) .with_context(|| format!("could not open database for importing: {}", &self.path.display()))?; - let mut db = Store::open()?; + let mut db = Database::open()?; if !self.merge && !db.dirs().is_empty() { bail!("current database is not empty, specify --merge to continue anyway"); } @@ -25,7 +25,7 @@ impl Run for Import { } } -fn import_autojump(db: &mut Store, buffer: &str) -> Result<()> { +fn import_autojump(db: &mut Database, buffer: &str) -> Result<()> { for line in buffer.lines() { if line.is_empty() { continue; @@ -49,7 +49,7 @@ fn import_autojump(db: &mut Store, 
buffer: &str) -> Result<()> { Ok(()) } -fn import_z(db: &mut Store, buffer: &str) -> Result<()> { +fn import_z(db: &mut Database, buffer: &str) -> Result<()> { for line in buffer.lines() { if line.is_empty() { continue; @@ -80,12 +80,12 @@ fn sigmoid(x: f64) -> f64 { #[cfg(test)] mod tests { use super::*; - use crate::store::Dir; + use crate::db::Dir; #[test] fn from_autojump() { let data_dir = tempfile::tempdir().unwrap(); - let mut db = Store::open_dir(data_dir.path()).unwrap(); + let mut db = Database::open_dir(data_dir.path()).unwrap(); for (path, rank, last_accessed) in [ ("/quux/quuz", 1.0, 100), ("/corge/grault/garply", 6.0, 600), @@ -125,7 +125,7 @@ mod tests { #[test] fn from_z() { let data_dir = tempfile::tempdir().unwrap(); - let mut db = Store::open_dir(data_dir.path()).unwrap(); + let mut db = Database::open_dir(data_dir.path()).unwrap(); for (path, rank, last_accessed) in [ ("/quux/quuz", 1.0, 100), ("/corge/grault/garply", 6.0, 600), diff --git a/src/cmd/query.rs b/src/cmd/query.rs index 397a5b4..210231c 100644 --- a/src/cmd/query.rs +++ b/src/cmd/query.rs @@ -4,7 +4,7 @@ use anyhow::{Context, Result}; use crate::cmd::{Query, Run}; use crate::config; -use crate::db::{Database, DatabaseFile}; +use crate::db2::{Database, DatabaseFile}; use crate::error::BrokenPipeHandler; use crate::util::{self, Fzf}; diff --git a/src/cmd/remove.rs b/src/cmd/remove.rs index 9a5f474..55c6989 100644 --- a/src/cmd/remove.rs +++ b/src/cmd/remove.rs @@ -1,12 +1,12 @@ use anyhow::{bail, Result}; use crate::cmd::{Remove, Run}; -use crate::store::Store; +use crate::db::Database; use crate::util; impl Run for Remove { fn run(&self) -> Result<()> { - let mut db = Store::open()?; + let mut db = Database::open()?; for path in &self.paths { if !db.remove(path) { diff --git a/src/config.rs b/src/config.rs index c5cb6ae..d7980fb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; use anyhow::{Context, Result}; use glob::Pattern; -use 
crate::db::Rank; +use crate::db2::Rank; pub fn data_dir() -> Result { let path = match env::var_os("_ZO_DATA_DIR") { diff --git a/src/db/dir.rs b/src/db/dir.rs index c59a441..2b87054 100644 --- a/src/db/dir.rs +++ b/src/db/dir.rs @@ -1,83 +1,8 @@ use std::borrow::Cow; -use std::fmt::{self, Display, Formatter}; -use std::ops::{Deref, DerefMut}; -use anyhow::{bail, Context, Result}; -use bincode::Options as _; use serde::{Deserialize, Serialize}; -#[derive(Debug, Deserialize, Serialize)] -pub struct DirList<'a>(#[serde(borrow)] pub Vec>); - -impl DirList<'_> { - const VERSION: u32 = 3; - - pub fn new() -> DirList<'static> { - DirList(Vec::new()) - } - - pub fn from_bytes(bytes: &[u8]) -> Result { - // Assume a maximum size for the database. This prevents bincode from throwing strange - // errors when it encounters invalid data. - const MAX_SIZE: u64 = 32 << 20; // 32 MiB - let deserializer = &mut bincode::options().with_fixint_encoding().with_limit(MAX_SIZE); - - // Split bytes into sections. - let version_size = deserializer.serialized_size(&Self::VERSION).unwrap() as _; - if bytes.len() < version_size { - bail!("could not deserialize database: corrupted data"); - } - let (bytes_version, bytes_dirs) = bytes.split_at(version_size); - - // Deserialize sections. - (|| { - let version = deserializer.deserialize(bytes_version)?; - match version { - Self::VERSION => Ok(deserializer.deserialize(bytes_dirs)?), - version => { - bail!("unsupported version (got {version}, supports {})", Self::VERSION) - } - } - })() - .context("could not deserialize database") - } - - pub fn to_bytes(&self) -> Result> { - (|| -> bincode::Result<_> { - // Preallocate buffer with combined size of sections. - let version_size = bincode::serialized_size(&Self::VERSION)?; - let dirs_size = bincode::serialized_size(&self)?; - let buffer_size = version_size + dirs_size; - let mut buffer = Vec::with_capacity(buffer_size as _); - - // Serialize sections into buffer. 
- bincode::serialize_into(&mut buffer, &Self::VERSION)?; - bincode::serialize_into(&mut buffer, &self)?; - Ok(buffer) - })() - .context("could not serialize database") - } -} - -impl<'a> Deref for DirList<'a> { - type Target = Vec>; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl<'a> DerefMut for DirList<'a> { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -impl<'a> From>> for DirList<'a> { - fn from(dirs: Vec>) -> Self { - DirList(dirs) - } -} +use crate::util::{DAY, HOUR, WEEK}; #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Dir<'a> { @@ -89,10 +14,6 @@ pub struct Dir<'a> { impl Dir<'_> { pub fn score(&self, now: Epoch) -> Rank { - const HOUR: Epoch = 60 * 60; - const DAY: Epoch = 24 * HOUR; - const WEEK: Epoch = 7 * DAY; - // The older the entry, the lesser its importance. let duration = now.saturating_sub(self.last_accessed); if duration < HOUR { @@ -105,56 +26,7 @@ impl Dir<'_> { self.rank * 0.25 } } - - pub fn display(&self) -> DirDisplay { - DirDisplay { dir: self } - } - - pub fn display_score(&self, now: Epoch) -> DirDisplayScore { - DirDisplayScore { dir: self, now } - } -} - -pub struct DirDisplay<'a> { - dir: &'a Dir<'a>, -} - -impl Display for DirDisplay<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.dir.path) - } -} - -pub struct DirDisplayScore<'a> { - dir: &'a Dir<'a>, - now: Epoch, -} - -impl Display for DirDisplayScore<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let score = self.dir.score(self.now).clamp(0.0, 9999.0) as u32; - write!(f, "{:>4} {}", score, self.dir.path) - } } pub type Rank = f64; pub type Epoch = u64; - -#[cfg(test)] -mod tests { - use std::borrow::Cow; - - use super::*; - - #[test] - fn zero_copy() { - let dirs = DirList(vec![Dir { path: "/".into(), rank: 0.0, last_accessed: 0 }]); - - let bytes = dirs.to_bytes().unwrap(); - let dirs = DirList::from_bytes(&bytes).unwrap(); - - for dir in dirs.iter() { - 
assert!(matches!(dir.path, Cow::Borrowed(_))) - } - } -} diff --git a/src/db/mod.rs b/src/db/mod.rs index aa3eae4..71173b6 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,75 +1,266 @@ mod dir; -mod stream; use std::path::{Path, PathBuf}; use std::{fs, io}; -use anyhow::{Context, Result}; -pub use dir::{Dir, DirList, Epoch, Rank}; -pub use stream::Stream; +use anyhow::{bail, Context, Result}; +use bincode::Options; +use ouroboros::self_referencing; -use crate::util; +pub use crate::db::dir::{Dir, Epoch, Rank}; +use crate::{config, util}; -#[derive(Debug)] -pub struct Database<'file> { - pub dirs: DirList<'file>, - pub modified: bool, - pub data_dir: &'file Path, +#[self_referencing] +pub struct Database { + path: PathBuf, + bytes: Vec, + #[borrows(bytes)] + #[covariant] + dirs: Vec>, + dirty: bool, } -impl<'file> Database<'file> { - pub fn save(&mut self) -> Result<()> { - if !self.modified { - return Ok(()); - } +impl Database { + const VERSION: u32 = 3; - let buffer = self.dirs.to_bytes()?; - let path = db_path(self.data_dir); - util::write(&path, &buffer).context("could not write to database")?; - self.modified = false; - Ok(()) + pub fn open() -> Result { + let data_dir = config::data_dir()?; + Self::open_dir(&data_dir) } - // Streaming iterator for directories. - pub fn stream(&mut self, now: Epoch) -> Stream<'_, 'file> { - Stream::new(self, now) - } -} + pub fn open_dir(data_dir: &Path) -> Result { + let path = data_dir.join("db.zo"); -pub struct DatabaseFile { - buffer: Vec, - data_dir: PathBuf, -} - -impl DatabaseFile { - pub fn new>(data_dir: P) -> Self { - DatabaseFile { buffer: Vec::new(), data_dir: data_dir.into() } - } - - pub fn open(&mut self) -> Result { - // Read the entire database to memory. For smaller files, this is faster than - // mmap / streaming, and allows for zero-copy deserialization. 
- let path = db_path(&self.data_dir); match fs::read(&path) { - Ok(buffer) => { - self.buffer = buffer; - let dirs = DirList::from_bytes(&self.buffer) - .with_context(|| format!("could not deserialize database: {}", path.display()))?; - Ok(Database { dirs, modified: false, data_dir: &self.data_dir }) - } + Ok(bytes) => Self::try_new(path, bytes, |bytes| Self::deserialize(bytes), false), Err(e) if e.kind() == io::ErrorKind::NotFound => { // Create data directory, but don't create any file yet. The file will be created // later by [`Database::save`] if any data is modified. - fs::create_dir_all(&self.data_dir) - .with_context(|| format!("unable to create data directory: {}", self.data_dir.display()))?; - Ok(Database { dirs: DirList::new(), modified: false, data_dir: &self.data_dir }) + fs::create_dir_all(data_dir) + .with_context(|| format!("unable to create data directory: {}", data_dir.display()))?; + Ok(Self::new(path, Vec::new(), |_| Vec::new(), false)) } Err(e) => Err(e).with_context(|| format!("could not read from database: {}", path.display())), } } + + pub fn save(&mut self) -> Result<()> { + // Only write to disk if the database is modified. + if !self.dirty() { + return Ok(()); + } + + let bytes = Self::serialize(self.dirs())?; + util::write(self.borrow_path(), &bytes).context("could not write to database")?; + self.with_dirty_mut(|dirty| *dirty = false); + + Ok(()) + } + + /// Increments the rank of a directory, or creates it if it does not exist. + pub fn add(&mut self, path: impl AsRef + Into, by: Rank, now: Epoch) { + self.with_dirs_mut(|dirs| match dirs.iter_mut().find(|dir| dir.path == path.as_ref()) { + Some(dir) => dir.rank = (dir.rank + by).max(0.0), + None => dirs.push(Dir { path: path.into().into(), rank: by.max(0.0), last_accessed: now }), + }); + self.with_dirty_mut(|dirty| *dirty = true); + } + + /// Creates a new directory. 
This will create a duplicate entry if this + /// directory is already in the database; it is expected that the user either + /// does a check before calling this, or calls `dedup()` afterward. + pub fn add_unchecked(&mut self, path: impl AsRef + Into, rank: Rank, now: Epoch) { + self.with_dirs_mut(|dirs| dirs.push(Dir { path: path.into().into(), rank, last_accessed: now })); + self.with_dirty_mut(|dirty| *dirty = true); + } + + /// Increments the rank and updates the last_accessed of a directory, or + /// creates it if it does not exist. + pub fn add_update(&mut self, path: impl AsRef + Into, by: Rank, now: Epoch) { + self.with_dirs_mut(|dirs| match dirs.iter_mut().find(|dir| dir.path == path.as_ref()) { + Some(dir) => { + dir.rank = (dir.rank + by).max(0.0); + dir.last_accessed = now; + } + None => dirs.push(Dir { path: path.into().into(), rank: by.max(0.0), last_accessed: now }), + }); + self.with_dirty_mut(|dirty| *dirty = true); + } + + /// Removes the directory with `path` from the database. This does not preserve + /// ordering, but is O(1). + pub fn remove(&mut self, path: impl AsRef) -> bool { + let deleted = self.with_dirs_mut(|dirs| match dirs.iter().position(|dir| dir.path == path.as_ref()) { + Some(idx) => { + dirs.swap_remove(idx); + true + } + None => false, + }); + self.with_dirty_mut(|dirty| *dirty |= deleted); + deleted + } + + pub fn age(&mut self, max_age: Rank) { + let mut dirty = false; + self.with_dirs_mut(|dirs| { + let total_age = dirs.iter().map(|dir| dir.rank).sum::(); + if total_age > max_age { + let factor = 0.9 * max_age / total_age; + for idx in (0..dirs.len()).rev() { + let dir = &mut dirs[idx]; + dir.rank *= factor; + if dir.rank < 1.0 { + dirs.swap_remove(idx); + } + } + dirty = true; + } + }); + self.with_dirty_mut(|dirty_prev| *dirty_prev |= dirty); + } + + pub fn dedup(&mut self) { + // Sort by path, so that equal paths are next to each other. 
+ self.sort_by_path(); + + let mut dirty = false; + self.with_dirs_mut(|dirs| { + for idx in (1..dirs.len()).rev() { + // Check if curr_dir and next_dir have equal paths. + let curr_dir = &dirs[idx]; + let next_dir = &dirs[idx - 1]; + if next_dir.path != curr_dir.path { + continue; + } + + // Merge curr_dir's rank and last_accessed into next_dir. + let rank = curr_dir.rank; + let last_accessed = curr_dir.last_accessed; + let next_dir = &mut dirs[idx - 1]; + next_dir.last_accessed = next_dir.last_accessed.max(last_accessed); + next_dir.rank += rank; + + // Delete curr_dir. + dirs.swap_remove(idx); + dirty = true; + } + }); + self.with_dirty_mut(|dirty_prev| *dirty_prev |= dirty); + } + + pub fn sort_by_path(&mut self) { + self.with_dirs_mut(|dirs| dirs.sort_unstable_by(|dir1, dir2| dir1.path.cmp(&dir2.path))); + self.with_dirty_mut(|dirty| *dirty = true); + } + + pub fn sort_by_score(&mut self, now: Epoch) { + self.with_dirs_mut(|dirs| { + dirs.sort_unstable_by(|dir1: &Dir, dir2: &Dir| dir1.score(now).total_cmp(&dir2.score(now))) + }); + self.with_dirty_mut(|dirty| *dirty = true); + } + + pub fn dirty(&self) -> bool { + *self.borrow_dirty() + } + + pub fn dirs(&self) -> &[Dir] { + self.borrow_dirs() + } + + fn serialize(dirs: &[Dir<'_>]) -> Result> { + (|| -> bincode::Result<_> { + // Preallocate buffer with combined size of sections. + let buffer_size = bincode::serialized_size(&Self::VERSION)? + bincode::serialized_size(&dirs)?; + let mut buffer = Vec::with_capacity(buffer_size as usize); + + // Serialize sections into buffer. + bincode::serialize_into(&mut buffer, &Self::VERSION)?; + bincode::serialize_into(&mut buffer, &dirs)?; + + Ok(buffer) + })() + .context("could not serialize database") + } + + fn deserialize(bytes: &[u8]) -> Result> { + // Assume a maximum size for the database. This prevents bincode from throwing strange + // errors when it encounters invalid data. 
+ const MAX_SIZE: u64 = 32 << 20; // 32 MiB + let deserializer = &mut bincode::options().with_fixint_encoding().with_limit(MAX_SIZE); + + // Split bytes into sections. + let version_size = deserializer.serialized_size(&Self::VERSION).unwrap() as _; + if bytes.len() < version_size { + bail!("could not deserialize database: corrupted data"); + } + let (bytes_version, bytes_dirs) = bytes.split_at(version_size); + + // Deserialize sections. + let version = deserializer.deserialize(bytes_version)?; + let dirs = match version { + Self::VERSION => deserializer.deserialize(bytes_dirs).context("could not deserialize database")?, + version => { + bail!("unsupported version (got {version}, supports {})", Self::VERSION) + } + }; + + Ok(dirs) + } } -fn db_path>(data_dir: P) -> PathBuf { - const DB_FILENAME: &str = "db.zo"; - data_dir.as_ref().join(DB_FILENAME) +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn add() { + let data_dir = tempfile::tempdir().unwrap(); + let path = if cfg!(windows) { r"C:\foo\bar" } else { "/foo/bar" }; + let now = 946684800; + + { + let mut db = Database::open_dir(data_dir.path()).unwrap(); + db.add(path, 1.0, now); + db.add(path, 1.0, now); + db.save().unwrap(); + } + + { + let db = Database::open_dir(data_dir.path()).unwrap(); + assert_eq!(db.dirs().len(), 1); + + let dir = &db.dirs()[0]; + assert_eq!(dir.path, path); + assert!((dir.rank - 2.0).abs() < 0.01); + assert_eq!(dir.last_accessed, now); + } + } + + #[test] + fn remove() { + let data_dir = tempfile::tempdir().unwrap(); + let path = if cfg!(windows) { r"C:\foo\bar" } else { "/foo/bar" }; + let now = 946684800; + + { + let mut db = Database::open_dir(data_dir.path()).unwrap(); + db.add(path, 1.0, now); + db.save().unwrap(); + } + + { + let mut db = Database::open_dir(data_dir.path()).unwrap(); + assert!(db.remove(path)); + db.save().unwrap(); + } + + { + let mut db = Database::open_dir(data_dir.path()).unwrap(); + assert!(db.dirs().is_empty()); + assert!(!db.remove(path)); + 
db.save().unwrap(); + } + } } diff --git a/src/db2/dir.rs b/src/db2/dir.rs new file mode 100644 index 0000000..d0bda75 --- /dev/null +++ b/src/db2/dir.rs @@ -0,0 +1,160 @@ +use std::borrow::Cow; +use std::fmt::{self, Display, Formatter}; +use std::ops::{Deref, DerefMut}; + +use anyhow::{bail, Context, Result}; +use bincode::Options as _; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Deserialize, Serialize)] +pub struct DirList<'a>(#[serde(borrow)] pub Vec>); + +impl DirList<'_> { + const VERSION: u32 = 3; + + pub fn new() -> DirList<'static> { + DirList(Vec::new()) + } + + pub fn from_bytes(bytes: &[u8]) -> Result { + // Assume a maximum size for the database. This prevents bincode from throwing strange + // errors when it encounters invalid data. + const MAX_SIZE: u64 = 32 << 20; // 32 MiB + let deserializer = &mut bincode::options().with_fixint_encoding().with_limit(MAX_SIZE); + + // Split bytes into sections. + let version_size = deserializer.serialized_size(&Self::VERSION).unwrap() as _; + if bytes.len() < version_size { + bail!("could not deserialize database: corrupted data"); + } + let (bytes_version, bytes_dirs) = bytes.split_at(version_size); + + // Deserialize sections. + (|| { + let version = deserializer.deserialize(bytes_version)?; + match version { + Self::VERSION => Ok(deserializer.deserialize(bytes_dirs)?), + version => { + bail!("unsupported version (got {version}, supports {})", Self::VERSION) + } + } + })() + .context("could not deserialize database") + } + + pub fn to_bytes(&self) -> Result> { + (|| -> bincode::Result<_> { + // Preallocate buffer with combined size of sections. + let version_size = bincode::serialized_size(&Self::VERSION)?; + let dirs_size = bincode::serialized_size(&self)?; + let buffer_size = version_size + dirs_size; + let mut buffer = Vec::with_capacity(buffer_size as _); + + // Serialize sections into buffer. 
+ bincode::serialize_into(&mut buffer, &Self::VERSION)?; + bincode::serialize_into(&mut buffer, &self)?; + Ok(buffer) + })() + .context("could not serialize database") + } +} + +impl<'a> Deref for DirList<'a> { + type Target = Vec>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> DerefMut for DirList<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<'a> From>> for DirList<'a> { + fn from(dirs: Vec>) -> Self { + DirList(dirs) + } +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct Dir<'a> { + #[serde(borrow)] + pub path: Cow<'a, str>, + pub rank: Rank, + pub last_accessed: Epoch, +} + +impl Dir<'_> { + pub fn score(&self, now: Epoch) -> Rank { + const HOUR: Epoch = 60 * 60; + const DAY: Epoch = 24 * HOUR; + const WEEK: Epoch = 7 * DAY; + + // The older the entry, the lesser its importance. + let duration = now.saturating_sub(self.last_accessed); + if duration < HOUR { + self.rank * 4.0 + } else if duration < DAY { + self.rank * 2.0 + } else if duration < WEEK { + self.rank * 0.5 + } else { + self.rank * 0.25 + } + } + + pub fn display(&self) -> DirDisplay { + DirDisplay { dir: self } + } + + pub fn display_score(&self, now: Epoch) -> DirDisplayScore { + DirDisplayScore { dir: self, now } + } +} + +pub struct DirDisplay<'a> { + dir: &'a Dir<'a>, +} + +impl Display for DirDisplay<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.dir.path) + } +} + +pub struct DirDisplayScore<'a> { + dir: &'a Dir<'a>, + now: Epoch, +} + +impl Display for DirDisplayScore<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let score = self.dir.score(self.now).clamp(0.0, 9999.0) as u64; + write!(f, "{:>4} {}", score, self.dir.path) + } +} + +pub type Rank = f64; +pub type Epoch = u64; + +#[cfg(test)] +mod tests { + use std::borrow::Cow; + + use super::*; + + #[test] + fn zero_copy() { + let dirs = DirList(vec![Dir { path: "/".into(), rank: 0.0, last_accessed: 0 }]); + + let 
bytes = dirs.to_bytes().unwrap(); + let dirs = DirList::from_bytes(&bytes).unwrap(); + + for dir in dirs.iter() { + assert!(matches!(dir.path, Cow::Borrowed(_))) + } + } +} diff --git a/src/db2/mod.rs b/src/db2/mod.rs new file mode 100644 index 0000000..aa3eae4 --- /dev/null +++ b/src/db2/mod.rs @@ -0,0 +1,75 @@ +mod dir; +mod stream; + +use std::path::{Path, PathBuf}; +use std::{fs, io}; + +use anyhow::{Context, Result}; +pub use dir::{Dir, DirList, Epoch, Rank}; +pub use stream::Stream; + +use crate::util; + +#[derive(Debug)] +pub struct Database<'file> { + pub dirs: DirList<'file>, + pub modified: bool, + pub data_dir: &'file Path, +} + +impl<'file> Database<'file> { + pub fn save(&mut self) -> Result<()> { + if !self.modified { + return Ok(()); + } + + let buffer = self.dirs.to_bytes()?; + let path = db_path(self.data_dir); + util::write(&path, &buffer).context("could not write to database")?; + self.modified = false; + Ok(()) + } + + // Streaming iterator for directories. + pub fn stream(&mut self, now: Epoch) -> Stream<'_, 'file> { + Stream::new(self, now) + } +} + +pub struct DatabaseFile { + buffer: Vec, + data_dir: PathBuf, +} + +impl DatabaseFile { + pub fn new>(data_dir: P) -> Self { + DatabaseFile { buffer: Vec::new(), data_dir: data_dir.into() } + } + + pub fn open(&mut self) -> Result { + // Read the entire database to memory. For smaller files, this is faster than + // mmap / streaming, and allows for zero-copy deserialization. + let path = db_path(&self.data_dir); + match fs::read(&path) { + Ok(buffer) => { + self.buffer = buffer; + let dirs = DirList::from_bytes(&self.buffer) + .with_context(|| format!("could not deserialize database: {}", path.display()))?; + Ok(Database { dirs, modified: false, data_dir: &self.data_dir }) + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + // Create data directory, but don't create any file yet. The file will be created + // later by [`Database::save`] if any data is modified. 
+ fs::create_dir_all(&self.data_dir) + .with_context(|| format!("unable to create data directory: {}", self.data_dir.display()))?; + Ok(Database { dirs: DirList::new(), modified: false, data_dir: &self.data_dir }) + } + Err(e) => Err(e).with_context(|| format!("could not read from database: {}", path.display())), + } + } +} + +fn db_path>(data_dir: P) -> PathBuf { + const DB_FILENAME: &str = "db.zo"; + data_dir.as_ref().join(DB_FILENAME) +} diff --git a/src/db/stream.rs b/src/db2/stream.rs similarity index 94% rename from src/db/stream.rs rename to src/db2/stream.rs index 275d52e..be0f836 100644 --- a/src/db/stream.rs +++ b/src/db2/stream.rs @@ -2,8 +2,8 @@ use std::iter::Rev; use std::ops::Range; use std::{fs, path}; -use crate::db::{Database, Dir, Epoch}; -use crate::util; +use crate::db2::{Database, Dir, Epoch}; +use crate::util::{self, MONTH}; pub struct Stream<'db, 'file> { db: &'db mut Database<'file>, @@ -24,8 +24,9 @@ impl<'db, 'file> Stream<'db, 'file> { db.dirs.sort_unstable_by(|dir1, dir2| dir1.score(now).total_cmp(&dir2.score(now))); let idxs = (0..db.dirs.len()).rev(); - // If a directory is deleted and hasn't been used for 90 days, delete it from the database. - let expire_below = now.saturating_sub(90 * 24 * 60 * 60); + // If a directory is deleted and hasn't been used for 3 months, delete + // it from the database. 
+ let expire_below = now.saturating_sub(3 * MONTH); Stream { db, diff --git a/src/main.rs b/src/main.rs index 3a5f0a7..12448f4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,9 +7,9 @@ use rstest_reuse; mod cmd; mod config; mod db; +mod db2; mod error; mod shell; -mod store; mod util; use std::env; diff --git a/src/store/mod.rs b/src/store/mod.rs deleted file mode 100644 index 0152950..0000000 --- a/src/store/mod.rs +++ /dev/null @@ -1,296 +0,0 @@ -use std::borrow::Cow; -use std::path::{Path, PathBuf}; -use std::{fs, io}; - -use anyhow::{bail, Context, Result}; -use bincode::Options; -use ouroboros::self_referencing; -use serde::{Deserialize, Serialize}; - -use crate::{config, util}; - -#[self_referencing] -pub struct Store { - path: PathBuf, - bytes: Vec, - #[borrows(bytes)] - #[covariant] - dirs: Vec>, - dirty: bool, -} - -impl Store { - const VERSION: u32 = 3; - - pub fn open() -> Result { - let data_dir = config::data_dir()?; - Self::open_dir(&data_dir) - } - - pub fn open_dir(data_dir: &Path) -> Result { - let path = data_dir.join("db.zo"); - - match fs::read(&path) { - Ok(bytes) => Self::try_new(path, bytes, |bytes| Self::deserialize(bytes), false), - Err(e) if e.kind() == io::ErrorKind::NotFound => { - // Create data directory, but don't create any file yet. The file will be created - // later by [`Database::save`] if any data is modified. - fs::create_dir_all(data_dir) - .with_context(|| format!("unable to create data directory: {}", data_dir.display()))?; - Ok(Self::new(path, Vec::new(), |_| Vec::new(), false)) - } - Err(e) => Err(e).with_context(|| format!("could not read from database: {}", path.display())), - } - } - - pub fn save(&mut self) -> Result<()> { - // Only write to disk if the database is modified. 
- if !self.dirty() { - return Ok(()); - } - - let bytes = Self::serialize(self.dirs())?; - util::write(self.borrow_path(), &bytes).context("could not write to database")?; - self.with_dirty_mut(|dirty| *dirty = false); - - Ok(()) - } - - /// Increments the rank of a directory, or creates it if it does not exist. - pub fn add(&mut self, path: impl AsRef + Into, by: Rank, now: Epoch) { - self.with_dirs_mut(|dirs| match dirs.iter_mut().find(|dir| dir.path == path.as_ref()) { - Some(dir) => dir.rank = (dir.rank + by).max(0.0), - None => dirs.push(Dir { path: path.into().into(), rank: by.max(0.0), last_accessed: now }), - }); - self.with_dirty_mut(|dirty| *dirty = true); - } - - /// Creates a new directory. This will create a duplicate entry if this - /// directory is always in the database, it is expected that the user either - /// does a check before calling this, or calls `dedup()` afterward. - pub fn add_unchecked(&mut self, path: impl AsRef + Into, rank: Rank, now: Epoch) { - self.with_dirs_mut(|dirs| dirs.push(Dir { path: path.into().into(), rank, last_accessed: now })); - self.with_dirty_mut(|dirty| *dirty = true); - } - - /// Increments the rank and updates the last_accessed of a directory, or - /// creates it if it does not exist. - pub fn add_update(&mut self, path: impl AsRef + Into, by: Rank, now: Epoch) { - self.with_dirs_mut(|dirs| match dirs.iter_mut().find(|dir| dir.path == path.as_ref()) { - Some(dir) => { - dir.rank = (dir.rank + by).max(0.0); - dir.last_accessed = now; - } - None => dirs.push(Dir { path: path.into().into(), rank: by.max(0.0), last_accessed: now }), - }); - self.with_dirty_mut(|dirty| *dirty = true); - } - - /// Removes the directory with `path` from the store. This does not preserve ordering, but is - /// O(1). 
- pub fn remove(&mut self, path: impl AsRef) -> bool { - let deleted = self.with_dirs_mut(|dirs| match dirs.iter().position(|dir| dir.path == path.as_ref()) { - Some(idx) => { - dirs.swap_remove(idx); - true - } - None => false, - }); - self.with_dirty_mut(|dirty| *dirty |= deleted); - deleted - } - - pub fn age(&mut self, max_age: Rank) { - let mut dirty = false; - self.with_dirs_mut(|dirs| { - let total_age = dirs.iter().map(|dir| dir.rank).sum::(); - if total_age > max_age { - let factor = 0.9 * max_age / total_age; - for idx in (0..dirs.len()).rev() { - let dir = &mut dirs[idx]; - dir.rank *= factor; - if dir.rank < 1.0 { - dirs.swap_remove(idx); - } - } - dirty = true; - } - }); - self.with_dirty_mut(|dirty_prev| *dirty_prev |= dirty); - } - - pub fn dedup(&mut self) { - // Sort by path, so that equal paths are next to each other. - self.sort_by_path(); - - let mut dirty = false; - self.with_dirs_mut(|dirs| { - for idx in (1..dirs.len()).rev() { - // Check if curr_dir and next_dir have equal paths. - let curr_dir = &dirs[idx]; - let next_dir = &dirs[idx - 1]; - if next_dir.path != curr_dir.path { - continue; - } - - // Merge curr_dir's rank and last_accessed into next_dir. - let rank = curr_dir.rank; - let last_accessed = curr_dir.last_accessed; - let next_dir = &mut dirs[idx - 1]; - next_dir.last_accessed = next_dir.last_accessed.max(last_accessed); - next_dir.rank += rank; - - // Delete curr_dir. 
- dirs.swap_remove(idx); - dirty = true; - } - }); - self.with_dirty_mut(|dirty_prev| *dirty_prev |= dirty); - } - - pub fn sort_by_path(&mut self) { - self.with_dirs_mut(|dirs| dirs.sort_unstable_by(|dir1, dir2| dir1.path.cmp(&dir2.path))); - self.with_dirty_mut(|dirty| *dirty = true); - } - - pub fn sort_by_score(&mut self, now: Epoch) { - self.with_dirs_mut(|dirs| { - dirs.sort_unstable_by(|dir1: &Dir, dir2: &Dir| dir1.score(now).total_cmp(&dir2.score(now))) - }); - self.with_dirty_mut(|dirty| *dirty = true); - } - - pub fn dirty(&self) -> bool { - *self.borrow_dirty() - } - - pub fn dirs(&self) -> &[Dir] { - self.borrow_dirs() - } - - fn serialize(dirs: &[Dir<'_>]) -> Result> { - (|| -> bincode::Result<_> { - // Preallocate buffer with combined size of sections. - let buffer_size = bincode::serialized_size(&Self::VERSION)? + bincode::serialized_size(&dirs)?; - let mut buffer = Vec::with_capacity(buffer_size as usize); - - // Serialize sections into buffer. - bincode::serialize_into(&mut buffer, &Self::VERSION)?; - bincode::serialize_into(&mut buffer, &dirs)?; - - Ok(buffer) - })() - .context("could not serialize database") - } - - fn deserialize(bytes: &[u8]) -> Result> { - // Assume a maximum size for the database. This prevents bincode from throwing strange - // errors when it encounters invalid data. - const MAX_SIZE: u64 = 32 << 20; // 32 MiB - let deserializer = &mut bincode::options().with_fixint_encoding().with_limit(MAX_SIZE); - - // Split bytes into sections. - let version_size = deserializer.serialized_size(&Self::VERSION).unwrap() as _; - if bytes.len() < version_size { - bail!("could not deserialize database: corrupted data"); - } - let (bytes_version, bytes_dirs) = bytes.split_at(version_size); - - // Deserialize sections. 
- let version = deserializer.deserialize(bytes_version)?; - let dirs = match version { - Self::VERSION => deserializer.deserialize(bytes_dirs).context("could not deserialize database")?, - version => { - bail!("unsupported version (got {version}, supports {})", Self::VERSION) - } - }; - - Ok(dirs) - } -} - -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct Dir<'a> { - #[serde(borrow)] - pub path: Cow<'a, str>, - pub rank: Rank, - pub last_accessed: Epoch, -} - -impl Dir<'_> { - pub fn score(&self, now: Epoch) -> Rank { - const HOUR: Epoch = 60 * 60; - const DAY: Epoch = 24 * HOUR; - const WEEK: Epoch = 7 * DAY; - - // The older the entry, the lesser its importance. - let duration = now.saturating_sub(self.last_accessed); - if duration < HOUR { - self.rank * 4.0 - } else if duration < DAY { - self.rank * 2.0 - } else if duration < WEEK { - self.rank * 0.5 - } else { - self.rank * 0.25 - } - } -} - -pub type Rank = f64; -pub type Epoch = u64; - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn add() { - let data_dir = tempfile::tempdir().unwrap(); - let path = if cfg!(windows) { r"C:\foo\bar" } else { "/foo/bar" }; - let now = 946684800; - - { - let mut db = Store::open_dir(data_dir.path()).unwrap(); - db.add(path, 1.0, now); - db.add(path, 1.0, now); - db.save().unwrap(); - } - - { - let db = Store::open_dir(data_dir.path()).unwrap(); - assert_eq!(db.dirs().len(), 1); - - let dir = &db.dirs()[0]; - assert_eq!(dir.path, path); - assert!((dir.rank - 2.0).abs() < 0.01); - assert_eq!(dir.last_accessed, now); - } - } - - #[test] - fn remove() { - let data_dir = tempfile::tempdir().unwrap(); - let path = if cfg!(windows) { r"C:\foo\bar" } else { "/foo/bar" }; - let now = 946684800; - - { - let mut db = Store::open_dir(data_dir.path()).unwrap(); - db.add(path, 1.0, now); - db.save().unwrap(); - } - - { - let mut db = Store::open_dir(data_dir.path()).unwrap(); - assert!(db.remove(path)); - db.save().unwrap(); - } - - { - let mut db = 
Store::open_dir(data_dir.path()).unwrap(); - assert!(db.dirs().is_empty()); - assert!(!db.remove(path)); - db.save().unwrap(); - } - } -} diff --git a/src/util.rs b/src/util.rs index 605c3da..8e03abb 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,5 +1,6 @@ use std::fs::{self, File, OpenOptions}; use std::io::{self, Read, Write}; +use std::ops::{Deref, DerefMut}; use std::path::{Component, Path, PathBuf}; use std::process::{Child, ChildStdin, Command, Stdio}; use std::time::SystemTime; @@ -10,9 +11,98 @@ use anyhow::anyhow; use anyhow::{bail, Context, Result}; use crate::config; -use crate::db::Epoch; +use crate::db2::Epoch; use crate::error::SilentExit; +pub const SECOND: Epoch = 1; +pub const MINUTE: Epoch = 60 * SECOND; +pub const HOUR: Epoch = 60 * MINUTE; +pub const DAY: Epoch = 24 * HOUR; +pub const WEEK: Epoch = 7 * DAY; +pub const MONTH: Epoch = 30 * DAY; + +pub struct Fz(Command); + +impl Fz { + const ERR_FZF_NOT_FOUND: &str = "could not find fzf, is it installed?"; + + pub fn new() -> Result { + // On Windows, CreateProcess implicitly searches the current working + // directory for the executable, which is a potential security issue. + // Instead, we resolve the path to the executable and then pass it to + // CreateProcess. 
+ #[cfg(windows)] + let program = which::which("fzf.exe").map_err(|_| anyhow!(Self::ERR_FZF_NOT_FOUND))?; + #[cfg(not(windows))] + let program = "fzf"; + Ok(Fz(Command::new(program))) + } + + pub fn spawn(&mut self) -> Result { + match self.0.spawn() { + Ok(child) => Ok(FzfChild(child)), + Err(e) if e.kind() == io::ErrorKind::NotFound => bail!(Self::ERR_FZF_NOT_FOUND), + Err(e) => Err(e).context("could not launch fzf"), + } + } +} + +impl Deref for Fz { + type Target = Command; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Fz { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +pub struct FzfChild(Child); + +impl FzfChild { + pub fn select(&mut self) -> Result { + // Drop stdin to prevent deadlock. + mem::drop(self.stdin.take()); + + let mut stdout = self.stdout.take().unwrap(); + let mut output = String::new(); + stdout.read_to_string(&mut output).context("failed to read from fzf")?; + + self.wait()?; + Ok(output) + } + + pub fn wait(&mut self) -> Result<()> { + let status = self.0.wait().context("wait failed on fzf")?; + match status.code() { + Some(0) => Ok(()), + Some(1) => bail!("no match found"), + Some(2) => bail!("fzf returned an error"), + Some(130) => bail!(SilentExit { code: 130 }), + Some(128..=254) | None => bail!("fzf was terminated"), + _ => bail!("fzf returned an unknown error"), + } + } +} + +impl Deref for FzfChild { + type Target = Child; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for FzfChild { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + pub struct Fzf { child: Child, }