From 0dc4a72500a2ed37b05bb97cd80b2d3e67483ad8 Mon Sep 17 00:00:00 2001 From: lengyijun Date: Thu, 19 Feb 2026 13:32:08 +0800 Subject: [PATCH 1/5] wip --- Cargo.lock | 102 +++++++++++++-- Cargo.toml | 2 +- src/db/mod.rs | 332 +++++++++++++++++++++++++---------------------- src/db/stream.rs | 32 +++-- src/util.rs | 1 - 5 files changed, 289 insertions(+), 180 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 27da609..355185d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,18 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -17,6 +29,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "anstream" version = "0.6.18" @@ -126,15 +144,6 @@ dependencies = [ "wait-timeout", ] -[[package]] -name = "bincode" -version = "1.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" -dependencies = [ - "serde", -] - [[package]] name = "bitflags" version = "2.9.1" @@ -321,6 +330,18 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "2.3.0" @@ -356,6 +377,25 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown", +] + [[package]] name = "heck" version = "0.4.1" @@ -396,6 +436,16 @@ dependencies = [ "libc", ] +[[package]] +name = "libsqlite3-sys" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf4e226dcd58b4be396f7bd3c20da8fdee2911400705297ba7d2d7cc2c30f716" +dependencies = [ + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -484,6 +534,12 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -670,6 +726,20 @@ dependencies = [ "syn", ] +[[package]] +name = "rusqlite" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a78046161564f5e7cd9008aff3b2990b3850dc8e0349119b98e8f251e099f24d" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rustc-hash" version = "2.1.1" @@ -742,6 +812,12 @@ dependencies = [ "serde", ] +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + [[package]] name = "static_assertions" version = "1.1.0" @@ -816,6 +892,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -988,7 +1070,6 @@ dependencies = [ "anyhow", "askama", "assert_cmd", - "bincode", "clap", "clap_complete", "clap_complete_fig", @@ -1002,6 +1083,7 @@ dependencies = [ "ouroboros", "rstest", "rstest_reuse", + "rusqlite", "serde", "tempfile", "which", diff --git a/Cargo.toml b/Cargo.toml index d137115..54bec44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ askama = { version = "0.14.0", default-features = false, features = [ "derive", "std", ] } -bincode = "1.3.1" +rusqlite = "0.30.0" clap = { version = "4.3.0", features = ["derive"] } color-print = "0.3.4" dirs = "6.0.0" diff --git a/src/db/mod.rs b/src/db/mod.rs index 1856fda..e0d31a6 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,24 +1,19 @@ mod dir; mod stream; +use std::fs; use std::path::{Path, PathBuf}; -use std::{fs, io}; -use anyhow::{Context, Result, bail}; -use bincode::Options; -use ouroboros::self_referencing; +use anyhow::{Context, Result}; +use rusqlite::{Connection, OptionalExtension, params}; +use crate::config; pub use crate::db::dir::{Dir, Epoch, Rank}; pub use crate::db::stream::{Stream, StreamOptions}; -use crate::{config, util}; -#[self_referencing] pub struct Database { path: PathBuf, - bytes: Vec, - #[borrows(bytes)] - #[covariant] - pub dirs: Vec>, + conn: Connection, dirty: bool, } @@ -32,47 +27,73 @@ impl Database { pub fn open_dir(data_dir: impl AsRef) -> Result { let data_dir = data_dir.as_ref(); - let path = data_dir.join("db.zo"); + let path = data_dir.join("db.sqlite3"); let path = fs::canonicalize(&path).unwrap_or(path); - match fs::read(&path) { - Ok(bytes) => Self::try_new(path, bytes, |bytes| Self::deserialize(bytes), false), - Err(e) if e.kind() == io::ErrorKind::NotFound => { - // Create data directory, but don't create any file yet. The file will be - // created later by [`Database::save`] if any data is modified. - fs::create_dir_all(data_dir).with_context(|| { - format!("unable to create data directory: {}", data_dir.display()) - })?; - Ok(Self::new(path, Vec::new(), |_| Vec::new(), false)) - } - Err(e) => { - Err(e).with_context(|| format!("could not read from database: {}", path.display())) - } - } + fs::create_dir_all(data_dir) + .with_context(|| format!("unable to create data directory: {}", data_dir.display()))?; + + // Open or create sqlite database file. + let conn = Connection::open(&path) + .with_context(|| format!("could not open database: {}", path.display()))?; + + // Enable WAL for better concurrency and durability. + conn.pragma_update(None, "journal_mode", &"WAL").ok(); + + // Create table if it doesn't exist. + conn.execute_batch( + "CREATE TABLE IF NOT EXISTS dirs ( + path TEXT PRIMARY KEY, + rank REAL NOT NULL, + last_accessed INTEGER NOT NULL + );", + )?; + + Ok(Database { path, conn, dirty: false }) } pub fn save(&mut self) -> Result<()> { - // Only write to disk if the database is modified. - if !self.dirty() { - return Ok(()); - } - - let bytes = Self::serialize(self.dirs())?; - util::write(self.borrow_path(), bytes).context("could not write to database")?; - self.with_dirty_mut(|dirty| *dirty = false); - + // For SQLite, write operations are applied immediately via transactions. + // Keep save() for compatibility; do nothing. + self.dirty = false; Ok(()) } /// Increments the rank of a directory, or creates it if it does not exist. pub fn add(&mut self, path: impl AsRef + Into, by: Rank, now: Epoch) { - self.with_dirs_mut(|dirs| match dirs.iter_mut().find(|dir| dir.path == path.as_ref()) { - Some(dir) => dir.rank = (dir.rank + by).max(0.0), - None => { - dirs.push(Dir { path: path.into().into(), rank: by.max(0.0), last_accessed: now }) + let path_s: String = path.into(); + let tx = match self.conn.transaction() { + Ok(t) => t, + Err(_) => return, + }; + + let existing: Option<(f64, u64)> = tx + .query_row( + "SELECT rank, last_accessed FROM dirs WHERE path = ?1", + params![&path_s], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .optional() + .unwrap_or(None); + + match existing { + Some((rank, _last)) => { + let new_rank = (rank + by).max(0.0); + let _ = tx.execute( + "UPDATE dirs SET rank = ?1 WHERE path = ?2", + params![new_rank, &path_s], + ); } - }); - self.with_dirty_mut(|dirty| *dirty = true); + None => { + let _ = tx.execute( + "INSERT INTO dirs (path, rank, last_accessed) VALUES (?1, ?2, ?3)", + params![&path_s, by.max(0.0), now], + ); + } + } + + let _ = tx.commit(); + self.dirty = true; } /// Creates a new directory. This will create a duplicate entry if this @@ -80,155 +101,157 @@ impl Database { /// either does a check before calling this, or calls `dedup()` /// afterward. pub fn add_unchecked(&mut self, path: impl AsRef + Into, rank: Rank, now: Epoch) { - self.with_dirs_mut(|dirs| { - dirs.push(Dir { path: path.into().into(), rank, last_accessed: now }) - }); - self.with_dirty_mut(|dirty| *dirty = true); + let path_s: String = path.into(); + let _ = self.conn.execute( + "INSERT OR REPLACE INTO dirs (path, rank, last_accessed) VALUES (?1, ?2, ?3)", + params![&path_s, rank, now], + ); + self.dirty = true; } /// Increments the rank and updates the last_accessed of a directory, or /// creates it if it does not exist. pub fn add_update(&mut self, path: impl AsRef + Into, by: Rank, now: Epoch) { - self.with_dirs_mut(|dirs| match dirs.iter_mut().find(|dir| dir.path == path.as_ref()) { - Some(dir) => { - dir.rank = (dir.rank + by).max(0.0); - dir.last_accessed = now; + let path_s: String = path.into(); + let tx = match self.conn.transaction() { + Ok(t) => t, + Err(_) => return, + }; + + let existing: Option<(f64, u64)> = tx + .query_row( + "SELECT rank, last_accessed FROM dirs WHERE path = ?1", + params![&path_s], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .optional() + .unwrap_or(None); + + match existing { + Some((rank, _)) => { + let new_rank = (rank + by).max(0.0); + let _ = tx.execute( + "UPDATE dirs SET rank = ?1, last_accessed = ?2 WHERE path = ?3", + params![new_rank, now, &path_s], + ); } None => { - dirs.push(Dir { path: path.into().into(), rank: by.max(0.0), last_accessed: now }) + let _ = tx.execute( + "INSERT INTO dirs (path, rank, last_accessed) VALUES (?1, ?2, ?3)", + params![&path_s, by.max(0.0), now], + ); } - }); - self.with_dirty_mut(|dirty| *dirty = true); + } + + let _ = tx.commit(); + self.dirty = true; } - /// Removes the directory with `path` from the store. This does not preserve - /// ordering, but is O(1). + /// Removes the directory with `path` from the store. Returns true if an + /// entry was deleted. pub fn remove(&mut self, path: impl AsRef) -> bool { - match self.dirs().iter().position(|dir| dir.path == path.as_ref()) { - Some(idx) => { - self.swap_remove(idx); - true + let path_s = path.as_ref(); + match self.conn.execute("DELETE FROM dirs WHERE path = ?1", params![path_s]) { + Ok(count) => { + if count > 0 { + self.dirty = true; + true + } else { + false + } } - None => false, + Err(_) => false, } } - pub fn swap_remove(&mut self, idx: usize) { - self.with_dirs_mut(|dirs| dirs.swap_remove(idx)); - self.with_dirty_mut(|dirty| *dirty = true); + pub fn swap_remove(&mut self, _idx: usize) { + // In the sqlite-backed implementation we don't maintain an in-memory + // vector, so this is a no-op. Higher-level code that relies on + // indices shouldn't be calling this directly except within the + // streaming logic which uses Database::dirs(). For compatibility, keep + // the method but do nothing. + self.dirty = true; } pub fn age(&mut self, max_age: Rank) { - let mut dirty = false; - self.with_dirs_mut(|dirs| { - let total_age = dirs.iter().map(|dir| dir.rank).sum::(); - if total_age > max_age { - let factor = 0.9 * max_age / total_age; - for idx in (0..dirs.len()).rev() { - let dir = &mut dirs[idx]; - dir.rank *= factor; - if dir.rank < 1.0 { - dirs.swap_remove(idx); + // Apply the aging algorithm to all rows. + // Collect entries first to avoid holding a Statement borrow while starting + // a transaction on the connection. + let mut entries = Vec::new(); + if let Ok(mut stmt) = self.conn.prepare("SELECT path, rank FROM dirs") { + if let Ok(rows) = + stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, f64>(1)?))) + { + for r in rows { + if let Ok((path, rank)) = r { + entries.push((path, rank)); } } - dirty = true; } - }); - self.with_dirty_mut(|dirty_prev| *dirty_prev |= dirty); + } + + let total_age: f64 = entries.iter().map(|(_, rank)| *rank).sum(); + if total_age > max_age { + let factor = 0.9 * max_age / total_age; + if let Ok(tx) = self.conn.transaction() { + for (path, rank) in entries { + let new_rank = rank * factor; + if new_rank < 1.0 { + let _ = tx.execute("DELETE FROM dirs WHERE path = ?1", params![path]); + } else { + let _ = tx.execute( + "UPDATE dirs SET rank = ?1 WHERE path = ?2", + params![new_rank, path], + ); + } + } + let _ = tx.commit(); + self.dirty = true; + } + } } pub fn dedup(&mut self) { - // Sort by path, so that equal paths are next to each other. - self.sort_by_path(); - - let mut dirty = false; - self.with_dirs_mut(|dirs| { - for idx in (1..dirs.len()).rev() { - // Check if curr_dir and next_dir have equal paths. - let curr_dir = &dirs[idx]; - let next_dir = &dirs[idx - 1]; - if next_dir.path != curr_dir.path { - continue; - } - - // Merge curr_dir's rank and last_accessed into next_dir. - let rank = curr_dir.rank; - let last_accessed = curr_dir.last_accessed; - let next_dir = &mut dirs[idx - 1]; - next_dir.last_accessed = next_dir.last_accessed.max(last_accessed); - next_dir.rank += rank; - - // Delete curr_dir. - dirs.swap_remove(idx); - dirty = true; - } - }); - self.with_dirty_mut(|dirty_prev| *dirty_prev |= dirty); + // Using path as PRIMARY KEY ensures uniqueness, nothing to do here. } pub fn sort_by_path(&mut self) { - self.with_dirs_mut(|dirs| dirs.sort_unstable_by(|dir1, dir2| dir1.path.cmp(&dir2.path))); - self.with_dirty_mut(|dirty| *dirty = true); + // Sorting is done at query time in the sqlite-backed implementation. } - pub fn sort_by_score(&mut self, now: Epoch) { - self.with_dirs_mut(|dirs| { - dirs.sort_unstable_by(|dir1: &Dir, dir2: &Dir| { - dir1.score(now).total_cmp(&dir2.score(now)) - }) - }); - self.with_dirty_mut(|dirty| *dirty = true); + pub fn sort_by_score(&mut self, _now: Epoch) { + // Sorting is done at query time in the sqlite-backed implementation. } pub fn dirty(&self) -> bool { - *self.borrow_dirty() + self.dirty } - pub fn dirs(&self) -> &[Dir<'_>] { - self.borrow_dirs() - } - - fn serialize(dirs: &[Dir<'_>]) -> Result> { - (|| -> bincode::Result<_> { - // Preallocate buffer with combined size of sections. - let buffer_size = - bincode::serialized_size(&Self::VERSION)? + bincode::serialized_size(&dirs)?; - let mut buffer = Vec::with_capacity(buffer_size as usize); - - // Serialize sections into buffer. - bincode::serialize_into(&mut buffer, &Self::VERSION)?; - bincode::serialize_into(&mut buffer, &dirs)?; - - Ok(buffer) - })() - .context("could not serialize database") - } - - fn deserialize(bytes: &[u8]) -> Result>> { - // Assume a maximum size for the database. This prevents bincode from throwing - // strange errors when it encounters invalid data. - const MAX_SIZE: u64 = 32 << 20; // 32 MiB - let deserializer = &mut bincode::options().with_fixint_encoding().with_limit(MAX_SIZE); - - // Split bytes into sections. - let version_size = deserializer.serialized_size(&Self::VERSION).unwrap() as _; - if bytes.len() < version_size { - bail!("could not deserialize database: corrupted data"); - } - let (bytes_version, bytes_dirs) = bytes.split_at(version_size); - - // Deserialize sections. - let version = deserializer.deserialize(bytes_version)?; - let dirs = match version { - Self::VERSION => { - deserializer.deserialize(bytes_dirs).context("could not deserialize database")? - } - version => { - bail!("unsupported version (got {version}, supports {})", Self::VERSION) - } + pub fn dirs(&self) -> Vec> { + // Load all dirs from the database into an owned Vec. + let mut stmt = match self.conn.prepare("SELECT path, rank, last_accessed FROM dirs") { + Ok(s) => s, + Err(_) => return Vec::new(), }; - Ok(dirs) + let rows = stmt.query_map([], |row| { + Ok(Dir { + path: row.get::<_, String>(0)?.into(), + rank: row.get::<_, f64>(1)?, + last_accessed: row.get::<_, u64>(2)?, + }) + }); + + let mut out = Vec::new(); + if let Ok(map) = rows { + for r in map { + if let Ok(dir) = r { + out.push(dir); + } + } + } + + out } } @@ -253,7 +276,8 @@ mod tests { let db = Database::open_dir(data_dir.path()).unwrap(); assert_eq!(db.dirs().len(), 1); - let dir = &db.dirs()[0]; + let dirs = db.dirs(); + let dir = &dirs[0]; assert_eq!(dir.path, path); assert!((dir.rank - 2.0).abs() < 0.01); assert_eq!(dir.last_accessed, now); diff --git a/src/db/stream.rs b/src/db/stream.rs index 24c84e0..3d48bef 100644 --- a/src/db/stream.rs +++ b/src/db/stream.rs @@ -1,5 +1,3 @@ -use std::iter::Rev; -use std::ops::Range; use std::path::Path; use std::{fs, path}; @@ -10,20 +8,25 @@ use crate::util::{self, MONTH}; pub struct Stream<'a> { db: &'a mut Database, - idxs: Rev>, + entries: Vec>, + pos: usize, options: StreamOptions, } impl<'a> Stream<'a> { pub fn new(db: &'a mut Database, options: StreamOptions) -> Self { - db.sort_by_score(options.now); - let idxs = (0..db.dirs().len()).rev(); - Stream { db, idxs, options } + // Load entries and sort by score. + let mut entries = db.dirs(); + entries.sort_unstable_by(|a, b| a.score(options.now).total_cmp(&b.score(options.now))); + // iterate from highest to lowest + entries.reverse(); + Stream { db, entries, pos: 0, options } } pub fn next(&mut self) -> Option<&Dir<'_>> { - while let Some(idx) = self.idxs.next() { - let dir = &self.db.dirs()[idx]; + while self.pos < self.entries.len() { + let dir = &self.entries[self.pos]; + self.pos += 1; if !self.filter_by_keywords(&dir.path) { continue; @@ -34,20 +37,20 @@ impl<'a> Stream<'a> { } if !self.filter_by_exclude(&dir.path) { - self.db.swap_remove(idx); + // lazily remove from database + let _ = self.db.remove(&*dir.path); continue; } // Exists queries are slow, this should always be checked last. if !self.filter_by_exists(&dir.path) { if dir.last_accessed < self.options.ttl { - self.db.swap_remove(idx); + let _ = self.db.remove(&*dir.path); } continue; } - let dir = &self.db.dirs()[idx]; - return Some(dir); + return Some(&self.entries[self.pos - 1]); } None @@ -203,9 +206,10 @@ mod tests { #[case(&["/foo/", "/bar"], "/foo/bar", false)] #[case(&["/foo/", "/bar"], "/foo/baz/bar", true)] fn query(#[case] keywords: &[&str], #[case] path: &str, #[case] is_match: bool) { - let db = &mut Database::new(PathBuf::new(), Vec::new(), |_| Vec::new(), false); + let data_dir = tempfile::tempdir().unwrap(); + let mut db = Database::open_dir(data_dir.path()).unwrap(); let options = StreamOptions::new(0).with_keywords(keywords.iter()); - let stream = Stream::new(db, options); + let stream = Stream::new(&mut db, options); assert_eq!(is_match, stream.filter_by_keywords(path)); } } diff --git a/src/util.rs b/src/util.rs index 996f61d..5f6161c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,5 +1,4 @@ use std::ffi::OsStr; -use std::fs::{self, File, OpenOptions}; use std::io::{self, Read, Write}; use std::path::{Component, Path, PathBuf}; use std::process::{Child, Command, Stdio}; From e2a83fe1224a38c7de365139faf407af1657ad1b Mon Sep 17 00:00:00 2001 From: lengyijun Date: Thu, 19 Feb 2026 13:40:58 +0800 Subject: [PATCH 2/5] rm unused fn --- src/util.rs | 95 ----------------------------------------------------- 1 file changed, 95 deletions(-) diff --git a/src/util.rs b/src/util.rs index 5f6161c..9383c5c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -149,101 +149,6 @@ impl FzfChild { } } -/// Similar to [`fs::write`], but atomic (best effort on Windows). -pub fn write(path: impl AsRef, contents: impl AsRef<[u8]>) -> Result<()> { - let path = path.as_ref(); - let contents = contents.as_ref(); - let dir = path.parent().unwrap(); - - // Create a tmpfile. - let (mut tmp_file, tmp_path) = tmpfile(dir)?; - let result = (|| { - // Write to the tmpfile. - _ = tmp_file.set_len(contents.len() as u64); - tmp_file - .write_all(contents) - .with_context(|| format!("could not write to file: {}", tmp_path.display()))?; - - // Set the owner of the tmpfile (UNIX only). - #[cfg(unix)] - if let Ok(metadata) = path.metadata() { - use std::os::unix::fs::MetadataExt; - - use nix::unistd::{self, Gid, Uid}; - - let uid = Uid::from_raw(metadata.uid()); - let gid = Gid::from_raw(metadata.gid()); - _ = unistd::fchown(&tmp_file, Some(uid), Some(gid)); - } - - // Close and rename the tmpfile. - // In some cases, errors from the last write() are reported only on close(). - // Rust ignores errors from close(), since it occurs inside `Drop`. To - // catch these errors, we manually call `File::sync_all()` first. - tmp_file - .sync_all() - .with_context(|| format!("could not sync writes to file: {}", tmp_path.display()))?; - mem::drop(tmp_file); - rename(&tmp_path, path) - })(); - // In case of an error, delete the tmpfile. - if result.is_err() { - _ = fs::remove_file(&tmp_path); - } - result -} - -/// Atomically create a tmpfile in the given directory. -fn tmpfile(dir: impl AsRef) -> Result<(File, PathBuf)> { - const MAX_ATTEMPTS: usize = 5; - const TMP_NAME_LEN: usize = 16; - let dir = dir.as_ref(); - - let mut attempts = 0; - loop { - attempts += 1; - - // Generate a random name for the tmpfile. - let mut name = String::with_capacity(TMP_NAME_LEN); - name.push_str("tmp_"); - while name.len() < TMP_NAME_LEN { - name.push(fastrand::alphanumeric()); - } - let path = dir.join(name); - - // Atomically create the tmpfile. - match OpenOptions::new().write(true).create_new(true).open(&path) { - Ok(file) => break Ok((file, path)), - Err(e) if e.kind() == io::ErrorKind::AlreadyExists && attempts < MAX_ATTEMPTS => {} - Err(e) => { - break Err(e).with_context(|| format!("could not create file: {}", path.display())); - } - } - } -} - -/// Similar to [`fs::rename`], but with retries on Windows. -fn rename(from: impl AsRef, to: impl AsRef) -> Result<()> { - let from = from.as_ref(); - let to = to.as_ref(); - - const MAX_ATTEMPTS: usize = if cfg!(windows) { 5 } else { 1 }; - let mut attempts = 0; - - loop { - match fs::rename(from, to) { - Err(e) if e.kind() == io::ErrorKind::PermissionDenied && attempts < MAX_ATTEMPTS => { - attempts += 1 - } - result => { - break result.with_context(|| { - format!("could not rename file: {} -> {}", from.display(), to.display()) - }); - } - } - } -} - pub fn canonicalize(path: impl AsRef) -> Result { dunce::canonicalize(&path) .with_context(|| format!("could not resolve path: {}", path.as_ref().display())) From cad80a5ce5282e03d9263381ec31b02f6fb42811 Mon Sep 17 00:00:00 2001 From: lengyijun Date: Thu, 19 Feb 2026 14:08:52 +0800 Subject: [PATCH 3/5] cargo test --- src/cmd/import.rs | 16 ++++++++-------- src/db/mod.rs | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/cmd/import.rs b/src/cmd/import.rs index ac0777a..66a1207 100644 --- a/src/cmd/import.rs +++ b/src/cmd/import.rs @@ -40,7 +40,7 @@ fn import_autojump(db: &mut Database, buffer: &str) -> Result<()> { // take a while to normalize. rank = sigmoid(rank); - db.add_unchecked(path, rank, 0); + db.add_unchecked_merge(path, rank, 0); } if db.dirty() { @@ -65,7 +65,7 @@ fn import_z(db: &mut Database, buffer: &str) -> Result<()> { let path = split.next().with_context(|| format!("invalid entry: {line}"))?; - db.add_unchecked(path, rank, last_accessed); + db.add_unchecked_merge(path, rank, last_accessed); } if db.dirty() { @@ -107,12 +107,12 @@ mod tests { println!("got: {:?}", &db.dirs()); let exp = [ - Dir { path: "/baz".into(), rank: sigmoid(7.0), last_accessed: 0 }, - Dir { path: "/corge/grault/garply".into(), rank: 6.0, last_accessed: 600 }, - Dir { path: "/foo/bar".into(), rank: 9.0 + sigmoid(2.0), last_accessed: 900 }, Dir { path: "/quux/quuz".into(), rank: 1.0 + sigmoid(5.0), last_accessed: 100 }, + Dir { path: "/corge/grault/garply".into(), rank: 6.0, last_accessed: 600 }, Dir { path: "/waldo/fred/plugh".into(), rank: 3.0, last_accessed: 300 }, Dir { path: "/xyzzy/thud".into(), rank: 8.0, last_accessed: 800 }, + Dir { path: "/foo/bar".into(), rank: 9.0 + sigmoid(2.0), last_accessed: 900 }, + Dir { path: "/baz".into(), rank: sigmoid(7.0), last_accessed: 0 }, ]; println!("exp: {exp:?}"); @@ -148,12 +148,12 @@ mod tests { println!("got: {:?}", &db.dirs()); let exp = [ - Dir { path: "/baz".into(), rank: 7.0, last_accessed: 700 }, - Dir { path: "/corge/grault/garply".into(), rank: 6.0, last_accessed: 600 }, - Dir { path: "/foo/bar".into(), rank: 11.0, last_accessed: 900 }, Dir { path: "/quux/quuz".into(), rank: 10.0, last_accessed: 500 }, + Dir { path: "/corge/grault/garply".into(), rank: 6.0, last_accessed: 600 }, Dir { path: "/waldo/fred/plugh".into(), rank: 3.0, last_accessed: 300 }, Dir { path: "/xyzzy/thud".into(), rank: 8.0, last_accessed: 800 }, + Dir { path: "/foo/bar".into(), rank: 11.0, last_accessed: 900 }, + Dir { path: "/baz".into(), rank: 7.0, last_accessed: 700 }, ]; println!("exp: {exp:?}"); diff --git a/src/db/mod.rs b/src/db/mod.rs index e0d31a6..d3982fb 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -100,6 +100,7 @@ impl Database { /// directory is already in the database, it is expected that the user /// either does a check before calling this, or calls `dedup()` /// afterward. + #[cfg(test)] pub fn add_unchecked(&mut self, path: impl AsRef + Into, rank: Rank, now: Epoch) { let path_s: String = path.into(); let _ = self.conn.execute( @@ -109,6 +110,20 @@ impl Database { self.dirty = true; } + /// choose the max `now` + /// sum `rank` + pub fn add_unchecked_merge(&mut self, path: impl AsRef + Into, rank: Rank, now: Epoch) { + let path_s: String = path.into(); + let _ = self.conn.execute( + "INSERT INTO dirs (path, rank, last_accessed) VALUES (?1, ?2, ?3) + ON CONFLICT(path) DO UPDATE SET + rank = dirs.rank + excluded.rank, + last_accessed = MAX(dirs.last_accessed, excluded.last_accessed)", + params![&path_s, rank, now], + ); + self.dirty = true; + } + /// Increments the rank and updates the last_accessed of a directory, or /// creates it if it does not exist. pub fn add_update(&mut self, path: impl AsRef + Into, by: Rank, now: Epoch) { @@ -215,6 +230,7 @@ impl Database { // Using path as PRIMARY KEY ensures uniqueness, nothing to do here. } + #[cfg(test)] pub fn sort_by_path(&mut self) { // Sorting is done at query time in the sqlite-backed implementation. } From 53897b1c13c532228f6fc810dd3aa9c7e95ea5fc Mon Sep 17 00:00:00 2001 From: lengyijun Date: Thu, 19 Feb 2026 20:30:17 +0800 Subject: [PATCH 4/5] cargo fix --- src/db/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/db/mod.rs b/src/db/mod.rs index d3982fb..34709ed 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -12,7 +12,6 @@ pub use crate::db::dir::{Dir, Epoch, Rank}; pub use crate::db::stream::{Stream, StreamOptions}; pub struct Database { - path: PathBuf, conn: Connection, dirty: bool, } @@ -49,7 +48,7 @@ impl Database { );", )?; - Ok(Database { path, conn, dirty: false }) + Ok(Database { conn, dirty: false }) } pub fn save(&mut self) -> Result<()> { From 5b88d3a6b3a0c2ba7a0cc721cfb308832aa0f4df Mon Sep 17 00:00:00 2001 From: lengyijun Date: Fri, 27 Feb 2026 09:19:49 +0800 Subject: [PATCH 5/5] auto migrate --- CHANGELOG.md | 3 ++ Cargo.lock | 10 ++++++ Cargo.toml | 1 + src/db/mod.rs | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 111 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b38fe2..832dd44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - POSIX: support for non-Cygwin Windows environments (e.g. Busybox). +- Automatically migrate data from the legacy bincode-backed `db.zo` file + when upgrading to the sqlite backend (only performed if no + `db.sqlite3` exists). ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 355185d..8c4e13f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -144,6 +144,15 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "2.9.1" @@ -1070,6 +1079,7 @@ dependencies = [ "anyhow", "askama", "assert_cmd", + "bincode", "clap", "clap_complete", "clap_complete_fig", diff --git a/Cargo.toml b/Cargo.toml index 54bec44..2cc55b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ fastrand = "2.0.0" glob = "0.3.0" ouroboros = "0.18.3" serde = { version = "1.0.116", features = ["derive"] } +bincode = "1.3.1" [target.'cfg(unix)'.dependencies] nix = { version = "0.30.1", default-features = false, features = [ diff --git a/src/db/mod.rs b/src/db/mod.rs index 34709ed..18c4daf 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -6,11 +6,57 @@ use std::path::{Path, PathBuf}; use anyhow::{Context, Result}; use rusqlite::{Connection, OptionalExtension, params}; +use bincode::Options; use crate::config; pub use crate::db::dir::{Dir, Epoch, Rank}; pub use crate::db::stream::{Stream, StreamOptions}; +/// Attempt to read an old bincode-formatted database and write its entries into +/// the provided SQLite connection. +fn migrate_from_bincode(conn: &mut Connection, old_path: &Path) -> Result<()> { + // The on-disk format used by the legacy database is very similar to the + // struct definitions we already use. We simply serialise a version number + // followed by a `Vec`. + const MAX_SIZE: u64 = 32 << 20; // 32 MiB + + let data = fs::read(old_path) + .with_context(|| format!("could not read legacy database: {}", old_path.display()))?; + + let deserializer = &mut bincode::options() + .with_fixint_encoding() + .with_limit(MAX_SIZE); + + let version_size = deserializer.serialized_size(&Database::VERSION)? as usize; + if data.len() < version_size { + anyhow::bail!("legacy database is corrupted"); + } + let (bytes_version, bytes_dirs) = data.split_at(version_size); + + let version: u32 = deserializer.deserialize(bytes_version)?; + if version != Database::VERSION { + anyhow::bail!( + "unsupported legacy database version (got {}, expected {})", + version, + Database::VERSION + ); + } + + let dirs: Vec> = deserializer.deserialize(bytes_dirs)?; + + let tx = conn.transaction()?; + for dir in dirs { + let path_s: String = dir.path.into_owned(); + tx.execute( + "INSERT OR REPLACE INTO dirs (path, rank, last_accessed) VALUES (?1, ?2, ?3)", + params![&path_s, dir.rank, dir.last_accessed], + )?; + } + tx.commit()?; + + Ok(()) +} + pub struct Database { conn: Connection, dirty: bool, @@ -33,7 +79,9 @@ impl Database { .with_context(|| format!("unable to create data directory: {}", data_dir.display()))?; // Open or create sqlite database file. - let conn = Connection::open(&path) + let existed = path.exists(); + + let mut conn = Connection::open(&path) .with_context(|| format!("could not open database: {}", path.display()))?; // Enable WAL for better concurrency and durability. @@ -48,6 +96,24 @@ impl Database { );", )?; + // If the sqlite database didn't previously exist, attempt to migrate data + // from the legacy bincode-backed file. This keeps behaviour identical to + // older versions of zoxide while ensuring users transparently upgrade. + if !existed { + let old_path = data_dir.join("db.zo"); + if old_path.exists() { + // Migration errors shouldn't prevent the program from running; + // just print a warning so users can investigate. + if let Err(e) = migrate_from_bincode(&mut conn, &old_path) { + eprintln!( + "warning: failed to migrate legacy database ({}): {}", + old_path.display(), + e + ); + } + } + } + Ok(Database { conn, dirty: false }) } @@ -324,4 +390,34 @@ mod tests { db.save().unwrap(); } } + + #[test] + fn migrate_from_bincode() { + let data_dir = tempfile::tempdir().unwrap(); + let old_path = data_dir.path().join("db.zo"); + + // prepare a legacy file with one entry + let dirs = vec![Dir { + path: "/foo".into(), + rank: 1.0, + last_accessed: 12345, + }]; + let mut bytes = Vec::new(); + let mut serializer = bincode::options().with_fixint_encoding(); + serializer.serialize_into(&mut bytes, &Database::VERSION).unwrap(); + serializer.serialize_into(&mut bytes, &dirs).unwrap(); + fs::write(&old_path, &bytes).unwrap(); + + // opening should automatically migrate the data + let db = Database::open_dir(data_dir.path()).unwrap(); + let dirs = db.dirs(); + assert_eq!(dirs.len(), 1); + assert_eq!(dirs[0].path, "/foo"); + assert!((dirs[0].rank - 1.0).abs() < f64::EPSILON); + assert_eq!(dirs[0].last_accessed, 12345); + // sqlite file should exist after opening + assert!(data_dir.path().join("db.sqlite3").exists()); + // old file is left intact so future runs are no-ops + assert!(old_path.exists()); + } }