diff --git a/src/cmd/add.rs b/src/cmd/add.rs index 19fb329..3a3c4a2 100644 --- a/src/cmd/add.rs +++ b/src/cmd/add.rs @@ -1,8 +1,8 @@ use super::Cmd; use crate::config; +use crate::store::StoreBuilder; use crate::util; -use crate::store::Store; use anyhow::Result; use clap::Clap; @@ -40,7 +40,8 @@ impl Cmd for Add { let data_dir = config::zo_data_dir()?; let max_age = config::zo_maxage()?; - let mut store = Store::open(&data_dir)?; + let mut store = StoreBuilder::new(data_dir); + let mut store = store.build()?; store.add(path, now); store.age(max_age); diff --git a/src/cmd/import.rs b/src/cmd/import.rs index e2cd761..8f45544 100644 --- a/src/cmd/import.rs +++ b/src/cmd/import.rs @@ -3,7 +3,7 @@ use crate::config; use crate::import::{Autojump, Import as _, Z}; use crate::util; -use crate::store::Store; +use crate::store::StoreBuilder; use anyhow::{bail, Result}; use clap::{ArgEnum, Clap}; @@ -27,7 +27,8 @@ impl Cmd for Import { fn run(&self) -> Result<()> { let data_dir = config::zo_data_dir()?; - let mut store = Store::open(&data_dir)?; + let mut store = StoreBuilder::new(data_dir); + let mut store = store.build()?; if !self.merge && !store.dirs.is_empty() { bail!("zoxide database is not empty, specify --merge to continue anyway") } diff --git a/src/cmd/query.rs b/src/cmd/query.rs index 24bafcc..c43b090 100644 --- a/src/cmd/query.rs +++ b/src/cmd/query.rs @@ -3,7 +3,7 @@ use crate::config; use crate::fzf::Fzf; use crate::util; -use crate::store::{self, Store}; +use crate::store::{self, StoreBuilder}; use anyhow::{Context, Result}; use clap::Clap; @@ -30,7 +30,8 @@ pub struct Query { impl Cmd for Query { fn run(&self) -> Result<()> { let data_dir = config::zo_data_dir()?; - let mut store = Store::open(&data_dir)?; + let mut store = StoreBuilder::new(data_dir); + let mut store = store.build()?; let query = store::Query::new(&self.keywords); let now = util::current_time()?; diff --git a/src/cmd/remove.rs b/src/cmd/remove.rs index b629c16..aa44fa8 100644 --- a/src/cmd/remove.rs +++ b/src/cmd/remove.rs @@ -1,8 +1,7 @@ use super::Cmd; use crate::config; use crate::fzf::Fzf; -use crate::store::Query; -use crate::store::Store; +use crate::store::{Query, StoreBuilder}; use crate::util; use anyhow::{bail, Context, Result}; @@ -25,7 +24,8 @@ pub struct Remove { impl Cmd for Remove { fn run(&self) -> Result<()> { let data_dir = config::zo_data_dir()?; - let mut store = Store::open(&data_dir)?; + let mut store = StoreBuilder::new(data_dir); + let mut store = store.build()?; let selection; let path = match &self.interactive { diff --git a/src/config.rs b/src/config.rs index 8d7fe35..844ec5f 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,4 +1,5 @@ use crate::store::Rank; + use anyhow::{bail, Context, Result}; use dirs_next as dirs; diff --git a/src/import/autojump.rs b/src/import/autojump.rs index 0e469ea..03ff448 100644 --- a/src/import/autojump.rs +++ b/src/import/autojump.rs @@ -3,6 +3,7 @@ use super::Import; use crate::store::{Dir, Epoch, Store}; use anyhow::{Context, Result}; +use std::borrow::Cow; use std::fs; use std::path::Path; @@ -43,15 +44,10 @@ impl Import for Autojump { } let rank_sum = entries.iter().map(|(_, rank)| rank).sum::(); - for (path, rank) in entries.iter() { - if store - .dirs - .iter_mut() - .find(|dir| &dir.path == path) - .is_none() - { + for &(path, rank) in entries.iter() { + if store.dirs.iter_mut().find(|dir| dir.path == path).is_none() { store.dirs.push(Dir { - path: path.to_string(), + path: Cow::Owned(path.into()), rank: rank / rank_sum, last_accessed: self.now, }); diff --git a/src/import/z.rs b/src/import/z.rs index e6d565a..c427555 100644 --- a/src/import/z.rs +++ b/src/import/z.rs @@ -3,6 +3,7 @@ use super::Import; use crate::store::{Dir, Store}; use anyhow::{Context, Result}; +use std::borrow::Cow; use std::fs::File; use std::io::{BufRead, BufReader}; use std::path::Path; @@ -46,7 +47,7 @@ impl Import for Z { dir.last_accessed = dir.last_accessed.max(last_accessed); } None => store.dirs.push(Dir { - path: path.to_string(), + path: Cow::Owned(path.into()), rank, last_accessed, }), diff --git a/src/store/dir.rs b/src/store/dir.rs index f8aa47e..1924dd4 100644 --- a/src/store/dir.rs +++ b/src/store/dir.rs @@ -1,27 +1,110 @@ -use super::{Epoch, Query, Rank}; +use super::Query; +use anyhow::{bail, Context, Result}; +use bincode::Options as _; use serde::{Deserialize, Serialize}; +use std::borrow::Cow; use std::fmt::{self, Display, Formatter}; +use std::ops::{Deref, DerefMut}; use std::path::Path; #[derive(Debug, Deserialize, Serialize)] -pub struct Dir { - pub path: String, +pub struct DirList<'a>(#[serde(borrow)] Vec>); + +impl DirList<'_> { + const VERSION: u32 = 3; + + pub fn new() -> DirList<'static> { + DirList(Vec::new()) + } + + pub fn from_bytes(bytes: &[u8]) -> Result { + // Assume a maximum size for the store. This prevents bincode from throwing strange + // errors when it encounters invalid data. + const MAX_SIZE: u64 = 8 << 20; // 8 MiB + let deserializer = &mut bincode::options() + .with_fixint_encoding() + .with_limit(MAX_SIZE); + + // Split bytes into sections. + let version_size = deserializer.serialized_size(&Self::VERSION).unwrap() as _; + if bytes.len() < version_size { + bail!("could not deserialize store: corrupted data"); + } + let (bytes_version, bytes_dirs) = bytes.split_at(version_size); + + // Deserialize sections. + (|| { + let version = deserializer.deserialize(bytes_version)?; + match version { + Self::VERSION => Ok(deserializer.deserialize(bytes_dirs)?), + version => bail!( + "unsupported version (got {}, supports {})", + version, + Self::VERSION, + ), + } + })() + .context("could not deserialize store") + } + + pub fn to_bytes(&self) -> Result> { + (|| -> bincode::Result<_> { + // Preallocate buffer with combined size of sections. + let version_size = bincode::serialized_size(&Self::VERSION)?; + let dirs_size = bincode::serialized_size(&self)?; + let buffer_size = version_size + dirs_size; + let mut buffer = Vec::with_capacity(buffer_size as _); + + // Serialize sections into buffer. + bincode::serialize_into(&mut buffer, &Self::VERSION)?; + bincode::serialize_into(&mut buffer, &self)?; + Ok(buffer) + })() + .context("could not serialize store") + } +} + +impl<'a> Deref for DirList<'a> { + type Target = Vec>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a> DerefMut for DirList<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<'a> From>> for DirList<'a> { + fn from(dirs: Vec>) -> Self { + DirList(dirs) + } +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct Dir<'a> { + #[serde(borrow)] + pub path: Cow<'a, str>, pub rank: Rank, pub last_accessed: Epoch, } -impl Dir { +impl Dir<'_> { pub fn is_match(&self, query: &Query) -> bool { - query.matches(&self.path) && Path::new(&self.path).is_dir() + query.matches(&self.path) && Path::new(self.path.as_ref()).is_dir() } - pub fn get_score(&self, now: Epoch) -> Rank { + pub fn score(&self, now: Epoch) -> Rank { const HOUR: Epoch = 60 * 60; const DAY: Epoch = 24 * HOUR; const WEEK: Epoch = 7 * DAY; + // The older the entry, the lesser its importance. let duration = now.saturating_sub(self.last_accessed); if duration < HOUR { self.rank * 4.0 @@ -44,7 +127,7 @@ impl Dir { } pub struct DirDisplay<'a> { - dir: &'a Dir, + dir: &'a Dir<'a>, } impl Display for DirDisplay<'_> { @@ -54,13 +137,13 @@ impl Display for DirDisplay<'_> { } pub struct DirDisplayScore<'a> { - dir: &'a Dir, + dir: &'a Dir<'a>, now: Epoch, } impl Display for DirDisplayScore<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let score = self.dir.get_score(self.now); + let score = self.dir.score(self.now); let score = if score > 9999.0 { 9999 } else if score > 0.0 { @@ -71,3 +154,6 @@ impl Display for DirDisplayScore<'_> { write!(f, "{:>4} {}", score, self.dir.path) } } + +pub type Rank = f64; +pub type Epoch = u64; diff --git a/src/store/mod.rs b/src/store/mod.rs index c71ef37..a3dfea2 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -1,111 +1,32 @@ mod dir; mod query; -use anyhow::{bail, Context, Result}; -use bincode::Options; +pub use dir::{Dir, DirList, Epoch, Rank}; +pub use query::Query; + +use anyhow::{Context, Result}; use ordered_float::OrderedFloat; -use serde::{Deserialize, Serialize}; use tempfile::{NamedTempFile, PersistError}; +use std::borrow::Cow; use std::cmp::Reverse; use std::fs; use std::io::{self, Write}; use std::path::{Path, PathBuf}; -pub use dir::Dir; -pub use query::Query; - -pub type Rank = f64; -pub type Epoch = u64; - -#[derive(Debug)] -pub struct Store { - pub dirs: Vec, +pub struct Store<'a> { + pub dirs: DirList<'a>, pub modified: bool, - data_dir: PathBuf, + data_dir: &'a Path, } -impl Store { - pub const CURRENT_VERSION: StoreVersion = StoreVersion(3); - const MAX_SIZE: u64 = 8 * 1024 * 1024; // 8 MiB - - pub fn open>(data_dir: P) -> Result { - let data_dir = data_dir.into(); - let path = Self::get_path(&data_dir); - - let buffer = match fs::read(&path) { - Ok(buffer) => buffer, - Err(e) if e.kind() == io::ErrorKind::NotFound => { - fs::create_dir_all(&data_dir).with_context(|| { - format!("unable to create data directory: {}", path.display()) - })?; - return Ok(Store { - dirs: Vec::new(), - modified: false, - data_dir, - }); - } - Err(e) => { - Err(e).with_context(|| format!("could not read from store: {}", path.display()))? - } - }; - - let deserializer = &mut bincode::options() - .with_fixint_encoding() - .with_limit(Self::MAX_SIZE); - - let version_size = deserializer - .serialized_size(&Self::CURRENT_VERSION) - .unwrap() as _; - - if buffer.len() < version_size { - bail!("data store may be corrupted: {}", path.display()); - } - - let (buffer_version, buffer_dirs) = buffer.split_at(version_size); - - let version = deserializer - .deserialize(buffer_version) - .with_context(|| format!("could not deserialize store version: {}", path.display()))?; - - let dirs = match version { - Self::CURRENT_VERSION => deserializer - .deserialize(buffer_dirs) - .with_context(|| format!("could not deserialize store: {}", path.display()))?, - version => bail!( - "unsupported store version, got={}, supported={}: {}", - version.0, - Self::CURRENT_VERSION.0, - path.display() - ), - }; - - Ok(Store { - dirs, - modified: false, - data_dir, - }) - } - +impl<'a> Store<'a> { pub fn save(&mut self) -> Result<()> { if !self.modified { return Ok(()); } - let (buffer, buffer_size) = (|| -> bincode::Result<_> { - let version_size = bincode::serialized_size(&Self::CURRENT_VERSION)?; - let dirs_size = bincode::serialized_size(&self.dirs)?; - - let buffer_size = version_size + dirs_size; - let mut buffer = Vec::with_capacity(buffer_size as _); - - bincode::serialize_into(&mut buffer, &Self::CURRENT_VERSION)?; - bincode::serialize_into(&mut buffer, &self.dirs)?; - - Ok((buffer, buffer_size)) - })() - .context("could not serialize store")?; - + let buffer = self.dirs.to_bytes()?; let mut file = NamedTempFile::new_in(&self.data_dir).with_context(|| { format!( "could not create temporary store in: {}", @@ -113,7 +34,10 @@ impl Store { ) })?; - let _ = file.as_file().set_len(buffer_size); + // Preallocate enough space on the file, preventing copying later on. + // This optimization may fail on some filesystems, but it is safe to + // ignore it and proceed. + let _ = file.as_file().set_len(buffer.len() as _); file.write_all(&buffer).with_context(|| { format!( "could not write to temporary store: {}", @@ -121,7 +45,7 @@ impl Store { ) })?; - let path = Self::get_path(&self.data_dir); + let path = store_path(&self.data_dir); persist(file, &path) .with_context(|| format!("could not replace store: {}", path.display()))?; @@ -135,7 +59,7 @@ impl Store { match self.dirs.iter_mut().find(|dir| dir.path == path) { None => self.dirs.push(Dir { - path: path.into(), + path: Cow::Owned(path.into()), last_accessed: now, rank: 1.0, }), @@ -148,13 +72,13 @@ impl Store { self.modified = true; } - pub fn iter_matches<'a>( - &'a mut self, - query: &'a Query, + pub fn iter_matches<'b>( + &'b mut self, + query: &'b Query, now: Epoch, - ) -> impl DoubleEndedIterator { + ) -> impl DoubleEndedIterator { self.dirs - .sort_unstable_by_key(|dir| Reverse(OrderedFloat(dir.get_score(now)))); + .sort_unstable_by_key(|dir| Reverse(OrderedFloat(dir.score(now)))); self.dirs.iter().filter(move |dir| dir.is_match(&query)) } @@ -188,23 +112,18 @@ impl Store { self.modified = true; } } - - fn get_path>(data_dir: P) -> PathBuf { - data_dir.as_ref().join("db.zo") - } } -impl Drop for Store { +impl Drop for Store<'_> { fn drop(&mut self) { + // Since the error can't be properly handled here, + // pretty-print it instead. if let Err(e) = self.save() { println!("Error: {}", e) } } } -#[derive(Debug, Deserialize, Eq, PartialEq, Serialize)] -pub struct StoreVersion(pub u32); - #[cfg(windows)] fn persist>(mut file: NamedTempFile, path: P) -> Result<(), PersistError> { use rand::distributions::{Distribution, Uniform}; @@ -240,6 +159,62 @@ fn persist>(file: NamedTempFile, path: P) -> Result<(), PersistEr Ok(()) } +pub struct StoreBuilder { + data_dir: PathBuf, + buffer: Vec, +} + +impl StoreBuilder { + pub fn new>(data_dir: P) -> StoreBuilder { + StoreBuilder { + data_dir: data_dir.into(), + buffer: Vec::new(), + } + } + + pub fn build(&mut self) -> Result { + // Read the entire store to memory. For smaller files, this is faster + // than mmap / streaming, and allows for zero-copy deserialization. + let path = store_path(&self.data_dir); + match fs::read(&path) { + Ok(buffer) => { + self.buffer = buffer; + let dirs = DirList::from_bytes(&self.buffer) + .with_context(|| format!("could not deserialize store: {}", path.display()))?; + Ok(Store { + dirs, + modified: false, + data_dir: &self.data_dir, + }) + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + // Create data directory, but don't create any file yet. + // The file will be created later by [`Store::save`] + // if any data is modified. + fs::create_dir_all(&self.data_dir).with_context(|| { + format!( + "unable to create data directory: {}", + self.data_dir.display() + ) + })?; + Ok(Store { + dirs: DirList::new(), + modified: false, + data_dir: &self.data_dir, + }) + } + Err(e) => { + Err(e).with_context(|| format!("could not read from store: {}", path.display())) + } + } + } +} + +fn store_path>(data_dir: P) -> PathBuf { + const STORE_FILENAME: &str = "db.zo"; + data_dir.as_ref().join(STORE_FILENAME) +} + #[cfg(test)] mod tests { use super::*; @@ -255,12 +230,14 @@ mod tests { let data_dir = tempfile::tempdir().unwrap(); { - let mut store = Store::open(data_dir.path()).unwrap(); + let mut store = StoreBuilder::new(data_dir.path()); + let mut store = store.build().unwrap(); store.add(path, now); store.add(path, now); } { - let store = Store::open(data_dir.path()).unwrap(); + let mut store = StoreBuilder::new(data_dir.path()); + let store = store.build().unwrap(); assert_eq!(store.dirs.len(), 1); let dir = &store.dirs[0]; @@ -280,15 +257,18 @@ mod tests { let data_dir = tempfile::tempdir().unwrap(); { - let mut store = Store::open(data_dir.path()).unwrap(); + let mut store = StoreBuilder::new(data_dir.path()); + let mut store = store.build().unwrap(); store.add(path, now); } { - let mut store = Store::open(data_dir.path()).unwrap(); + let mut store = StoreBuilder::new(data_dir.path()); + let mut store = store.build().unwrap(); assert!(store.remove(path)); } { - let mut store = Store::open(data_dir.path()).unwrap(); + let mut store = StoreBuilder::new(data_dir.path()); + let mut store = store.build().unwrap(); assert!(store.dirs.is_empty()); assert!(!store.remove(path)); }