Speed up imports (#203)

This commit is contained in:
Ajeet D'Souza 2021-05-04 11:45:28 +05:30 committed by GitHub
parent 6d3cb4bc99
commit 1075ba5a50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 123 additions and 154 deletions

View File

@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Added
- Auto-generated shell completions.
### Fixed
- Nushell: `__zoxide_hook` should not do anything inside subshells created using `enter`.
## [0.7.0] - 2021-05-02
### Added

View File

@ -1,11 +1,13 @@
use super::Run;
use crate::app::{Import, ImportFrom};
use crate::config;
use crate::import::{Autojump, Import as _, Z};
use crate::util;
use crate::db::{Database, DatabaseFile, Dir, DirList};
use crate::db::DatabaseFile;
use anyhow::{bail, Result};
use anyhow::{bail, Context, Result};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
impl Run for Import {
fn run(&self) -> Result<()> {
@ -17,14 +19,112 @@ impl Run for Import {
bail!("current database is not empty, specify --merge to continue anyway");
}
let resolve_symlinks = config::zo_resolve_symlinks();
match self.from {
ImportFrom::Autojump => Autojump {
resolve_symlinks,
now: util::current_time()?,
}
.import(&mut db, &self.path),
ImportFrom::Z => Z { resolve_symlinks }.import(&mut db, &self.path),
ImportFrom::Autojump => from_autojump(&mut db, &self.path),
ImportFrom::Z => from_z(&mut db, &self.path),
}
.context("import error")
}
}
fn from_autojump<P: AsRef<Path>>(db: &mut Database, path: P) -> Result<()> {
let path = path.as_ref();
let buffer = fs::read_to_string(path)
.with_context(|| format!("could not open autojump database: {}", path.display()))?;
let mut dirs = db
.dirs
.iter()
.map(|dir| (dir.path.as_ref(), dir.clone()))
.collect::<HashMap<_, _>>();
for line in buffer.lines() {
if line.is_empty() {
continue;
}
let mut split = line.splitn(2, '\t');
let rank = split
.next()
.with_context(|| format!("invalid entry: {}", line))?;
let mut rank = rank
.parse::<f64>()
.with_context(|| format!("invalid rank: {}", rank))?;
// Normalize the rank using a sigmoid function. Don't import actual
// ranks from autojump, since its scoring algorithm is very different,
// and might take a while to get normalized.
rank = 1.0 / (1.0 + (-rank).exp());
let path = split
.next()
.with_context(|| format!("invalid entry: {}", line))?;
dirs.entry(path)
.and_modify(|dir| dir.rank += rank)
.or_insert_with(|| Dir {
path: path.to_string().into(),
rank,
last_accessed: 0,
});
}
db.dirs = DirList(dirs.into_iter().map(|(_, dir)| dir).collect());
db.modified = true;
Ok(())
}
fn from_z<P: AsRef<Path>>(db: &mut Database, path: P) -> Result<()> {
let path = path.as_ref();
let buffer = fs::read_to_string(path)
.with_context(|| format!("could not open z database: {}", path.display()))?;
let mut dirs = db
.dirs
.iter()
.map(|dir| (dir.path.as_ref(), dir.clone()))
.collect::<HashMap<_, _>>();
for line in buffer.lines() {
if line.is_empty() {
continue;
}
let mut split = line.rsplitn(3, '|');
let last_accessed = split
.next()
.with_context(|| format!("invalid entry: {}", line))?;
let last_accessed = last_accessed
.parse()
.with_context(|| format!("invalid epoch: {}", last_accessed))?;
let rank = split
.next()
.with_context(|| format!("invalid entry: {}", line))?;
let rank = rank
.parse()
.with_context(|| format!("invalid rank: {}", rank))?;
let path = split
.next()
.with_context(|| format!("invalid entry: {}", line))?;
dirs.entry(path)
.and_modify(|dir| {
dir.rank += rank;
if last_accessed > dir.last_accessed {
dir.last_accessed = last_accessed;
}
})
.or_insert(Dir {
path: path.to_string().into(),
rank,
last_accessed,
});
}
db.dirs = DirList(dirs.into_iter().map(|(_, dir)| dir).collect());
db.modified = true;
Ok(())
}

View File

@ -10,7 +10,7 @@ use std::fs;
use std::ops::{Deref, DerefMut};
#[derive(Debug, Deserialize, Serialize)]
pub struct DirList<'a>(#[serde(borrow)] Vec<Dir<'a>>);
pub struct DirList<'a>(#[serde(borrow)] pub Vec<Dir<'a>>);
impl DirList<'_> {
const VERSION: u32 = 3;
@ -86,7 +86,7 @@ impl<'a> From<Vec<Dir<'a>>> for DirList<'a> {
}
}
#[derive(Debug, Deserialize, Serialize)]
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Dir<'a> {
#[serde(borrow)]
pub path: Cow<'a, str>,

View File

@ -1,62 +0,0 @@
use super::Import;
use crate::db::{Database, Dir, Epoch};
use anyhow::{Context, Result};
use std::borrow::Cow;
use std::fs;
use std::path::Path;
pub struct Autojump {
pub resolve_symlinks: bool,
pub now: Epoch,
}
impl Import for Autojump {
fn import<P: AsRef<Path>>(&self, db: &mut Database, path: P) -> Result<()> {
let path = path.as_ref();
let buffer = fs::read_to_string(path)
.with_context(|| format!("could not open autojump database: {}", path.display()))?;
let mut entries = Vec::new();
for (idx, line) in buffer.lines().enumerate() {
(|| -> Result<()> {
if line.is_empty() {
return Ok(());
}
let (rank, path) = (|| {
let mut split = line.splitn(2, '\t');
let rank = split.next()?;
let path = split.next()?;
Some((rank, path))
})()
.with_context(|| format!("invalid entry: {}", line))?;
let rank = rank
.parse::<f64>()
.with_context(|| format!("invalid rank: {}", rank))?;
entries.push((path, rank));
Ok(())
})()
.with_context(|| format!("line {}: error reading from autojump database", idx + 1))?;
}
// Don't import actual ranks from autojump, since its algorithm is
// very different, and might take a while to get normalized.
let rank_sum = entries.iter().map(|(_, rank)| rank).sum::<f64>();
for &(path, rank) in entries.iter() {
if !db.dirs.iter().any(|dir| dir.path == path) {
db.dirs.push(Dir {
path: Cow::Owned(path.into()),
rank: rank / rank_sum,
last_accessed: self.now,
});
db.modified = true;
}
}
Ok(())
}
}

View File

@ -1,14 +0,0 @@
mod autojump;
mod z;
use crate::db::Database;
use anyhow::Result;
use std::path::Path;
pub use autojump::Autojump;
pub use z::Z;
/// Interface shared by the database importers in this module.
pub trait Import {
    /// Imports the data found at `path` into `db`.
    fn import<P>(&self, db: &mut Database, path: P) -> Result<()>
    where
        P: AsRef<Path>;
}

View File

@ -1,64 +0,0 @@
use super::Import;
use crate::db::{Database, Dir};
use anyhow::{Context, Result};
use std::borrow::Cow;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
pub struct Z {
pub resolve_symlinks: bool,
}
impl Import for Z {
fn import<P: AsRef<Path>>(&self, db: &mut Database, path: P) -> Result<()> {
let file = File::open(path).context("could not open z database")?;
let reader = BufReader::new(file);
for (idx, line) in reader.lines().enumerate() {
(|| -> Result<()> {
let line = line?;
if line.is_empty() {
return Ok(());
}
let (path, rank, last_accessed) = (|| {
let mut split = line.rsplitn(3, '|');
let last_accessed = split.next()?;
let rank = split.next()?;
let path = split.next()?;
Some((path, rank, last_accessed))
})()
.with_context(|| format!("invalid entry: {}", line))?;
let rank = rank
.parse()
.with_context(|| format!("invalid rank: {}", rank))?;
let last_accessed = last_accessed
.parse()
.with_context(|| format!("invalid epoch: {}", last_accessed))?;
match db.dirs.iter_mut().find(|dir| dir.path == path) {
Some(dir) => {
dir.rank += rank;
dir.last_accessed = dir.last_accessed.max(last_accessed);
}
None => db.dirs.push(Dir {
path: Cow::Owned(path.into()),
rank,
last_accessed,
}),
}
db.modified = true;
Ok(())
})()
.with_context(|| format!("line {}: error reading from z database", idx + 1))?;
}
Ok(())
}
}

View File

@ -4,7 +4,6 @@ mod config;
mod db;
mod error;
mod fzf;
mod import;
mod shell;
mod util;