Avoid the use of HashMap for imports

This commit is contained in:
Ajeet D'Souza 2021-06-01 08:59:24 +05:30
parent 9ef2ef5a6d
commit fcdfb19fd1
4 changed files with 146 additions and 45 deletions

View File

@ -62,8 +62,8 @@ _arguments "${_arguments_options[@]}" \
'(-l --list)--interactive[Use interactive selection]' \
'(-i --interactive)-l[List all matching directories]' \
'(-i --interactive)--list[List all matching directories]' \
'-s[Print score with results]' \
'--score[Print score with results]' \
'(-i --interactive)-s[Print score with results]' \
'(-i --interactive)--score[Print score with results]' \
'-h[Prints help information]' \
'--help[Prints help information]' \
'*::keywords:' \

View File

@ -114,7 +114,7 @@ pub struct Query {
pub list: bool,
/// Print score with results
#[clap(long, short)]
#[clap(long, short, conflicts_with = "interactive")]
pub score: bool,
/// Exclude a path from results

View File

@ -1,17 +1,18 @@
use crate::app::{Import, ImportFrom, Run};
use crate::config;
use crate::db::{Database, DatabaseFile, Dir, DirList};
use crate::db::{Database, DatabaseFile, Dir};
use anyhow::{bail, Context, Result};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
impl Run for Import {
fn run(&self) -> Result<()> {
let data_dir = config::data_dir()?;
let buffer = &fs::read_to_string(&self.path).with_context(|| {
format!("could not open database for importing: {}", &self.path.display())
})?;
let data_dir = config::data_dir()?;
let mut db = DatabaseFile::new(data_dir);
let db = &mut db.open()?;
if !self.merge && !db.dirs.is_empty() {
@ -19,21 +20,16 @@ impl Run for Import {
}
match self.from {
ImportFrom::Autojump => from_autojump(db, &self.path),
ImportFrom::Z => from_z(db, &self.path),
ImportFrom::Autojump => from_autojump(db, buffer),
ImportFrom::Z => from_z(db, buffer),
}
.context("import error")
.context("import error")?;
Ok(())
}
}
fn from_autojump<P: AsRef<Path>>(db: &mut Database, path: P) -> Result<()> {
let path = path.as_ref();
let buffer = fs::read_to_string(path)
.with_context(|| format!("could not open autojump database: {}", path.display()))?;
let mut dirs =
db.dirs.iter().map(|dir| (dir.path.as_ref(), dir.clone())).collect::<HashMap<_, _>>();
fn from_autojump<'a>(db: &mut Database<'a>, buffer: &'a str) -> Result<()> {
for line in buffer.lines() {
if line.is_empty() {
continue;
@ -45,31 +41,22 @@ fn from_autojump<P: AsRef<Path>>(db: &mut Database, path: P) -> Result<()> {
// Normalize the rank using a sigmoid function. Don't import actual
// ranks from autojump, since its scoring algorithm is very different,
// and might take a while to get normalized.
rank = 1.0 / (1.0 + (-rank).exp());
rank = sigmoid(rank);
let path = split.next().with_context(|| format!("invalid entry: {}", line))?;
dirs.entry(path).and_modify(|dir| dir.rank += rank).or_insert_with(|| Dir {
path: path.to_string().into(),
rank,
last_accessed: 0,
});
db.dirs.push(Dir { path: path.into(), rank, last_accessed: 0 });
db.modified = true;
}
db.dirs = DirList(dirs.into_iter().map(|(_, dir)| dir).collect());
db.modified = true;
if db.modified {
db.dedup();
}
Ok(())
}
fn from_z<P: AsRef<Path>>(db: &mut Database, path: P) -> Result<()> {
let path = path.as_ref();
let buffer = fs::read_to_string(path)
.with_context(|| format!("could not open z database: {}", path.display()))?;
let mut dirs =
db.dirs.iter().map(|dir| (dir.path.as_ref(), dir.clone())).collect::<HashMap<_, _>>();
fn from_z<'a>(db: &mut Database<'a>, buffer: &'a str) -> Result<()> {
for line in buffer.lines() {
if line.is_empty() {
continue;
@ -85,18 +72,104 @@ fn from_z<P: AsRef<Path>>(db: &mut Database, path: P) -> Result<()> {
let path = split.next().with_context(|| format!("invalid entry: {}", line))?;
dirs.entry(path)
.and_modify(|dir| {
dir.rank += rank;
if last_accessed > dir.last_accessed {
dir.last_accessed = last_accessed;
}
})
.or_insert(Dir { path: path.to_string().into(), rank, last_accessed });
db.dirs.push(Dir { path: path.into(), rank, last_accessed });
db.modified = true;
}
db.dirs = DirList(dirs.into_iter().map(|(_, dir)| dir).collect());
db.modified = true;
if db.modified {
db.dedup();
}
Ok(())
}
/// Logistic function: maps any real `x` into the open interval (0, 1).
/// Used to squash foreign (autojump) ranks into a normalized range
/// before importing them into the database.
fn sigmoid(x: f64) -> f64 {
    // 1 / (1 + e^-x), written via `recip` for the same IEEE result.
    ((-x).exp() + 1.0).recip()
}
// Unit tests for the autojump / z import routines.
#[cfg(test)]
mod tests {
use super::sigmoid;
use crate::db::{Database, Dir};
// Importing an autojump database merges its entries into the existing
// directory list: paths already present get the imported rank added to
// their existing rank (autojump ranks are first squashed through
// `sigmoid`), new paths are appended with `last_accessed = 0`, and blank
// lines in the input are skipped.
#[test]
fn from_autojump() {
// Imported data in autojump's "<rank> <path>" line format.
let buffer = r#"
7.0 /baz
2.0 /foo/bar
5.0 /quux/quuz
"#;
// Pre-existing database contents, deliberately unsorted.
let dirs = vec![
Dir { path: "/quux/quuz".into(), rank: 1.0, last_accessed: 100 },
Dir { path: "/corge/grault/garply".into(), rank: 6.0, last_accessed: 600 },
Dir { path: "/waldo/fred/plugh".into(), rank: 3.0, last_accessed: 300 },
Dir { path: "/xyzzy/thud".into(), rank: 8.0, last_accessed: 800 },
Dir { path: "/foo/bar".into(), rank: 9.0, last_accessed: 900 },
];
// Database requires a data_dir; a throwaway temp dir suffices here.
let data_dir = tempfile::tempdir().unwrap();
let data_dir = &data_dir.path().to_path_buf();
let mut db = Database { dirs: dirs.into(), modified: false, data_dir };
super::from_autojump(&mut db, buffer).unwrap();
// The import's dedup step uses swap_remove and does not preserve
// ordering, so sort by path before comparing against expectations.
db.dirs.sort_by(|dir1, dir2| dir1.path.cmp(&dir2.path));
println!("got: {:?}", &db.dirs.as_slice());
let exp = &[
Dir { path: "/baz".into(), rank: sigmoid(7.0), last_accessed: 0 },
Dir { path: "/corge/grault/garply".into(), rank: 6.0, last_accessed: 600 },
Dir { path: "/foo/bar".into(), rank: 9.0 + sigmoid(2.0), last_accessed: 900 },
Dir { path: "/quux/quuz".into(), rank: 1.0 + sigmoid(5.0), last_accessed: 100 },
Dir { path: "/waldo/fred/plugh".into(), rank: 3.0, last_accessed: 300 },
Dir { path: "/xyzzy/thud".into(), rank: 8.0, last_accessed: 800 },
];
println!("exp: {:?}", &exp);
// Compare field-by-field; ranks are floats, so allow a tolerance.
for (dir1, dir2) in db.dirs.iter().zip(exp) {
assert_eq!(dir1.path, dir2.path);
assert!((dir1.rank - dir2.rank).abs() < 0.01);
assert_eq!(dir1.last_accessed, dir2.last_accessed);
}
}
// Importing a z database merges entries by path: ranks are summed
// (imported ranks are used as-is, no sigmoid) and the most recent
// `last_accessed` wins. Duplicate paths inside the imported file itself
// (/quux/quuz appears twice below) are also merged.
#[test]
fn from_z() {
// Imported data in z's "<path>|<rank>|<last_accessed>" line format.
let buffer = r#"
/baz|7|700
/quux/quuz|4|400
/foo/bar|2|200
/quux/quuz|5|500
"#;
// Pre-existing database contents, deliberately unsorted.
let dirs = vec![
Dir { path: "/quux/quuz".into(), rank: 1.0, last_accessed: 100 },
Dir { path: "/corge/grault/garply".into(), rank: 6.0, last_accessed: 600 },
Dir { path: "/waldo/fred/plugh".into(), rank: 3.0, last_accessed: 300 },
Dir { path: "/xyzzy/thud".into(), rank: 8.0, last_accessed: 800 },
Dir { path: "/foo/bar".into(), rank: 9.0, last_accessed: 900 },
];
let data_dir = tempfile::tempdir().unwrap();
let data_dir = &data_dir.path().to_path_buf();
let mut db = Database { dirs: dirs.into(), modified: false, data_dir };
super::from_z(&mut db, buffer).unwrap();
// Sort by path: import order is not guaranteed (see dedup note above
// in from_autojump).
db.dirs.sort_by(|dir1, dir2| dir1.path.cmp(&dir2.path));
println!("got: {:?}", &db.dirs.as_slice());
let exp = &[
// /quux/quuz: 1.0 + 4 + 5 = 10.0, latest access 500.
Dir { path: "/baz".into(), rank: 7.0, last_accessed: 700 },
Dir { path: "/corge/grault/garply".into(), rank: 6.0, last_accessed: 600 },
Dir { path: "/foo/bar".into(), rank: 11.0, last_accessed: 900 },
Dir { path: "/quux/quuz".into(), rank: 10.0, last_accessed: 500 },
Dir { path: "/waldo/fred/plugh".into(), rank: 3.0, last_accessed: 300 },
Dir { path: "/xyzzy/thud".into(), rank: 8.0, last_accessed: 800 },
];
println!("exp: {:?}", &exp);
// Compare field-by-field; ranks are floats, so allow a tolerance.
for (dir1, dir2) in db.dirs.iter().zip(exp) {
assert_eq!(dir1.path, dir2.path);
assert!((dir1.rank - dir2.rank).abs() < 0.01);
assert_eq!(dir1.last_accessed, dir2.last_accessed);
}
}
}

View File

@ -11,10 +11,11 @@ use std::fs;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
#[derive(Debug)]
pub struct Database<'file> {
pub dirs: DirList<'file>,
pub modified: bool,
data_dir: &'file PathBuf,
pub data_dir: &'file PathBuf,
}
impl<'file> Database<'file> {
@ -61,6 +62,33 @@ impl<'file> Database<'file> {
self.modified = true;
}
/// Merges duplicate paths in the directory list: for each group of
/// entries sharing a path, their ranks are summed and the most recent
/// `last_accessed` is kept. Sets `self.modified` if any duplicate was
/// removed. NOTE: `swap_remove` disturbs the sort order, so the list is
/// not guaranteed to be path-sorted when this returns.
pub fn dedup(&mut self) {
// Sort by path, so that equal paths are next to each other.
self.dirs.sort_by(|dir1, dir2| dir1.path.cmp(&dir2.path));
// Walk backwards so that removing index `idx` never shifts the
// not-yet-visited indices below it.
for idx in (1..self.dirs.len()).rev() {
// Check if curr_dir and next_dir have equal paths.
let curr_dir = &self.dirs[idx];
let next_dir = &self.dirs[idx - 1];
if next_dir.path != curr_dir.path {
continue;
}
// Merge curr_dir's rank and last_accessed into next_dir.
// Copy the values out first, ending the shared borrows before
// taking the mutable borrow below.
let rank = curr_dir.rank;
let last_accessed = curr_dir.last_accessed;
let next_dir = &mut self.dirs[idx - 1];
if next_dir.last_accessed < last_accessed {
next_dir.last_accessed = last_accessed;
}
next_dir.rank += rank;
// Delete curr_dir. swap_remove is O(1) but replaces position
// `idx` with the current last element.
self.dirs.swap_remove(idx);
self.modified = true;
}
}
// Streaming iterator for directories.
pub fn stream(&mut self, now: Epoch) -> Stream<'_, 'file> {
Stream::new(self, now)