Optimize query algorithm

This commit is contained in:
Ajeet D'Souza 2020-05-15 19:40:54 +05:30
parent 08cf7dfa86
commit efd615b910
5 changed files with 46 additions and 42 deletions

7
Cargo.lock generated
View File

@ -141,6 +141,11 @@ name = "dunce"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "float-ord"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "getrandom"
version = "0.1.14"
@ -438,6 +443,7 @@ dependencies = [
"clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)",
"dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -463,6 +469,7 @@ dependencies = [
"checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3"
"checksum dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b"
"checksum dunce 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0ad6bf6a88548d1126045c413548df1453d9be094a8ab9fd59bf1fdd338da4f"
"checksum float-ord 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bad48618fdb549078c333a7a8528acb57af271d0433bdecd523eb620628364e"
"checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
"checksum hermit-abi 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"

View File

@ -18,6 +18,7 @@ bincode = "1.2.1"
clap = "2.33.0"
dirs = "2.0.2"
dunce = "1.0.0"
float-ord = "0.2.0"
serde = { version = "1.0.106", features = ["derive"] }
structopt = "0.3.12"
uuid = { version = "0.8.1", features = ["v4"] }

View File

@ -4,7 +4,6 @@ use anyhow::{bail, Context, Result};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use std::cmp::Ordering;
use std::fs::{self, File, OpenOptions};
use std::io::{self, BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
@ -15,9 +14,9 @@ pub use i32 as DBVersion;
struct DbVersion(u32);
pub struct Db {
pub dirs: Vec<Dir>,
pub modified: bool,
data_dir: PathBuf,
dirs: Vec<Dir>,
modified: bool,
}
impl Db {
@ -34,9 +33,9 @@ impl Db {
Ok(buffer) => buffer,
Err(e) if e.kind() == io::ErrorKind::NotFound => {
return Ok(Db {
data_dir,
modified: false,
dirs: Vec::new(),
modified: false,
data_dir,
})
}
Err(e) => {
@ -48,9 +47,9 @@ impl Db {
if buffer.is_empty() {
return Ok(Db {
data_dir,
modified: false,
dirs: Vec::new(),
modified: false,
data_dir,
});
}
@ -87,9 +86,9 @@ impl Db {
};
Ok(Db {
data_dir,
modified: false,
dirs,
modified: false,
data_dir,
})
}
@ -271,27 +270,6 @@ impl Db {
Ok(())
}
/// Returns the entry of `self.dirs` that matches `keywords` and yields the
/// largest `get_frecency(now)` value, or `None` when no entry matches.
///
/// If the best-scoring match fails its `is_dir()` check, that entry is
/// removed from `self.dirs`, `self.modified` is set, and the search is
/// repeated over the remaining entries.
pub fn query(&mut self, keywords: &[String], now: Epoch) -> Option<&Dir> {
// Find the best-scoring match and remember its index; `?` returns `None`
// early when nothing matches.
let (idx, dir, _) = self
.dirs
.iter()
.enumerate()
.filter(|(_, dir)| dir.is_match(&keywords))
.map(|(idx, dir)| (idx, dir, dir.get_frecency(now)))
.max_by(|(_, _, frecency1), (_, _, frecency2)| {
// Scores that `partial_cmp` cannot order are treated as equal.
frecency1.partial_cmp(frecency2).unwrap_or(Ordering::Equal)
})?;
if dir.is_dir() {
// FIXME: change this to Some(dir) once the MIR borrow checker comes to stable Rust
Some(&self.dirs[idx])
} else {
// Drop the entry that failed `is_dir()` (`swap_remove` does not keep
// the vector's order) and look for the next-best match.
self.dirs.swap_remove(idx);
self.modified = true;
self.query(keywords, now)
}
}
pub fn query_many<'a>(&'a mut self, keywords: &'a [String]) -> impl Iterator<Item = &'a Dir> {
self.query_all()
.iter()
@ -300,7 +278,7 @@ impl Db {
pub fn query_all(&mut self) -> &[Dir] {
let orig_len = self.dirs.len();
self.dirs.retain(Dir::is_dir);
self.dirs.retain(Dir::is_valid);
if orig_len != self.dirs.len() {
self.modified = true;

View File

@ -13,8 +13,8 @@ pub struct Dir {
}
impl Dir {
pub fn is_dir(&self) -> bool {
self.path.is_dir()
/// Reports whether this entry should be kept: its `rank` must be a finite
/// number of at least `1.0`, and its `path` must currently be a directory.
pub fn is_valid(&self) -> bool {
    // Check the rank first so the filesystem is only consulted when the
    // numeric conditions already hold.
    let rank_ok = self.rank.is_finite() && self.rank >= 1.0;
    rank_ok && self.path.is_dir()
}
#[cfg(unix)]

View File

@ -1,6 +1,8 @@
use crate::db::Db;
use crate::util;
use anyhow::{bail, Result};
use float_ord::FloatOrd;
use structopt::StructOpt;
use std::io::{self, Write};
@ -19,7 +21,8 @@ impl Query {
let path_opt = if self.interactive {
self.query_interactive()?
} else {
self.query()?
let mut db = util::get_db()?;
self.query(&mut db)?
};
match path_opt {
@ -35,7 +38,8 @@ impl Query {
Ok(())
}
fn query(&self) -> Result<Option<Vec<u8>>> {
fn query(&self, db: &mut Db) -> Result<Option<Vec<u8>>> {
// if the input is already a valid path, simply return it
if let [path] = self.keywords.as_slice() {
if Path::new(path).is_dir() {
return Ok(Some(path.as_bytes().to_vec()));
@ -50,14 +54,28 @@ impl Query {
.map(|keyword| keyword.to_lowercase())
.collect::<Vec<_>>();
let path_opt = util::get_db()?.query(&keywords, now).map(|dir| {
// `path_to_bytes` is guaranteed to succeed here since
// the path has already been queried successfully
let path_bytes = util::path_to_bytes(&dir.path).unwrap();
path_bytes.to_vec()
});
db.dirs
.sort_unstable_by_key(|dir| FloatOrd(dir.get_frecency(now)));
Ok(path_opt)
// Iterating in reverse order ensures that the directory indices do not
// change as we remove them.
for idx in (0..db.dirs.len()).rev() {
let dir = &db.dirs[idx];
if !dir.is_match(&keywords) {
continue;
}
if !dir.is_valid() {
db.dirs.swap_remove(idx);
db.modified = true;
continue;
}
let path = util::path_to_bytes(&dir.path)?.to_vec();
return Ok(Some(path));
}
Ok(None)
}
fn query_interactive(&self) -> Result<Option<Vec<u8>>> {