From 4e49b91d2360c23a8ac959222f2d3e87409d8faa Mon Sep 17 00:00:00 2001 From: Ben S Date: Wed, 2 Sep 2015 23:19:10 +0100 Subject: [PATCH] Parallelise the details view! This commit removes the threadpool in `main.rs` that stats each command-line argument separately, and replaces it with a *scoped* threadpool in `options/details.rs` that builds the table in parallel! Running this on my machine halves the execution time when tree-ing my entire home directory (which isn't exactly a common occurrence, but it's the only way to give exa a large running time) The statting will be added back in parallel at a later stage. This was facilitated by the previous changes to recursion that made it easier to deal with. There's a lot of large sweeping architectural changes. Here's a smattering of them: - In `main.rs`, the files are now passed around as vectors of files rather than array slices of files. This is because `File`s aren't `Clone`, and the `Vec` is necessary to give away ownership of the files at the appropriate point. - In the details view, files are now sorted *all* the time, rather than obeying the command-line order. As they're run in parallel, they have no guaranteed order anyway, so we *have* to sort them again. (I'm not sure if this should be the intended behaviour or not!) This means that the `Details` struct has to have the filter *all* the time, not only while recursing, so it's been moved out of the `recurse` field. - We use `scoped_threadpool` over `threadpool`, a recent addition. It's only safely used on Nightly, which we're using anyway, so that's OK! - Removed a bunch of out-of-date comments. This also fixes #77, mainly by accident :) --- Cargo.lock | 28 ++++-- Cargo.toml | 2 +- src/dir.rs | 2 +- src/file.rs | 4 +- src/main.rs | 193 ++++++++++++++++-------------------------- src/options.rs | 67 ++++++++------- src/output/details.rs | 173 ++++++++++++++++++++++--------------- 7 files changed, 242 insertions(+), 227 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9bbcfec..84c0542 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,8 +13,8 @@ dependencies = [ "num_cpus 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "number_prefix 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", "pad 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "scoped_threadpool 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "term_grid 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "threadpool 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "users 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -278,6 +278,27 @@ name = "rustc-serialize" version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "rustc_version" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "semver 0.1.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "scoped_threadpool" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rustc_version 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "semver" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "tempdir" version = "0.3.4" @@ -294,11 +315,6 @@ dependencies = [ "unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "threadpool" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "tz" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 71dbac9..64df420 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,8 +17,8 @@ natord = "1.0.7" num_cpus = "*" number_prefix = "0.2.3" pad = "0.1.1" +scoped_threadpool = "*" term_grid = "*" -threadpool = "*" unicode-width = "*" users = "0.4.0" diff --git a/src/dir.rs b/src/dir.rs index 414ebd3..d7adb09 100644 --- a/src/dir.rs +++ b/src/dir.rs @@ -15,7 +15,7 @@ use file::{File, fields}; /// accordingly. (See `File#get_source_files`) pub struct Dir { contents: Vec, - path: PathBuf, + pub path: PathBuf, git: Option, } diff --git a/src/file.rs b/src/file.rs index be8f960..89c8a13 100644 --- a/src/file.rs +++ b/src/file.rs @@ -83,8 +83,8 @@ impl<'dir> File<'dir> { self.metadata.is_dir() } - pub fn to_dir(&self) -> io::Result { - Dir::readdir(&*self.path, false) + pub fn to_dir(&self, scan_for_git: bool) -> io::Result { + Dir::readdir(&*self.path, scan_for_git) } /// Whether this file is a regular file on the filesystem - that is, not a diff --git a/src/main.rs b/src/main.rs index e64f3fb..fd9e527 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,8 +11,8 @@ extern crate natord; extern crate num_cpus; extern crate number_prefix; extern crate pad; +extern crate scoped_threadpool; extern crate term_grid; -extern crate threadpool; extern crate unicode_width; extern crate users; @@ -21,12 +21,8 @@ extern crate git2; use std::env; -use std::fs; -use std::path::{Component, Path, PathBuf}; +use std::path::{Component, Path}; use std::process; -use std::sync::mpsc::channel; - -use threadpool::ThreadPool; use dir::Dir; use file::File; @@ -44,94 +40,48 @@ mod term; #[cfg(not(test))] -struct Exa<'dir> { - count: usize, +struct Exa { options: Options, - dirs: Vec, - files: Vec>, } #[cfg(not(test))] -impl<'dir> Exa<'dir> { - fn new(options: Options) -> Exa<'dir> { - Exa { - count: 0, - options: options, - dirs: Vec::new(), - files: Vec::new(), - } +impl Exa { + fn new(options: Options) -> Exa { + Exa { options: options } } - fn load(&mut self, files: &[String]) { + fn run(&mut self, args_file_names: &[String]) { + let mut files = Vec::new(); + let mut dirs = Vec::new(); - // Separate the user-supplied paths into directories and files. - // Files are shown first, and then each directory is expanded - // and listed second. - let is_tree = self.options.dir_action.is_tree() || self.options.dir_action.is_as_file(); - let total_files = files.len(); - - - // Communication between consumer thread and producer threads - enum StatResult<'dir> { - File(File<'dir>), - Dir(PathBuf), - Error - } - - let pool = ThreadPool::new(8 * num_cpus::get()); - let (tx, rx) = channel(); - - for file in files.iter() { - let tx = tx.clone(); - let file = file.clone(); - - // Spawn producer thread - pool.execute(move || { - let path = Path::new(&*file); - let _ = tx.send(match fs::metadata(&path) { - Ok(metadata) => { - if is_tree || !metadata.is_dir() { - StatResult::File(File::with_metadata(metadata, &path, None)) - } - else { - StatResult::Dir(path.to_path_buf()) + for file_name in args_file_names.iter() { + match File::from_path(Path::new(&file_name), None) { + Err(e) => { + println!("{}: {}", file_name, e); + }, + Ok(f) => { + if f.is_directory() && !self.options.dir_action.treat_dirs_as_files() { + match f.to_dir(self.options.should_scan_for_git()) { + Ok(d) => dirs.push(d), + Err(e) => println!("{}: {}", file_name, e), } } - Err(e) => { - println!("{}: {}", file, e); - StatResult::Error + else { + files.push(f); } - }); - }); - } - - // Spawn consumer thread - for result in rx.iter().take(total_files) { - match result { - StatResult::File(file) => self.files.push(file), - StatResult::Dir(path) => self.dirs.push(path), - StatResult::Error => () + }, } - self.count += 1; } + + let any_files = files.is_empty(); + self.print_files(None, files); + + let is_only_dir = dirs.len() == 1; + self.print_dirs(dirs, any_files, is_only_dir); } - fn print_files(&self) { - if !self.files.is_empty() { - self.print(None, &self.files[..]); - } - } - - fn print_dirs(&mut self) { - let mut first = self.files.is_empty(); - - // Directories are put on a stack rather than just being iterated through, - // as the vector can change as more directories are added. - loop { - let dir_path = match self.dirs.pop() { - None => break, - Some(f) => f, - }; + fn print_dirs(&self, dir_files: Vec, mut first: bool, is_only_dir: bool) { + for dir in dir_files { // Put a gap between directories, or between the list of files and the // first directory. @@ -142,53 +92,54 @@ impl<'dir> Exa<'dir> { print!("\n"); } - match Dir::readdir(&dir_path, self.options.should_scan_for_git()) { - Ok(ref dir) => { - let mut files = Vec::new(); + if !is_only_dir { + println!("{}:", dir.path.display()); + } - for file in dir.files() { - match file { - Ok(file) => files.push(file), - Err((path, e)) => println!("[{}: {}]", path.display(), e), - } - } - - self.options.transform_files(&mut files); - - // When recursing, add any directories to the dirs stack - // backwards: the *last* element of the stack is used each - // time, so by inserting them backwards, they get displayed in - // the correct sort order. - if let Some(recurse_opts) = self.options.dir_action.recurse_options() { - let depth = dir_path.components().filter(|&c| c != Component::CurDir).count() + 1; - if !recurse_opts.tree && !recurse_opts.is_too_deep(depth) { - for dir in files.iter().filter(|f| f.is_directory()).rev() { - self.dirs.push(dir.path.clone()); - } - } - } - - if self.count > 1 { - println!("{}:", dir_path.display()); - } - self.count += 1; - - self.print(Some(dir), &files[..]); - } - Err(e) => { - println!("{}: {}", dir_path.display(), e); - return; + let mut children = Vec::new(); + for file in dir.files() { + match file { + Ok(file) => children.push(file), + Err((path, e)) => println!("[{}: {}]", path.display(), e), } }; + + self.options.filter_files(&mut children); + self.options.sort_files(&mut children); + + if let Some(recurse_opts) = self.options.dir_action.recurse_options() { + let depth = dir.path.components().filter(|&c| c != Component::CurDir).count() + 1; + if !recurse_opts.tree && !recurse_opts.is_too_deep(depth) { + + let mut child_dirs = Vec::new(); + for child_dir in children.iter().filter(|f| f.is_directory()) { + match child_dir.to_dir(false) { + Ok(d) => child_dirs.push(d), + Err(e) => println!("{}: {}", child_dir.path.display(), e), + } + } + + self.print_files(Some(&dir), children); + + if !child_dirs.is_empty() { + self.print_dirs(child_dirs, false, false); + } + + continue; + } + } + + self.print_files(Some(&dir), children); + } } - fn print(&self, dir: Option<&Dir>, files: &[File]) { + fn print_files(&self, dir: Option<&Dir>, files: Vec) { match self.options.view { - View::Grid(g) => g.view(files), + View::Grid(g) => g.view(&files), View::Details(d) => d.view(dir, files), - View::GridDetails(gd) => gd.view(dir, files), - View::Lines(l) => l.view(files), + View::GridDetails(gd) => gd.view(dir, &files), + View::Lines(l) => l.view(&files), } } } @@ -201,9 +152,7 @@ fn main() { match Options::getopts(&args) { Ok((options, paths)) => { let mut exa = Exa::new(options); - exa.load(&paths); - exa.print_files(); - exa.print_dirs(); + exa.run(&paths); }, Err(e) => { println!("{}", e); diff --git a/src/options.rs b/src/options.rs index d88eee5..dd76218 100644 --- a/src/options.rs +++ b/src/options.rs @@ -107,8 +107,12 @@ impl Options { }, path_strs)) } - pub fn transform_files(&self, files: &mut Vec) { - self.filter.transform_files(files) + pub fn sort_files(&self, files: &mut Vec) { + self.filter.sort_files(files) + } + + pub fn filter_files(&self, files: &mut Vec) { + self.filter.filter_files(files) } /// Whether the View specified in this set of options includes a Git @@ -124,7 +128,7 @@ impl Options { } -#[derive(PartialEq, Debug, Copy, Clone)] +#[derive(Default, PartialEq, Debug, Copy, Clone)] pub struct FileFilter { list_dirs_first: bool, reverse: bool, @@ -133,26 +137,14 @@ pub struct FileFilter { } impl FileFilter { - /// Transform the files (sorting, reversing, filtering) before listing them. - pub fn transform_files(&self, files: &mut Vec) { - + pub fn filter_files(&self, files: &mut Vec) { if !self.show_invisibles { files.retain(|f| !f.is_dotfile()); } + } - match self.sort_field { - SortField::Unsorted => {}, - SortField::Name => files.sort_by(|a, b| natord::compare(&*a.name, &*b.name)), - SortField::Size => files.sort_by(|a, b| a.metadata.len().cmp(&b.metadata.len())), - SortField::FileInode => files.sort_by(|a, b| a.metadata.ino().cmp(&b.metadata.ino())), - SortField::ModifiedDate => files.sort_by(|a, b| a.metadata.mtime().cmp(&b.metadata.mtime())), - SortField::AccessedDate => files.sort_by(|a, b| a.metadata.atime().cmp(&b.metadata.atime())), - SortField::CreatedDate => files.sort_by(|a, b| a.metadata.ctime().cmp(&b.metadata.ctime())), - SortField::Extension => files.sort_by(|a, b| match a.ext.cmp(&b.ext) { - cmp::Ordering::Equal => natord::compare(&*a.name, &*b.name), - order => order, - }), - } + pub fn sort_files(&self, files: &mut Vec) { + files.sort_by(|a, b| self.compare_files(a, b)); if self.reverse { files.reverse(); @@ -163,6 +155,22 @@ impl FileFilter { files.sort_by(|a, b| b.is_directory().cmp(&a.is_directory())); } } + + pub fn compare_files(&self, a: &File, b: &File) -> cmp::Ordering { + match self.sort_field { + SortField::Unsorted => cmp::Ordering::Equal, + SortField::Name => natord::compare(&*a.name, &*b.name), + SortField::Size => a.metadata.len().cmp(&b.metadata.len()), + SortField::FileInode => a.metadata.ino().cmp(&b.metadata.ino()), + SortField::ModifiedDate => a.metadata.mtime().cmp(&b.metadata.mtime()), + SortField::AccessedDate => a.metadata.atime().cmp(&b.metadata.atime()), + SortField::CreatedDate => a.metadata.ctime().cmp(&b.metadata.ctime()), + SortField::Extension => match a.ext.cmp(&b.ext) { + cmp::Ordering::Equal => natord::compare(&*a.name, &*b.name), + order => order, + }, + } + } } /// User-supplied field to sort by. @@ -280,7 +288,8 @@ impl View { let details = Details { columns: Some(try!(Columns::deduce(matches))), header: matches.opt_present("header"), - recurse: dir_action.recurse_options().map(|o| (o, filter)), + recurse: dir_action.recurse_options(), + filter: filter, xattr: xattr::ENABLED && matches.opt_present("extended"), colours: if dimensions().is_some() { Colours::colourful() } else { Colours::plain() }, }; @@ -328,7 +337,8 @@ impl View { let details = Details { columns: None, header: false, - recurse: dir_action.recurse_options().map(|o| (o, filter)), + recurse: dir_action.recurse_options(), + filter: filter, xattr: false, colours: if dimensions().is_some() { Colours::colourful() } else { Colours::plain() }, }; @@ -406,6 +416,7 @@ impl SizeFormat { } } + #[derive(PartialEq, Debug, Copy, Clone)] pub enum TimeType { FileAccessed, @@ -423,6 +434,7 @@ impl TimeType { } } + #[derive(PartialEq, Debug, Copy, Clone)] pub struct TimeTypes { accessed: bool, @@ -479,6 +491,7 @@ impl TimeTypes { } } + /// What to do when encountering a directory? #[derive(PartialEq, Debug, Copy, Clone)] pub enum DirAction { @@ -510,21 +523,16 @@ impl DirAction { } } - pub fn is_as_file(&self) -> bool { + pub fn treat_dirs_as_files(&self) -> bool { match *self { DirAction::AsFile => true, + DirAction::Recurse(RecurseOptions { tree, .. }) => tree, _ => false, } } - - pub fn is_tree(&self) -> bool { - match *self { - DirAction::Recurse(RecurseOptions { tree, .. }) => tree, - _ => false, - } - } } + #[derive(PartialEq, Debug, Copy, Clone)] pub struct RecurseOptions { pub tree: bool, @@ -559,6 +567,7 @@ impl RecurseOptions { } } + #[derive(PartialEq, Copy, Clone, Debug, Default)] pub struct Columns { size_format: SizeFormat, diff --git a/src/output/details.rs b/src/output/details.rs index 3052448..0757f81 100644 --- a/src/output/details.rs +++ b/src/output/details.rs @@ -49,7 +49,10 @@ pub struct Details { /// Whether to recurse through directories with a tree view, and if so, /// which options to use. This field is only relevant here if the `tree` /// field of the RecurseOptions is `true`. - pub recurse: Option<(RecurseOptions, FileFilter)>, + pub recurse: Option, + + /// How to sort and filter the files after getting their details. + pub filter: FileFilter, /// Whether to show a header line or not. pub header: bool, @@ -63,7 +66,7 @@ pub struct Details { } impl Details { - pub fn view(&self, dir: Option<&Dir>, files: &[File]) { + pub fn view(&self, dir: Option<&Dir>, files: Vec) { // First, transform the Columns object into a vector of columns for // the current directory. @@ -84,76 +87,121 @@ impl Details { /// Adds files to the table - recursively, if the `recurse` option /// is present. - fn add_files_to_table(&self, table: &mut Table, src: &[File], depth: usize) { - for (index, file) in src.iter().enumerate() { - let mut xattrs = Vec::new(); - let mut errors = Vec::new(); + fn add_files_to_table<'dir, U: Users+Send+Sync>(&self, mut table: &mut Table, src: Vec>, depth: usize) { + use num_cpus; + use scoped_threadpool::Pool; + use std::sync::{Arc, Mutex}; - let has_xattrs = match file.path.attributes() { - Ok(xs) => { - let r = !xs.is_empty(); - if self.xattr { - for xattr in xs { - xattrs.push(xattr); - } - } - r - }, - Err(e) => { - if self.xattr { - errors.push((e, None)); - } - true - }, - }; + let mut pool = Pool::new(num_cpus::get() as u32); + let mut file_eggs = Vec::new(); - table.add_file(file, depth, index == src.len() - 1, true, has_xattrs); + struct Egg<'_> { + cells: Vec, + name: Cell, + xattrs: Vec, + errors: Vec<(io::Error, Option)>, + dir: Option, + file: Arc>, + } - // There are two types of recursion that exa supports: a tree - // view, which is dealt with here, and multiple listings, which is - // dealt with in the main module. So only actually recurse if we - // are in tree mode - the other case will be dealt with elsewhere. - if let Some((r, filter)) = self.recurse { - if file.is_directory() && r.tree && !r.is_too_deep(depth) { + pool.scoped(|scoped| { + let file_eggs = Arc::new(Mutex::new(&mut file_eggs)); + let table = Arc::new(Mutex::new(&mut table)); - // Use the filter to remove unwanted files *before* expanding - // them, so we don't examine any directories that wouldn't - // have their contents listed anyway. - match file.to_dir() { - Ok(ref dir) => { - let mut files = Vec::new(); + for file in src.into_iter() { + let file: Arc = Arc::new(file); + let file_eggs = file_eggs.clone(); + let table = table.clone(); - for file_to_add in dir.files() { - match file_to_add { - Ok(f) => files.push(f), - Err((path, e)) => errors.push((e, Some(path))) + scoped.execute(move || { + let mut errors = Vec::new(); + + let mut xattrs = Vec::new(); + match file.path.attributes() { + Ok(xs) => { + if self.xattr { + for xattr in xs { + xattrs.push(xattr); } } - - filter.transform_files(&mut files); - - if !files.is_empty() { - for xattr in xattrs { - table.add_xattr(xattr, depth + 1, false); - } - - for (error, path) in errors { - table.add_error(&error, depth + 1, false, path); - } - - self.add_files_to_table(table, &files, depth + 1); - continue; - } }, Err(e) => { - errors.push((e, None)); + if self.xattr { + errors.push((e, None)); + } }, + }; + + let cells = table.lock().unwrap().cells_for_file(&file, !xattrs.is_empty()); + let links = true; + let name = Cell { text: filename(&file, &self.colours, links), length: file.file_name_width() }; + + let mut dir = None; + + if let Some(r) = self.recurse { + if file.is_directory() && r.tree && !r.is_too_deep(depth) { + if let Ok(d) = file.to_dir(false) { + dir = Some(d); + } + } + }; + + let egg = Egg { + cells: cells, + name: name, + xattrs: xattrs, + errors: errors, + dir: dir, + file: file, + }; + + file_eggs.lock().unwrap().push(egg); + }); + } + }); + + file_eggs.sort_by(|a, b| self.filter.compare_files(&*a.file, &*b.file)); + + let num_eggs = file_eggs.len(); + for (index, egg) in file_eggs.into_iter().enumerate() { + let mut files = Vec::new(); + let mut errors = egg.errors; + + let row = Row { + depth: depth, + cells: Some(egg.cells), + name: egg.name, + last: index == num_eggs - 1, + }; + + table.rows.push(row); + + if let Some(ref dir) = egg.dir { + for file_to_add in dir.files() { + match file_to_add { + Ok(f) => files.push(f), + Err((path, e)) => errors.push((e, Some(path))) } } + + self.filter.filter_files(&mut files); + + if !files.is_empty() { + for xattr in egg.xattrs { + table.add_xattr(xattr, depth + 1, false); + } + + for (error, path) in errors { + table.add_error(&error, depth + 1, false, path); + } + + self.add_files_to_table(table, files, depth + 1); + continue; + } } - let count = xattrs.len(); - for (index, xattr) in xattrs.into_iter().enumerate() { + let count = egg.xattrs.len(); + for (index, xattr) in egg.xattrs.into_iter().enumerate() { table.add_xattr(xattr, depth + 1, errors.is_empty() && index == count - 1); } @@ -161,7 +209,6 @@ impl Details { for (index, (error, path)) in errors.into_iter().enumerate() { table.add_error(&error, depth + 1, index == count - 1, path); } - } } } @@ -289,19 +336,13 @@ impl Table where U: Users { let row = Row { depth: depth, cells: None, - name: Cell::paint(self.colours.perms.attribute, &format!("{}\t{}", xattr.name, xattr.size)), + name: Cell::paint(self.colours.perms.attribute, &format!("{} (len {})", xattr.name, xattr.size)), last: last, }; self.rows.push(row); } - /// Get the cells for the given file, and add the result to the table. - fn add_file(&mut self, file: &File, depth: usize, last: bool, links: bool, xattrs: bool) { - let cells = self.cells_for_file(file, xattrs); - self.add_file_with_cells(cells, file, depth, last, links) - } - pub fn add_file_with_cells(&mut self, cells: Vec, file: &File, depth: usize, last: bool, links: bool) { let row = Row { depth: depth,