Give the filter modules some love

This commit is contained in:
Benjamin Sago 2017-08-12 10:09:33 +01:00
parent a7d8e1c84b
commit 673e894d25
2 changed files with 184 additions and 104 deletions

View File

@ -1,3 +1,5 @@
//! Filtering and sorting the list of files before displaying them.
use std::cmp::Ordering;
use std::iter::FromIterator;
use std::os::unix::fs::MetadataExt;
@ -9,9 +11,20 @@ use fs::File;
use fs::DotFilter;
/// The **file filter** processes a vector of files before outputting them,
/// filtering and sorting the files depending on the user's command-line
/// flags.
/// The **file filter** processes a list of files before displaying them to
/// the user, by removing files they don't want to see, and putting the list
/// in the desired order.
///
/// Usually a user does not want to see *every* file in the list. The most
/// common case is to remove files starting with `.`, which are designated
/// as hidden files.
///
/// The special files `.` and `..` are not actually filtered out, but
/// need to be inserted into the list, in a special case.
///
/// The filter also governs sorting the list. After being filtered, pairs of
/// files are compared and sorted based on the result, with the sort field
/// performing the comparison.
#[derive(PartialEq, Debug, Clone)]
pub struct FileFilter {
@ -64,80 +77,47 @@ pub struct FileFilter {
pub dot_filter: DotFilter,
/// Glob patterns to ignore. Any file name that matches *any* of these
/// patterns won't be displayed in the list.
/// patterns won't be displayed in the list.
pub ignore_patterns: IgnorePatterns,
}
impl FileFilter {
/// Remove every file in the given vector that does *not* pass the
/// filter predicate for files found inside a directory.
pub fn filter_child_files(&self, files: &mut Vec<File>) {
files.retain(|f| !self.ignore_patterns.is_ignored(f));
}
/// Drops from the given vector every file found inside a directory whose
/// name matches one of the ignore patterns, keeping the rest in place.
pub fn filter_child_files(&self, files: &mut Vec<File>) {
    let ignores = &self.ignore_patterns;
    files.retain(|file| !ignores.is_ignored(&file.name));
}
/// Remove every file in the given vector that does *not* pass the
/// filter predicate for file names specified on the command-line.
///
/// The rules are different for these types of files than the other
/// type because the ignore rules can be used with globbing. For
/// example, running "exa -I='*.tmp' .vimrc" shouldn't filter out the
/// dotfile, because it's been directly specified. But running
/// "exa -I='*.ogg' music/*" should filter out the ogg files obtained
/// from the glob, even though the globbing is done by the shell!
pub fn filter_argument_files(&self, files: &mut Vec<File>) {
files.retain(|f| !self.ignore_patterns.is_ignored(f));
}
/// Drops from the given vector every file that was named directly on the
/// command-line and whose name matches one of the ignore patterns.
///
/// The rules are meant to be different for these files than for files
/// found inside a directory, because the ignore rules can be used with
/// globbing. For example, running `exa -I='*.tmp' .vimrc` shouldn't
/// filter out the dotfile, because it's been directly specified. But
/// running `exa -I='*.ogg' music/*` should filter out the ogg files
/// obtained from the glob, even though the globbing is done by the shell!
///
/// NOTE(review): as written, this performs exactly the same ignore-pattern
/// check as `filter_child_files`.
pub fn filter_argument_files(&self, files: &mut Vec<File>) {
    let ignores = &self.ignore_patterns;
    files.retain(|file| !ignores.is_ignored(&file.name));
}
/// Sort the files in the given vector based on the sort field option.
pub fn sort_files<'a, F>(&self, files: &mut Vec<F>)
where F: AsRef<File<'a>> {
/// Sort the files in the given vector based on the sort field option.
pub fn sort_files<'a, F>(&self, files: &mut Vec<F>)
where F: AsRef<File<'a>> {
files.sort_by(|a, b| self.compare_files(a.as_ref(), b.as_ref()));
files.sort_by(|a, b| self.sort_field.compare_files(a.as_ref(), b.as_ref()));
if self.reverse {
files.reverse();
}
if self.reverse {
files.reverse();
}
if self.list_dirs_first {
// This relies on the fact that `sort_by` is stable.
files.sort_by(|a, b| b.as_ref().is_directory().cmp(&a.as_ref().is_directory()));
}
}
/// Compares two files to determine the order they should be listed in,
/// depending on the search field.
pub fn compare_files(&self, a: &File, b: &File) -> Ordering {
use self::SortCase::{Sensitive, Insensitive};
match self.sort_field {
SortField::Unsorted => Ordering::Equal,
SortField::Name(Sensitive) => natord::compare(&a.name, &b.name),
SortField::Name(Insensitive) => natord::compare_ignore_case(&a.name, &b.name),
SortField::Size => a.metadata.len().cmp(&b.metadata.len()),
SortField::FileInode => a.metadata.ino().cmp(&b.metadata.ino()),
SortField::ModifiedDate => a.metadata.mtime().cmp(&b.metadata.mtime()),
SortField::AccessedDate => a.metadata.atime().cmp(&b.metadata.atime()),
SortField::CreatedDate => a.metadata.ctime().cmp(&b.metadata.ctime()),
SortField::FileType => match a.type_char().cmp(&b.type_char()) { // todo: this recomputes
Ordering::Equal => natord::compare(&*a.name, &*b.name),
order => order,
},
SortField::Extension(Sensitive) => match a.ext.cmp(&b.ext) {
Ordering::Equal => natord::compare(&*a.name, &*b.name),
order => order,
},
SortField::Extension(Insensitive) => match a.ext.cmp(&b.ext) {
Ordering::Equal => natord::compare_ignore_case(&*a.name, &*b.name),
order => order,
},
}
}
if self.list_dirs_first {
// This relies on the fact that `sort_by` is *stable*: it will keep
// adjacent elements next to each other.
files.sort_by(|a, b| b.as_ref().is_directory().cmp(&a.as_ref().is_directory()));
}
}
}
@ -145,44 +125,44 @@ impl FileFilter {
#[derive(PartialEq, Debug, Copy, Clone)]
pub enum SortField {
/// Don't apply any sorting. This is usually used as an optimisation in
/// scripts, where the order doesn't matter.
/// Don't apply any sorting. This is usually used as an optimisation in
/// scripts, where the order doesn't matter.
Unsorted,
/// The file name. This is the default sorting.
Name(SortCase),
/// The file's extension, with extensionless files being listed first.
/// The file's extension, with extensionless files being listed first.
Extension(SortCase),
/// The file's size.
/// The file's size, in bytes.
Size,
/// The file's inode. This is sometimes analogous to the order in which
/// the files were created on the hard drive.
/// The file's inode, which usually corresponds to the order in which
/// files were created on the filesystem, more or less.
FileInode,
/// The time at which this file was modified (the `mtime`).
/// The time this file was modified (the “mtime”).
///
/// As this is stored as a Unix timestamp, rather than a local time
/// instance, the time zone does not matter and will only be used to
/// display the timestamps, not compare them.
ModifiedDate,
/// The time at which this file was accessed (the `atime`).
/// The time this file was accessed (the “atime”).
///
/// Oddly enough, this field rarely holds the *actual* accessed time.
/// Recording a read time means writing to the file each time it's read,
/// which slows the whole operation down, so many systems will only update
/// the timestamp in certain circumstances. This has become common enough that
/// it's now expected behaviour for the `atime` field.
/// it's now expected behaviour!
/// http://unix.stackexchange.com/a/8842
AccessedDate,
/// The time at which this file was changed or created (the `ctime`).
/// The time this file was changed or created (the “ctime”).
///
/// Contrary to the name, this field is used to mark the time when a
/// file's metadata changed -- its permissions, owners, or link count.
/// file's metadata changed -- its permissions, owners, or link count.
///
/// In original Unix, this was, however, meant as creation time.
/// https://www.bell-labs.com/usr/dmr/www/cacm.html
@ -196,7 +176,6 @@ pub enum SortField {
}
/// Whether a field should be sorted case-sensitively or case-insensitively.
///
/// This determines which of the `natord` functions to use.
#[derive(PartialEq, Debug, Copy, Clone)]
pub enum SortCase {
@ -209,6 +188,49 @@ pub enum SortCase {
Insensitive,
}
impl SortField {

    /// Decides the relative order in which two files should be listed,
    /// according to this sort field.
    ///
    /// Names are compared with the `natord` crate, which gives a more
    /// *natural* order than comparing character-by-character: filenames
    /// are split into groups of letters and numbers, and those groups are
    /// compared with each other, so `file10` sorts after `file9` instead
    /// of before it because of the `1`.
    ///
    /// The file type and extension fields use the file name as a
    /// tie-breaker when their primary keys compare as equal.
    pub fn compare_files(&self, a: &File, b: &File) -> Ordering {
        use self::SortCase::{Sensitive, Insensitive};

        match *self {
            SortField::Unsorted => Ordering::Equal,

            SortField::Name(Sensitive) => natord::compare(&a.name, &b.name),
            SortField::Name(Insensitive) => natord::compare_ignore_case(&a.name, &b.name),

            SortField::Size => a.metadata.len().cmp(&b.metadata.len()),
            SortField::FileInode => a.metadata.ino().cmp(&b.metadata.ino()),
            SortField::ModifiedDate => a.metadata.mtime().cmp(&b.metadata.mtime()),
            SortField::AccessedDate => a.metadata.atime().cmp(&b.metadata.atime()),
            SortField::CreatedDate => a.metadata.ctime().cmp(&b.metadata.ctime()),

            // todo: this recomputes
            SortField::FileType =>
                a.type_char().cmp(&b.type_char())
                    .then_with(|| natord::compare(&*a.name, &*b.name)),

            SortField::Extension(Sensitive) =>
                a.ext.cmp(&b.ext)
                    .then_with(|| natord::compare(&*a.name, &*b.name)),

            SortField::Extension(Insensitive) =>
                a.ext.cmp(&b.ext)
                    .then_with(|| natord::compare_ignore_case(&*a.name, &*b.name)),
        }
    }
}
/// The **ignore patterns** are a list of globs that are tested against
/// each filename, and if any of them match, that file isn't displayed.
@ -230,7 +252,16 @@ impl IgnorePatterns {
/// are valid glob patterns into an IgnorePatterns. The inputs that don't
/// parse correctly are returned separately.
pub fn parse_from_iter<'a, I: IntoIterator<Item = &'a str>>(iter: I) -> (Self, Vec<glob::PatternError>) {
let mut patterns = Vec::new();
let iter = iter.into_iter();
// Almost all glob patterns are valid, so it's worth pre-allocating
// the vector with enough space for all of them.
let mut patterns = match iter.size_hint() {
(_, Some(count)) => Vec::with_capacity(count),
_ => Vec::new(),
};
// Similarly, assume there won't be any errors.
let mut errors = Vec::new();
for input in iter {
@ -243,13 +274,51 @@ impl IgnorePatterns {
(IgnorePatterns { patterns }, errors)
}
/// Create a new empty list that matches nothing.
/// Builds a set of patterns with nothing in it, so that no file name is
/// ever matched.
pub fn empty() -> IgnorePatterns {
    let patterns = Vec::new();
    IgnorePatterns { patterns }
}
/// Test whether the given file should be hidden from the results.
fn is_ignored(&self, file: &File) -> bool {
self.patterns.iter().any(|p| p.matches(&file.name))
fn is_ignored(&self, file: &str) -> bool {
self.patterns.iter().any(|p| p.matches(file))
}
}
#[cfg(test)]
mod test_ignores {
    use super::*;

    /// An empty set of patterns must never hide a file.
    #[test]
    fn empty_matches_nothing() {
        let pats = IgnorePatterns::empty();

        assert!(!pats.is_ignored("nothing"));
        assert!(!pats.is_ignored("test.mp3"));
    }

    /// A wildcard pattern hides only the names it matches.
    #[test]
    fn ignores_a_glob() {
        let (pats, fails) = IgnorePatterns::parse_from_iter(vec![ "*.mp3" ]);

        assert!(fails.is_empty());
        assert!(!pats.is_ignored("nothing"));
        assert!(pats.is_ignored("test.mp3"));
    }

    /// A pattern without wildcards hides exactly that file name.
    #[test]
    fn ignores_an_exact_filename() {
        let (pats, fails) = IgnorePatterns::parse_from_iter(vec![ "nothing" ]);

        assert!(fails.is_empty());
        assert!(pats.is_ignored("nothing"));
        assert!(!pats.is_ignored("test.mp3"));
    }

    /// With several patterns, a match on any one of them hides the file.
    #[test]
    fn ignores_both() {
        let (pats, fails) = IgnorePatterns::parse_from_iter(vec![ "nothing", "*.mp3" ]);

        assert!(fails.is_empty());
        assert!(pats.is_ignored("nothing"));
        assert!(pats.is_ignored("test.mp3"));
    }
}

View File

@ -1,3 +1,5 @@
//! Parsing the options for `FileFilter`.
use fs::DotFilter;
use fs::filter::{FileFilter, SortField, SortCase, IgnorePatterns};
@ -7,8 +9,7 @@ use options::parser::MatchedFlags;
impl FileFilter {
/// Determines the set of file filter options to use, based on the user's
/// command-line arguments.
/// Determines which of all the file filter options to use.
pub fn deduce(matches: &MatchedFlags) -> Result<FileFilter, Misfire> {
Ok(FileFilter {
list_dirs_first: matches.has(&flags::DIRS_FIRST)?,
@ -20,29 +21,23 @@ impl FileFilter {
}
}
impl Default for SortField {
fn default() -> SortField {
SortField::Name(SortCase::Sensitive)
}
}
/// Names of the sort fields. Presumably these are the values offered for
/// the `--sort` argument; how this list is used is not visible here.
///
/// NOTE(review): `SortField::deduce` also accepts `filename`, which is not
/// in this list -- confirm whether that omission is intentional.
const SORTS: &[&str] = &[ "name", "Name", "size", "extension",
"Extension", "modified", "accessed",
"created", "inode", "type", "none" ];
impl SortField {
/// Determine the sort field to use, based on the presence of a “sort”
/// argument. This will return `Err` if the option is there, but does not
/// correspond to a valid field.
/// Determines which sort field to use based on the `--sort` argument.
/// This argument's value can be one of several flags, listed above.
/// Returns the default sort field if none is given, or `Err` if the
/// value doesn't correspond to a sort field we know about.
fn deduce(matches: &MatchedFlags) -> Result<SortField, Misfire> {
let word = match matches.get(&flags::SORT)? {
Some(w) => w,
None => return Ok(SortField::default()),
};
// The field is an OsStr, so it can't be matched.
if word == "name" || word == "filename" {
Ok(SortField::Name(SortCase::Sensitive))
}
@ -82,8 +77,21 @@ impl SortField {
}
}
impl Default for SortField {

    /// The sort field used when none is specified: the file name,
    /// compared case-sensitively.
    fn default() -> Self {
        SortField::Name(SortCase::Sensitive)
    }
}
impl DotFilter {
/// Determines the dot filter based on how many `--all` options were
/// given: one will show dotfiles, but two will show `.` and `..` too.
///
/// It also checks for the `--tree` option in strict mode, because of a
/// special case where `--tree --all --all` won't work: listing the
/// parent directory in tree mode would loop onto itself!
pub fn deduce(matches: &MatchedFlags) -> Result<DotFilter, Misfire> {
let count = matches.count(&flags::ALL);
@ -108,24 +116,27 @@ impl DotFilter {
impl IgnorePatterns {
/// Determines the set of file filter options to use, based on the user's
/// command-line arguments.
/// Determines the set of glob patterns to use based on the
/// `--ignore-patterns` argument's value. This is a list of strings
/// separated by pipe (`|`) characters, given in any order.
pub fn deduce(matches: &MatchedFlags) -> Result<IgnorePatterns, Misfire> {
// If there are no inputs, we return a set of patterns that doesn't
// match anything, rather than, say, `None`.
let inputs = match matches.get(&flags::IGNORE_GLOB)? {
None => return Ok(IgnorePatterns::empty()),
Some(is) => is,
};
// Awkwardly, though, a glob pattern can be invalid, and we need to
// deal with invalid patterns somehow.
let (patterns, mut errors) = IgnorePatterns::parse_from_iter(inputs.to_string_lossy().split('|'));
// It can actually return more than one glob error,
// but we only use one.
if let Some(error) = errors.pop() {
return Err(error.into())
}
else {
Ok(patterns)
// but we only use one. (TODO)
match errors.pop() {
Some(e) => Err(e.into()),
None => Ok(patterns),
}
}
}