Give the filter modules some love

2024-11-25 21:37:33 +00:00 · 2017-08-12 10:09:33 +01:00 · 2017-08-12 10:09:33 +01:00 · 673e894d25
commit 673e894d25
parent a7d8e1c84b
2 changed files with 184 additions and 104 deletions
--- a/src/fs/filter.rs
+++ b/src/fs/filter.rs
@ -1,3 +1,5 @@
+//! Filtering and sorting the list of files before displaying them.
+
 use std::cmp::Ordering;
 use std::iter::FromIterator;
 use std::os::unix::fs::MetadataExt;
@ -9,9 +11,20 @@ use fs::File;
 use fs::DotFilter;


-/// The **file filter** processes a vector of files before outputting them,
-/// filtering and sorting the files depending on the user’s command-line
-/// flags.
+/// The **file filter** processes a list of files before displaying them to
+/// the user, by removing files they don’t want to see, and putting the list
+/// in the desired order.
+///
+/// Usually a user does not want to see *every* file in the list. The most
+/// common case is to remove files starting with `.`, which are designated
+/// as ‘hidden’ files.
+///
+/// The special `.` and `..` files are not actually filtered out, but
+/// need to be inserted into the list, in a special case.
+///
+/// The filter also governs sorting the list. After being filtered, pairs of
+/// files are compared and sorted based on the result, with the sort field
+/// performing the comparison.
 #[derive(PartialEq, Debug, Clone)]
 pub struct FileFilter {

@ -64,80 +77,47 @@ pub struct FileFilter {
    pub dot_filter: DotFilter,

    /// Glob patterns to ignore. Any file name that matches *any* of these
-    /// patterns won't be displayed in the list.
+    /// patterns won’t be displayed in the list.
    pub ignore_patterns: IgnorePatterns,
 }


 impl FileFilter {
-/// Remove every file in the given vector that does *not* pass the
-   /// filter predicate for files found inside a directory.
-   pub fn filter_child_files(&self, files: &mut Vec<File>) {
-       files.retain(|f| !self.ignore_patterns.is_ignored(f));
-   }
+    /// Remove every file in the given vector that does *not* pass the
+    /// filter predicate for files found inside a directory.
+    pub fn filter_child_files(&self, files: &mut Vec<File>) {
+        files.retain(|f| !self.ignore_patterns.is_ignored(&f.name));
+    }

-   /// Remove every file in the given vector that does *not* pass the
-   /// filter predicate for file names specified on the command-line.
-   ///
-   /// The rules are different for these types of files than the other
-   /// type because the ignore rules can be used with globbing. For
-   /// example, running "exa -I='*.tmp' .vimrc" shouldn't filter out the
-   /// dotfile, because it's been directly specified. But running
-   /// "exa -I='*.ogg' music/*" should filter out the ogg files obtained
-   /// from the glob, even though the globbing is done by the shell!
-   pub fn filter_argument_files(&self, files: &mut Vec<File>) {
-       files.retain(|f| !self.ignore_patterns.is_ignored(f));
-   }
+    /// Remove every file in the given vector that does *not* pass the
+    /// filter predicate for file names specified on the command-line.
+    ///
+    /// The rules are different for these types of files than the other
+    /// type because the ignore rules can be used with globbing. For
+    /// example, running `exa -I='*.tmp' .vimrc` shouldn’t filter out the
+    /// dotfile, because it’s been directly specified. But running
+    /// `exa -I='*.ogg' music/*` should filter out the ogg files obtained
+    /// from the glob, even though the globbing is done by the shell!
+    pub fn filter_argument_files(&self, files: &mut Vec<File>) {
+        files.retain(|f| !self.ignore_patterns.is_ignored(&f.name));
+    }

-   /// Sort the files in the given vector based on the sort field option.
-   pub fn sort_files<'a, F>(&self, files: &mut Vec<F>)
-   where F: AsRef<File<'a>> {
+    /// Sort the files in the given vector based on the sort field option.
+    pub fn sort_files<'a, F>(&self, files: &mut Vec<F>)
+    where F: AsRef<File<'a>> {

-       files.sort_by(|a, b| self.compare_files(a.as_ref(), b.as_ref()));
+        files.sort_by(|a, b| self.sort_field.compare_files(a.as_ref(), b.as_ref()));

-       if self.reverse {
-           files.reverse();
-       }
+        if self.reverse {
+            files.reverse();
+        }

-       if self.list_dirs_first {
-           // This relies on the fact that `sort_by` is stable.
-           files.sort_by(|a, b| b.as_ref().is_directory().cmp(&a.as_ref().is_directory()));
-       }
-   }
-
-   /// Compares two files to determine the order they should be listed in,
-   /// depending on the search field.
-   pub fn compare_files(&self, a: &File, b: &File) -> Ordering {
-       use self::SortCase::{Sensitive, Insensitive};
-
-       match self.sort_field {
-           SortField::Unsorted  => Ordering::Equal,
-
-           SortField::Name(Sensitive)    => natord::compare(&a.name, &b.name),
-           SortField::Name(Insensitive)  => natord::compare_ignore_case(&a.name, &b.name),
-
-           SortField::Size          => a.metadata.len().cmp(&b.metadata.len()),
-           SortField::FileInode     => a.metadata.ino().cmp(&b.metadata.ino()),
-           SortField::ModifiedDate  => a.metadata.mtime().cmp(&b.metadata.mtime()),
-           SortField::AccessedDate  => a.metadata.atime().cmp(&b.metadata.atime()),
-           SortField::CreatedDate   => a.metadata.ctime().cmp(&b.metadata.ctime()),
-
-           SortField::FileType => match a.type_char().cmp(&b.type_char()) { // todo: this recomputes
-               Ordering::Equal  => natord::compare(&*a.name, &*b.name),
-               order            => order,
-           },
-
-           SortField::Extension(Sensitive) => match a.ext.cmp(&b.ext) {
-               Ordering::Equal  => natord::compare(&*a.name, &*b.name),
-               order            => order,
-           },
-
-           SortField::Extension(Insensitive) => match a.ext.cmp(&b.ext) {
-               Ordering::Equal  => natord::compare_ignore_case(&*a.name, &*b.name),
-               order            => order,
-           },
-       }
-   }
+        if self.list_dirs_first {
+            // This relies on the fact that `sort_by` is *stable*: files that
+            // compare equal keep the relative order from the sort above.
+            files.sort_by(|a, b| b.as_ref().is_directory().cmp(&a.as_ref().is_directory()));
+        }
+    }
 }


@ -145,44 +125,44 @@ impl FileFilter {
 #[derive(PartialEq, Debug, Copy, Clone)]
 pub enum SortField {

-    /// Don't apply any sorting. This is usually used as an optimisation in
-    /// scripts, where the order doesn't matter.
+    /// Don’t apply any sorting. This is usually used as an optimisation in
+    /// scripts, where the order doesn’t matter.
    Unsorted,

    /// The file name. This is the default sorting.
    Name(SortCase),

-    /// The file's extension, with extensionless files being listed first.
+    /// The file’s extension, with extensionless files being listed first.
    Extension(SortCase),

-    /// The file's size.
+    /// The file’s size, in bytes.
    Size,

-    /// The file's inode. This is sometimes analogous to the order in which
-    /// the files were created on the hard drive.
+    /// The file’s inode, which usually corresponds to the order in which
+    /// files were created on the filesystem, more or less.
    FileInode,

-    /// The time at which this file was modified (the `mtime`).
+    /// The time this file was modified (the “mtime”).
    ///
    /// As this is stored as a Unix timestamp, rather than a local time
    /// instance, the time zone does not matter and will only be used to
    /// display the timestamps, not compare them.
    ModifiedDate,

-    /// The time at this file was accessed (the `atime`).
+    /// The time this file was accessed (the “atime”).
    ///
    /// Oddly enough, this field rarely holds the *actual* accessed time.
    /// Recording a read time means writing to the file each time it’s read,
    /// which slows the whole operation down, so many systems only update the
    /// timestamp in certain circumstances. This has become common enough that
-    /// it’s now expected behaviour for the `atime` field.
+    /// it’s now expected behaviour!
    /// http://unix.stackexchange.com/a/8842
    AccessedDate,

-    /// The time at which this file was changed or created (the `ctime`).
+    /// The time this file was changed or created (the “ctime”).
    ///
    /// Contrary to the name, this field is used to mark the time when a
-    /// file's metadata changed -- its permissions, owners, or link count.
+    /// file’s metadata changed -- its permissions, owners, or link count.
    ///
    /// In original Unix, this was, however, meant as creation time.
    /// https://www.bell-labs.com/usr/dmr/www/cacm.html
@ -196,7 +176,6 @@ pub enum SortField {
 }

 /// Whether a field should be sorted case-sensitively or case-insensitively.
-///
 /// This determines which of the `natord` functions to use.
 #[derive(PartialEq, Debug, Copy, Clone)]
 pub enum SortCase {
@ -209,6 +188,49 @@ pub enum SortCase {
    Insensitive,
 }

+impl SortField {
+
+    /// Compares two files to determine the order they should be listed in,
+    /// depending on the sort field.
+    ///
+    /// The `natord` crate is used here to provide a more *natural* sorting
+    /// order than just sorting character-by-character. This splits filenames
+    /// into groups between letters and numbers, and then sorts those blocks
+    /// together, so `file10` will sort after `file9`, instead of before it
+    /// because of the `1`.
+    pub fn compare_files(&self, a: &File, b: &File) -> Ordering {
+        use self::SortCase::{Sensitive, Insensitive};
+
+        match *self {
+            SortField::Unsorted  => Ordering::Equal,
+
+            SortField::Name(Sensitive)    => natord::compare(&a.name, &b.name),
+            SortField::Name(Insensitive)  => natord::compare_ignore_case(&a.name, &b.name),
+
+            SortField::Size          => a.metadata.len().cmp(&b.metadata.len()),
+            SortField::FileInode     => a.metadata.ino().cmp(&b.metadata.ino()),
+            SortField::ModifiedDate  => a.metadata.mtime().cmp(&b.metadata.mtime()),
+            SortField::AccessedDate  => a.metadata.atime().cmp(&b.metadata.atime()),
+            SortField::CreatedDate   => a.metadata.ctime().cmp(&b.metadata.ctime()),
+
+            SortField::FileType => match a.type_char().cmp(&b.type_char()) { // todo: this recomputes
+                Ordering::Equal  => natord::compare(&*a.name, &*b.name),
+                order            => order,
+            },
+
+            SortField::Extension(Sensitive) => match a.ext.cmp(&b.ext) {
+                Ordering::Equal  => natord::compare(&*a.name, &*b.name),
+                order            => order,
+            },
+
+            SortField::Extension(Insensitive) => match a.ext.cmp(&b.ext) {
+                Ordering::Equal  => natord::compare_ignore_case(&*a.name, &*b.name),
+                order            => order,
+            },
+        }
+    }
+}
+

 /// The **ignore patterns** are a list of globs that are tested against
 /// each filename, and if any of them match, that file isn’t displayed.
@ -230,7 +252,16 @@ impl IgnorePatterns {
    /// are valid glob patterns into an IgnorePatterns. The inputs that don’t
    /// parse correctly are returned separately.
    pub fn parse_from_iter<'a, I: IntoIterator<Item = &'a str>>(iter: I) -> (Self, Vec<glob::PatternError>) {
-        let mut patterns = Vec::new();
+        let iter = iter.into_iter();
+
+        // Almost all glob patterns are valid, so it’s worth pre-allocating
+        // the vector with enough space for all of them.
+        let mut patterns = match iter.size_hint() {
+            (_, Some(count))  => Vec::with_capacity(count),
+             _                => Vec::new(),
+        };
+
+        // Similarly, assume there won’t be any errors.
        let mut errors = Vec::new();

        for input in iter {
@ -243,13 +274,51 @@ impl IgnorePatterns {
        (IgnorePatterns { patterns }, errors)
    }

-    /// Create a new empty list that matches nothing.
+    /// Create a new empty set of patterns that matches nothing.
    pub fn empty() -> IgnorePatterns {
        IgnorePatterns { patterns: Vec::new() }
    }

    /// Test whether the given file should be hidden from the results.
-    fn is_ignored(&self, file: &File) -> bool {
-        self.patterns.iter().any(|p| p.matches(&file.name))
+    fn is_ignored(&self, file: &str) -> bool {
+        self.patterns.iter().any(|p| p.matches(file))
+    }
+}
+
+
+
+#[cfg(test)]
+mod test_ignores {
+    use super::*;
+
+    #[test]
+    fn empty_matches_nothing() {
+        let pats = IgnorePatterns::empty();
+        assert_eq!(false, pats.is_ignored("nothing"));
+        assert_eq!(false, pats.is_ignored("test.mp3"));
+    }
+
+    #[test]
+    fn ignores_a_glob() {
+        let (pats, fails) = IgnorePatterns::parse_from_iter(vec![ "*.mp3" ]);
+        assert!(fails.is_empty());
+        assert_eq!(false, pats.is_ignored("nothing"));
+        assert_eq!(true,  pats.is_ignored("test.mp3"));
+    }
+
+    #[test]
+    fn ignores_an_exact_filename() {
+        let (pats, fails) = IgnorePatterns::parse_from_iter(vec![ "nothing" ]);
+        assert!(fails.is_empty());
+        assert_eq!(true,  pats.is_ignored("nothing"));
+        assert_eq!(false, pats.is_ignored("test.mp3"));
+    }
+
+    #[test]
+    fn ignores_both() {
+        let (pats, fails) = IgnorePatterns::parse_from_iter(vec![ "nothing", "*.mp3" ]);
+        assert!(fails.is_empty());
+        assert_eq!(true, pats.is_ignored("nothing"));
+        assert_eq!(true, pats.is_ignored("test.mp3"));
    }
 }
--- a/src/options/filter.rs
+++ b/src/options/filter.rs
@ -1,3 +1,5 @@
+//! Parsing the options for `FileFilter`.
+
 use fs::DotFilter;
 use fs::filter::{FileFilter, SortField, SortCase, IgnorePatterns};

@ -7,8 +9,7 @@ use options::parser::MatchedFlags;

 impl FileFilter {

-    /// Determines the set of file filter options to use, based on the user’s
-    /// command-line arguments.
+    /// Determines which of all the file filter options to use.
    pub fn deduce(matches: &MatchedFlags) -> Result<FileFilter, Misfire> {
        Ok(FileFilter {
            list_dirs_first: matches.has(&flags::DIRS_FIRST)?,
@ -20,29 +21,23 @@ impl FileFilter {
    }
 }

-
-
-impl Default for SortField {
-    fn default() -> SortField {
-        SortField::Name(SortCase::Sensitive)
-    }
-}
-
 const SORTS: &[&str] = &[ "name", "Name", "size", "extension",
                          "Extension", "modified", "accessed",
                          "created", "inode", "type", "none" ];

 impl SortField {

-    /// Determine the sort field to use, based on the presence of a “sort”
-    /// argument. This will return `Err` if the option is there, but does not
-    /// correspond to a valid field.
+    /// Determines which sort field to use based on the `--sort` argument.
+    /// This argument’s value can be one of several flags, listed above.
+    /// Returns the default sort field if none is given, or `Err` if the
+    /// value doesn’t correspond to a sort field we know about.
    fn deduce(matches: &MatchedFlags) -> Result<SortField, Misfire> {
        let word = match matches.get(&flags::SORT)? {
            Some(w)  => w,
            None     => return Ok(SortField::default()),
        };

+        // The field is an OsStr, so it can’t be `match`ed against string literals.
        if word == "name" || word == "filename" {
            Ok(SortField::Name(SortCase::Sensitive))
        }
@ -82,8 +77,21 @@ impl SortField {
    }
 }

+impl Default for SortField {
+    fn default() -> SortField {
+        SortField::Name(SortCase::Sensitive)
+    }
+}
+

 impl DotFilter {
+
+    /// Determines the dot filter based on how many `--all` options were
+    /// given: one will show dotfiles, but two will show `.` and `..` too.
+    ///
+    /// It also checks for the `--tree` option in strict mode, because of a
+    /// special case where `--tree --all --all` won’t work: listing the
+    /// parent directory in tree mode would loop onto itself!
    pub fn deduce(matches: &MatchedFlags) -> Result<DotFilter, Misfire> {
        let count = matches.count(&flags::ALL);

@ -108,24 +116,27 @@ impl DotFilter {

 impl IgnorePatterns {

-    /// Determines the set of file filter options to use, based on the user’s
-    /// command-line arguments.
+    /// Determines the set of glob patterns to use based on the
+    /// `--ignore-glob` argument’s value. This is a list of strings
+    /// separated by pipe (`|`) characters, given in any order.
    pub fn deduce(matches: &MatchedFlags) -> Result<IgnorePatterns, Misfire> {

+        // If there are no inputs, we return a set of patterns that doesn’t
+        // match anything, rather than, say, `None`.
        let inputs = match matches.get(&flags::IGNORE_GLOB)? {
            None => return Ok(IgnorePatterns::empty()),
            Some(is) => is,
        };

+        // Awkwardly, though, a glob pattern can be invalid, and we need to
+        // deal with invalid patterns somehow.
        let (patterns, mut errors) = IgnorePatterns::parse_from_iter(inputs.to_string_lossy().split('|'));

        // It can actually return more than one glob error,
-        // but we only use one.
-        if let Some(error) = errors.pop() {
-            return Err(error.into())
-        }
-        else {
-            Ok(patterns)
+        // but we only use one. (TODO)
+        match errors.pop() {
+            Some(e) => Err(e.into()),
+            None    => Ok(patterns),
        }
    }
 }