//! Filtering and sorting the list of files before displaying them. use std::cmp::Ordering; use std::iter::FromIterator; use std::os::unix::fs::MetadataExt; use std::path::Path; use crate::fs::DotFilter; use crate::fs::File; /// The **file filter** processes a list of files before displaying them to /// the user, by removing files they don’t want to see, and putting the list /// in the desired order. /// /// Usually a user does not want to see *every* file in the list. The most /// common case is to remove files starting with `.`, which are designated /// as ‘hidden’ files. /// /// The special files `.` and `..` files are not actually filtered out, but /// need to be inserted into the list, in a special case. /// /// The filter also governs sorting the list. After being filtered, pairs of /// files are compared and sorted based on the result, with the sort field /// performing the comparison. #[derive(PartialEq, Debug, Clone)] pub struct FileFilter { /// Whether directories should be listed first, and other types of file /// second. Some users prefer it like this. pub list_dirs_first: bool, /// The metadata field to sort by. pub sort_field: SortField, /// Whether to reverse the sorting order. This would sort the largest /// files first, or files starting with Z, or the most-recently-changed /// ones, depending on the sort field. pub reverse: bool, /// Whether to only show directories. pub only_dirs: bool, /// Which invisible “dot” files to include when listing a directory. /// /// Files starting with a single “.” are used to determine “system” or /// “configuration” files that should not be displayed in a regular /// directory listing, and the directory entries “.” and “..” are /// considered extra-special. /// /// This came about more or less by a complete historical accident, /// when the original `ls` tried to hide `.` and `..`: /// https://plus.google.com/+RobPikeTheHuman/posts/R58WgWwN9jp /// /// When one typed ls, however, these files appeared, so either Ken or /// Dennis added a simple test to the program. It was in assembler then, /// but the code in question was equivalent to something like this: /// if (name[0] == '.') continue; /// This statement was a little shorter than what it should have been, /// which is: /// if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; /// but hey, it was easy. /// /// Two things resulted. /// /// First, a bad precedent was set. A lot of other lazy programmers /// introduced bugs by making the same simplification. Actual files /// beginning with periods are often skipped when they should be counted. /// /// Second, and much worse, the idea of a "hidden" or "dot" file was /// created. As a consequence, more lazy programmers started dropping /// files into everyone's home directory. I don't have all that much /// stuff installed on the machine I'm using to type this, but my home /// directory has about a hundred dot files and I don't even know what /// most of them are or whether they're still needed. Every file name /// evaluation that goes through my home directory is slowed down by /// this accumulated sludge. pub dot_filter: DotFilter, /// Glob patterns to ignore. Any file name that matches *any* of these /// patterns won’t be displayed in the list. pub ignore_patterns: IgnorePatterns, /// Whether to ignore Git-ignored patterns. /// This is implemented completely separately from the actual Git /// repository scanning — a `.gitignore` file will still be scanned even /// if there’s no `.git` folder present. pub git_ignore: GitIgnore, } impl FileFilter { /// Remove every file in the given vector that does *not* pass the /// filter predicate for files found inside a directory. pub fn filter_child_files(&self, files: &mut Vec>) { files.retain(|f| ! self.ignore_patterns.is_ignored(&f.name)); if self.only_dirs { files.retain(File::is_directory); } } /// Remove every file in the given vector that does *not* pass the /// filter predicate for file names specified on the command-line. /// /// The rules are different for these types of files than the other /// type because the ignore rules can be used with globbing. For /// example, running `exa -I='*.tmp' .vimrc` shouldn’t filter out the /// dotfile, because it’s been directly specified. But running /// `exa -I='*.ogg' music/*` should filter out the ogg files obtained /// from the glob, even though the globbing is done by the shell! pub fn filter_argument_files(&self, files: &mut Vec>) { files.retain(|f| { ! self.ignore_patterns.is_ignored(&f.name) }); } /// Sort the files in the given vector based on the sort field option. pub fn sort_files<'a, F>(&self, files: &mut Vec) where F: AsRef> { files.sort_by(|a, b| { self.sort_field.compare_files(a.as_ref(), b.as_ref()) }); if self.reverse { files.reverse(); } if self.list_dirs_first { // This relies on the fact that `sort_by` is *stable*: it will keep // adjacent elements next to each other. files.sort_by(|a, b| { b.as_ref().points_to_directory() .cmp(&a.as_ref().points_to_directory()) }); } } } /// User-supplied field to sort by. #[derive(PartialEq, Debug, Copy, Clone)] pub enum SortField { /// Don’t apply any sorting. This is usually used as an optimisation in /// scripts, where the order doesn’t matter. Unsorted, /// The file name. This is the default sorting. Name(SortCase), /// The file’s extension, with extensionless files being listed first. Extension(SortCase), /// The file’s size, in bytes. Size, /// The file’s inode, which usually corresponds to the order in which /// files were created on the filesystem, more or less. FileInode, /// The time the file was modified (the “mtime”). /// /// As this is stored as a Unix timestamp, rather than a local time /// instance, the time zone does not matter and will only be used to /// display the timestamps, not compare them. ModifiedDate, /// The time the file was accessed (the “atime”). /// /// Oddly enough, this field rarely holds the *actual* accessed time. /// Recording a read time means writing to the file each time it’s read /// slows the whole operation down, so many systems will only update the /// timestamp in certain circumstances. This has become common enough that /// it’s now expected behaviour! /// http://unix.stackexchange.com/a/8842 AccessedDate, /// The time the file was changed (the “ctime”). /// /// This field is used to mark the time when a file’s metadata /// changed — its permissions, owners, or link count. /// /// In original Unix, this was, however, meant as creation time. /// https://www.bell-labs.com/usr/dmr/www/cacm.html ChangedDate, /// The time the file was created (the “btime” or “birthtime”). CreatedDate, /// The type of the file: directories, links, pipes, regular, files, etc. /// /// Files are ordered according to the `PartialOrd` implementation of /// `fs::fields::Type`, so changing that will change this. FileType, /// The “age” of the file, which is the time it was modified sorted /// backwards. The reverse of the `ModifiedDate` ordering! /// /// It turns out that listing the most-recently-modified files first is a /// common-enough use case that it deserves its own variant. This would be /// implemented by just using the modified date and setting the reverse /// flag, but this would make reversing *that* output not work, which is /// bad, even though that’s kind of nonsensical. So it’s its own variant /// that can be reversed like usual. ModifiedAge, /// The file's name, however if the name of the file begins with `.` /// ignore the leading `.` and then sort as Name NameMixHidden(SortCase), } /// Whether a field should be sorted case-sensitively or case-insensitively. /// This determines which of the `natord` functions to use. /// /// I kept on forgetting which one was sensitive and which one was /// insensitive. Would a case-sensitive sort put capital letters first because /// it takes the case of the letters into account, or intermingle them with /// lowercase letters because it takes the difference between the two cases /// into account? I gave up and just named these two variants after the /// effects they have. #[derive(PartialEq, Debug, Copy, Clone)] pub enum SortCase { /// Sort files case-sensitively with uppercase first, with ‘A’ coming /// before ‘a’. ABCabc, /// Sort files case-insensitively, with ‘A’ being equal to ‘a’. AaBbCc, } impl SortField { /// Compares two files to determine the order they should be listed in, /// depending on the search field. /// /// The `natord` crate is used here to provide a more *natural* sorting /// order than just sorting character-by-character. This splits filenames /// into groups between letters and numbers, and then sorts those blocks /// together, so `file10` will sort after `file9`, instead of before it /// because of the `1`. pub fn compare_files(self, a: &File<'_>, b: &File<'_>) -> Ordering { use self::SortCase::{ABCabc, AaBbCc}; match self { Self::Unsorted => Ordering::Equal, Self::Name(ABCabc) => natord::compare(&a.name, &b.name), Self::Name(AaBbCc) => natord::compare_ignore_case(&a.name, &b.name), Self::Size => a.metadata.len().cmp(&b.metadata.len()), Self::FileInode => a.metadata.ino().cmp(&b.metadata.ino()), Self::ModifiedDate => a.modified_time().cmp(&b.modified_time()), Self::AccessedDate => a.accessed_time().cmp(&b.accessed_time()), Self::ChangedDate => a.changed_time().cmp(&b.changed_time()), Self::CreatedDate => a.created_time().cmp(&b.created_time()), Self::ModifiedAge => b.modified_time().cmp(&a.modified_time()), // flip b and a Self::FileType => match a.type_char().cmp(&b.type_char()) { // todo: this recomputes Ordering::Equal => natord::compare(&*a.name, &*b.name), order => order, }, Self::Extension(ABCabc) => match a.ext.cmp(&b.ext) { Ordering::Equal => natord::compare(&*a.name, &*b.name), order => order, }, Self::Extension(AaBbCc) => match a.ext.cmp(&b.ext) { Ordering::Equal => natord::compare_ignore_case(&*a.name, &*b.name), order => order, }, Self::NameMixHidden(ABCabc) => natord::compare( Self::strip_dot(&a.name), Self::strip_dot(&b.name) ), Self::NameMixHidden(AaBbCc) => natord::compare_ignore_case( Self::strip_dot(&a.name), Self::strip_dot(&b.name) ) } } fn strip_dot(n: &str) -> &str { match n.strip_prefix('.') { Some(s) => s, None => n, } } } /// The **ignore patterns** are a list of globs that are tested against /// each filename, and if any of them match, that file isn’t displayed. /// This lets a user hide, say, text files by ignoring `*.txt`. #[derive(PartialEq, Default, Debug, Clone)] pub struct IgnorePatterns { patterns: Vec, } impl FromIterator for IgnorePatterns { fn from_iter(iter: I) -> Self where I: IntoIterator { let patterns = iter.into_iter().collect(); Self { patterns } } } impl IgnorePatterns { /// Create a new list from the input glob strings, turning the inputs that /// are valid glob patterns into an `IgnorePatterns`. The inputs that /// don’t parse correctly are returned separately. pub fn parse_from_iter<'a, I: IntoIterator>(iter: I) -> (Self, Vec) { let iter = iter.into_iter(); // Almost all glob patterns are valid, so it’s worth pre-allocating // the vector with enough space for all of them. let mut patterns = match iter.size_hint() { (_, Some(count)) => Vec::with_capacity(count), _ => Vec::new(), }; // Similarly, assume there won’t be any errors. let mut errors = Vec::new(); for input in iter { match glob::Pattern::new(input) { Ok(pat) => patterns.push(pat), Err(e) => errors.push(e), } } (Self { patterns }, errors) } /// Create a new empty set of patterns that matches nothing. pub fn empty() -> Self { Self { patterns: Vec::new() } } /// Test whether the given file should be hidden from the results. fn is_ignored(&self, file: &str) -> bool { self.patterns.iter().any(|p| p.matches(file)) } /// Test whether the given file should be hidden from the results. pub fn is_ignored_path(&self, file: &Path) -> bool { self.patterns.iter().any(|p| p.matches_path(file)) } // TODO(ogham): The fact that `is_ignored_path` is pub while `is_ignored` // isn’t probably means it’s in the wrong place } /// Whether to ignore or display files that are mentioned in `.gitignore` files. #[derive(PartialEq, Debug, Copy, Clone)] pub enum GitIgnore { /// Ignore files that Git would ignore. This means doing a check for a /// `.gitignore` file, possibly recursively up the filesystem tree. CheckAndIgnore, /// Display files, even if Git would ignore them. Off, } // This is not fully baked yet. The `ignore` crate lists a lot more files that // we aren’t checking: // // > By default, all ignore files found are respected. This includes .ignore, // > .gitignore, .git/info/exclude and even your global gitignore globs, // > usually found in $XDG_CONFIG_HOME/git/ignore. #[cfg(test)] mod test_ignores { use super::*; #[test] fn empty_matches_nothing() { let pats = IgnorePatterns::empty(); assert_eq!(false, pats.is_ignored("nothing")); assert_eq!(false, pats.is_ignored("test.mp3")); } #[test] fn ignores_a_glob() { let (pats, fails) = IgnorePatterns::parse_from_iter(vec![ "*.mp3" ]); assert!(fails.is_empty()); assert_eq!(false, pats.is_ignored("nothing")); assert_eq!(true, pats.is_ignored("test.mp3")); } #[test] fn ignores_an_exact_filename() { let (pats, fails) = IgnorePatterns::parse_from_iter(vec![ "nothing" ]); assert!(fails.is_empty()); assert_eq!(true, pats.is_ignored("nothing")); assert_eq!(false, pats.is_ignored("test.mp3")); } #[test] fn ignores_both() { let (pats, fails) = IgnorePatterns::parse_from_iter(vec![ "nothing", "*.mp3" ]); assert!(fails.is_empty()); assert_eq!(true, pats.is_ignored("nothing")); assert_eq!(true, pats.is_ignored("test.mp3")); } }