Properly handle invalid UTF-8 filenames

This commit is contained in:
Ben S 2014-06-29 21:02:14 +01:00
parent 85e557a744
commit df65886d6d
4 changed files with 65 additions and 52 deletions

View File

@ -1,6 +1,7 @@
use colours::{Plain, Style, Black, Red, Green, Yellow, Blue, Purple, Cyan, Fixed}; use colours::{Plain, Style, Black, Red, Green, Yellow, Blue, Purple, Cyan, Fixed};
use std::io::{fs, IoResult}; use std::io::{fs, IoResult};
use std::io; use std::io;
use std::str::{from_utf8, from_utf8_lossy};
use column::{Column, Permissions, FileName, FileSize, User, Group, HardLinks, Inode, Blocks}; use column::{Column, Permissions, FileName, FileSize, User, Group, HardLinks, Inode, Blocks};
use format::{format_metric_bytes, format_IEC_bytes}; use format::{format_metric_bytes, format_IEC_bytes};
@ -17,9 +18,9 @@ use filetype::HasType;
// the actual path. // the actual path.
pub struct File<'a> { pub struct File<'a> {
pub name: &'a str, pub name: String,
pub dir: &'a Dir<'a>, pub dir: &'a Dir<'a>,
pub ext: Option<&'a str>, pub ext: Option<String>,
pub path: &'a Path, pub path: &'a Path,
pub stat: io::FileStat, pub stat: io::FileStat,
pub parts: Vec<SortPart>, pub parts: Vec<SortPart>,
@ -27,9 +28,11 @@ pub struct File<'a> {
impl<'a> File<'a> { impl<'a> File<'a> {
pub fn from_path(path: &'a Path, parent: &'a Dir) -> IoResult<File<'a>> { pub fn from_path(path: &'a Path, parent: &'a Dir) -> IoResult<File<'a>> {
// Getting the string from a filename fails whenever it's not let v = path.filename().unwrap(); // fails if / or . or ..
// UTF-8 representable - just assume it is for now. let filename = match from_utf8(v) {
let filename: &str = path.filename_str().unwrap(); Some(name) => name.to_string(),
None => from_utf8_lossy(v).to_string(),
};
// Use lstat here instead of file.stat(), as it doesn't follow // Use lstat here instead of file.stat(), as it doesn't follow
// symbolic links. Otherwise, the stat() call will fail if it // symbolic links. Otherwise, the stat() call will fail if it
@ -39,26 +42,27 @@ impl<'a> File<'a> {
path: path, path: path,
dir: parent, dir: parent,
stat: stat, stat: stat,
name: filename, name: filename.clone(),
ext: File::ext(filename), ext: File::ext(filename.clone()),
parts: SortPart::split_into_parts(filename), parts: SortPart::split_into_parts(filename.clone()),
}) })
} }
fn ext(name: &'a str) -> Option<&'a str> { fn ext(name: String) -> Option<String> {
// The extension is the series of characters after a dot at // The extension is the series of characters after a dot at
// the end of a filename. This deliberately also counts // the end of a filename. This deliberately also counts
// dotfiles - the ".git" folder has the extension "git". // dotfiles - the ".git" folder has the extension "git".
let re = regex!(r"\.([^.]+)$"); let re = regex!(r"\.([^.]+)$");
re.captures(name).map(|caps| caps.at(1)) re.captures(name.as_slice()).map(|caps| caps.at(1).to_string())
} }
pub fn is_dotfile(&self) -> bool { pub fn is_dotfile(&self) -> bool {
self.name.starts_with(".") self.name.as_slice().starts_with(".")
} }
pub fn is_tmpfile(&self) -> bool { pub fn is_tmpfile(&self) -> bool {
self.name.ends_with("~") || (self.name.starts_with("#") && self.name.ends_with("#")) let name = self.name.as_slice();
name.ends_with("~") || (name.starts_with("#") && name.ends_with("#"))
} }
// Highlight the compiled versions of files. Some of them, like .o, // Highlight the compiled versions of files. Some of them, like .o,
@ -68,22 +72,23 @@ impl<'a> File<'a> {
// without a .coffee. // without a .coffee.
pub fn get_source_files(&self) -> Vec<Path> { pub fn get_source_files(&self) -> Vec<Path> {
match self.ext { let ext = self.ext.clone().unwrap();
Some("class") => vec![self.path.with_extension("java")], // Java match ext.as_slice() {
Some("elc") => vec![self.path.with_extension("el")], // Emacs Lisp "class" => vec![self.path.with_extension("java")], // Java
Some("hi") => vec![self.path.with_extension("hs")], // Haskell "elc" => vec![self.path.with_extension("el")], // Emacs Lisp
Some("o") => vec![self.path.with_extension("c"), self.path.with_extension("cpp")], // C, C++ "hi" => vec![self.path.with_extension("hs")], // Haskell
Some("pyc") => vec![self.path.with_extension("py")], // Python "o" => vec![self.path.with_extension("c"), self.path.with_extension("cpp")], // C, C++
Some("js") => vec![self.path.with_extension("coffee"), self.path.with_extension("ts")], // CoffeeScript, TypeScript "pyc" => vec![self.path.with_extension("py")], // Python
Some("css") => vec![self.path.with_extension("sass"), self.path.with_extension("less")], // SASS, Less "js" => vec![self.path.with_extension("coffee"), self.path.with_extension("ts")], // CoffeeScript, TypeScript
"css" => vec![self.path.with_extension("sass"), self.path.with_extension("less")], // SASS, Less
Some("aux") => vec![self.path.with_extension("tex")], // TeX: auxiliary file "aux" => vec![self.path.with_extension("tex")], // TeX: auxiliary file
Some("bbl") => vec![self.path.with_extension("tex")], // BibTeX bibliography file "bbl" => vec![self.path.with_extension("tex")], // BibTeX bibliography file
Some("blg") => vec![self.path.with_extension("tex")], // BibTeX log file "blg" => vec![self.path.with_extension("tex")], // BibTeX log file
Some("lof") => vec![self.path.with_extension("tex")], // list of figures "lof" => vec![self.path.with_extension("tex")], // list of figures
Some("log") => vec![self.path.with_extension("tex")], // TeX log file "log" => vec![self.path.with_extension("tex")], // TeX log file
Some("lot") => vec![self.path.with_extension("tex")], // list of tables "lot" => vec![self.path.with_extension("tex")], // list of tables
Some("toc") => vec![self.path.with_extension("tex")], // table of contents "toc" => vec![self.path.with_extension("tex")], // table of contents
_ => vec![], _ => vec![],
} }
@ -133,7 +138,8 @@ impl<'a> File<'a> {
} }
fn file_name(&self) -> String { fn file_name(&self) -> String {
let displayed_name = self.file_colour().paint(self.name); let name = self.name.as_slice();
let displayed_name = self.file_colour().paint(name);
if self.stat.kind == io::TypeSymlink { if self.stat.kind == io::TypeSymlink {
match fs::readlink(self.path) { match fs::readlink(self.path) {
Ok(path) => { Ok(path) => {
@ -149,13 +155,18 @@ impl<'a> File<'a> {
} }
fn target_file_name_and_arrow(&self, target_path: Path) -> String { fn target_file_name_and_arrow(&self, target_path: Path) -> String {
let filename = target_path.as_str().unwrap(); let v = target_path.filename().unwrap();
let filename = match from_utf8(v) {
Some(name) => name.to_string(),
None => from_utf8_lossy(v).to_string(),
};
let link_target = fs::stat(&target_path).map(|stat| File { let link_target = fs::stat(&target_path).map(|stat| File {
path: &target_path, path: &target_path,
dir: self.dir, dir: self.dir,
stat: stat, stat: stat,
name: filename, name: filename.clone(),
ext: File::ext(filename), ext: File::ext(filename.clone()),
parts: vec![], // not needed parts: vec![], // not needed
}); });
@ -166,8 +177,8 @@ impl<'a> File<'a> {
// that reason anyway. // that reason anyway.
match link_target { match link_target {
Ok(file) => format!("{} {}", Fixed(244).paint("=>"), file.file_colour().paint(filename)), Ok(file) => format!("{} {}", Fixed(244).paint("=>"), file.file_colour().paint(filename.as_slice())),
Err(_) => format!("{} {}", Red.paint("=>"), Red.underline().paint(filename)), Err(_) => format!("{} {}", Red.paint("=>"), Red.underline().paint(filename.as_slice())),
} }
} }

View File

@ -73,6 +73,7 @@ pub trait HasType {
impl<'a> HasType for File<'a> { impl<'a> HasType for File<'a> {
fn get_type(&self) -> FileType { fn get_type(&self) -> FileType {
let name = self.name.as_slice();
if self.stat.kind == io::TypeDirectory { if self.stat.kind == io::TypeDirectory {
return Directory; return Directory;
} }
@ -85,11 +86,12 @@ impl<'a> HasType for File<'a> {
else if self.stat.perm.contains(io::UserExecute) { else if self.stat.perm.contains(io::UserExecute) {
return Executable; return Executable;
} }
else if self.name.starts_with("README") || BUILD_TYPES.iter().any(|&s| s == self.name) { else if name.starts_with("README") || BUILD_TYPES.iter().any(|&s| s == name) {
return Immediate; return Immediate;
} }
else if self.ext.is_some() { else if self.ext.is_some() {
let ext = self.ext.unwrap(); let e = self.ext.clone().unwrap();
let ext = e.as_slice();
if IMAGE_TYPES.iter().any(|&s| s == ext) { if IMAGE_TYPES.iter().any(|&s| s == ext) {
return Image; return Image;
} }

View File

@ -90,7 +90,7 @@ impl Options {
if self.showInvisibles { if self.showInvisibles {
true true
} else { } else {
!f.name.starts_with(".") !f.name.as_slice().starts_with(".")
} }
} }
@ -103,8 +103,8 @@ impl Options {
Name => files.sort_by(|a, b| a.parts.cmp(&b.parts)), Name => files.sort_by(|a, b| a.parts.cmp(&b.parts)),
Size => files.sort_by(|a, b| a.stat.size.cmp(&b.stat.size)), Size => files.sort_by(|a, b| a.stat.size.cmp(&b.stat.size)),
Extension => files.sort_by(|a, b| { Extension => files.sort_by(|a, b| {
let exts = a.ext.map(|e| e.to_ascii_lower()).cmp(&b.ext.map(|e| e.to_ascii_lower())); let exts = a.ext.clone().map(|e| e.as_slice().to_ascii_lower()).cmp(&b.ext.clone().map(|e| e.as_slice().to_ascii_lower()));
let names = a.name.to_ascii_lower().cmp(&b.name.to_ascii_lower()); let names = a.name.as_slice().to_ascii_lower().cmp(&b.name.as_slice().to_ascii_lower());
lexical_ordering(exts, names) lexical_ordering(exts, names)
}), }),
} }

View File

@ -16,7 +16,7 @@ pub enum SortPart {
} }
impl SortPart { impl SortPart {
pub fn from_string(is_digit: bool, slice: &str) -> SortPart { fn from_string(is_digit: bool, slice: &str) -> SortPart {
if is_digit { if is_digit {
// numbers too big for a u64 fall back into strings. // numbers too big for a u64 fall back into strings.
match from_str::<u64>(slice) { match from_str::<u64>(slice) {
@ -31,68 +31,68 @@ impl SortPart {
// The logic here is taken from my question at // The logic here is taken from my question at
// http://stackoverflow.com/q/23969191/3484614 // http://stackoverflow.com/q/23969191/3484614
pub fn split_into_parts(input: &str) -> Vec<SortPart> { pub fn split_into_parts(input: String) -> Vec<SortPart> {
let mut parts = vec![]; let mut parts = vec![];
if input.is_empty() { if input.is_empty() {
return parts return parts
} }
let mut is_digit = input.char_at(0).is_digit(); let mut is_digit = input.as_slice().char_at(0).is_digit();
let mut start = 0; let mut start = 0;
for (i, c) in input.char_indices() { for (i, c) in input.as_slice().char_indices() {
if is_digit != c.is_digit() { if is_digit != c.is_digit() {
parts.push(SortPart::from_string(is_digit, input.slice(start, i))); parts.push(SortPart::from_string(is_digit, input.as_slice().slice(start, i)));
is_digit = !is_digit; is_digit = !is_digit;
start = i; start = i;
} }
} }
parts.push(SortPart::from_string(is_digit, input.slice_from(start))); parts.push(SortPart::from_string(is_digit, input.as_slice().slice_from(start)));
parts parts
} }
} }
#[test] #[test]
fn test_numeric() { fn test_numeric() {
let bits = SortPart::split_into_parts("123456789".as_slice()); let bits = SortPart::split_into_parts("123456789".to_string());
assert!(bits == vec![ Numeric(123456789) ]); assert!(bits == vec![ Numeric(123456789) ]);
} }
#[test] #[test]
fn test_stringular() { fn test_stringular() {
let bits = SortPart::split_into_parts("toothpaste".as_slice()); let bits = SortPart::split_into_parts("toothpaste".to_string());
assert!(bits == vec![ Stringular("toothpaste".to_string()) ]); assert!(bits == vec![ Stringular("toothpaste".to_string()) ]);
} }
#[test] #[test]
fn test_empty() { fn test_empty() {
let bits = SortPart::split_into_parts("".as_slice()); let bits = SortPart::split_into_parts("".to_string());
assert!(bits == vec![]); assert!(bits == vec![]);
} }
#[test] #[test]
fn test_one() { fn test_one() {
let bits = SortPart::split_into_parts("123abc123".as_slice()); let bits = SortPart::split_into_parts("123abc123".to_string());
assert!(bits == vec![ Numeric(123), Stringular("abc".to_string()), Numeric(123) ]); assert!(bits == vec![ Numeric(123), Stringular("abc".to_string()), Numeric(123) ]);
} }
#[test] #[test]
fn test_two() { fn test_two() {
let bits = SortPart::split_into_parts("final version 3.pdf".as_slice()); let bits = SortPart::split_into_parts("final version 3.pdf".to_string());
assert!(bits == vec![ Stringular("final version ".to_string()), Numeric(3), Stringular(".pdf".to_string()) ]); assert!(bits == vec![ Stringular("final version ".to_string()), Numeric(3), Stringular(".pdf".to_string()) ]);
} }
#[test] #[test]
fn test_huge_number() { fn test_huge_number() {
let bits = SortPart::split_into_parts("9999999999999999999999999999999999999999999999999999999".as_slice()); let bits = SortPart::split_into_parts("9999999999999999999999999999999999999999999999999999999".to_string());
assert!(bits == vec![ Stringular("9999999999999999999999999999999999999999999999999999999".to_string()) ]); assert!(bits == vec![ Stringular("9999999999999999999999999999999999999999999999999999999".to_string()) ]);
} }
#[test] #[test]
fn test_case() { fn test_case() {
let bits = SortPart::split_into_parts("123ABC123".as_slice()); let bits = SortPart::split_into_parts("123ABC123".to_string());
assert!(bits == vec![ Numeric(123), Stringular("abc".to_string()), Numeric(123) ]); assert!(bits == vec![ Numeric(123), Stringular("abc".to_string()), Numeric(123) ]);
} }