exa/src/options/parser.rs
Benjamin Sago 2d1f462bfa Switch to the new options parser
This commit removes the dependency on the ‘getopts’ crate entirely, and re-writes all its uses to use the new options parser instead.

As expected there are casualties galore:

- We now need to collect the options into a vector at the start, so we can use references to them, knowing they’ll be stored *somewhere*.
- Because OsString isn’t Display, its Debug impl gets used instead. (This is hopefully temporary)
- Options that take values (such as ‘sort’ or ‘time-style’) now parse those values with ‘to_string_lossy’. The ‘lossy’ part means “I’m at a loss for what to do here”
- Error messages got a lot worse, but “--tree --all --all” is now a special case of error rather than just another Misfire::Useless.
- Some tests had to be re-written to deal with the fact that the parser works with references.
- ParseError loses its lifetime and owns its contents, to avoid having to attach <'a> to Misfire.
- The parser now takes an iterator instead of a slice.
- OsStrings can’t be ‘match’ patterns, so the code devolves to using long Eq chains instead.
- Make a change to the xtest that assumed an input argument with invalid UTF-8 in was always an error to stderr, when that now in fact works!
- Fix a bug in Vagrant where ‘exa’ and ‘rexa’ didn’t properly escape filenames with spaces in.
2017-07-26 17:48:18 +01:00

443 lines
17 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! A general parser for command-line options.
//!
//! exa uses its own hand-rolled parser for command-line options. It supports
//! the following syntax:
//!
//! - Long options: `--inode`, `--grid`
//! - Long options with values: `--sort size`, `--level=4`
//! - Short options: `-i`, `-G`
//! - Short options with values: `-ssize`, `-L=4`
//!
//! These values can be mixed and matched: `exa -lssize --grid`. If you've used
//! other command-line programs, then hopefully it'll work much like them.
//!
//! Because exa already has its own files for the help text, shell completions,
//! man page, and readme, it can get away with having the options parser do
//! very little: all it really needs to do is parse a slice of strings.
//!
//!
//! ## UTF-8 and `OsStr`
//!
//! The parser uses `OsStr` as its string type. This is necessary for exa to
//! list files that have invalid UTF-8 in their names: by treating file paths
//! as bytes with no encoding, a file can be specified on the command-line and
//! be looked up without having to be encoded into a `str` first.
//!
//! It also avoids the overhead of checking for invalid UTF-8 when parsing
//! command-line options, as all the options and their values (such as
//! `--sort size`) are guaranteed to just be 8-bit ASCII.
#![allow(unused_variables, dead_code)]
use std::ffi::{OsStr, OsString};
use std::fmt;
/// The single byte that names a short argument, such as `b'l'` for `-l`.
pub type ShortArg = u8;

/// The name of a long argument without its leading dashes, such as `"long"`
/// for `--long`.
pub type LongArg = &'static str;

/// The spelling that an argument was actually given in on the command-line:
/// either its one-byte short form or its long name.
///
/// Both payloads are trivially copyable, so the type is `Copy`: a flag can
/// be recorded in the results and still be used afterwards (for instance in
/// an error) without cloning.
#[derive(PartialEq, Eq, Debug, Copy, Clone)]
pub enum Flag {
    /// A short flag, holding the byte that followed the dash.
    Short(ShortArg),

    /// A long flag, holding the name that followed the two dashes.
    Long(LongArg),
}
impl Flag {
    /// Returns whether this flag is one of the spellings of `arg`: a short
    /// flag matches on the argument's short byte (if it has one), and a long
    /// flag matches on the argument's long name.
    fn matches(&self, arg: &Arg) -> bool {
        match *self {
            Flag::Long(name)    => name == arg.long,
            Flag::Short(letter) => Some(letter) == arg.short,
        }
    }
}
/// How strictly repeated or overridden arguments should be treated.
///
/// Nothing in this file consults this type; judging by the variant names it
/// is meant for the code that interprets the parsed matches — confirm
/// against the callers.
///
/// It is a plain fieldless enum, so it is `Copy` and `Eq` like the other
/// small option enums here and can be passed around by value.
#[derive(PartialEq, Eq, Debug, Copy, Clone)]
pub enum Strictness {
    /// Presumably: treat an argument that is given redundantly as an error.
    ComplainAboutRedundantArguments,

    /// Presumably: let the last occurrence of an argument win.
    UseLastArguments,
}
/// Whether an argument must be followed by a value or must not have one.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum TakesValue {
    /// The argument has to be given a value; the parser reports
    /// `ParseError::NeedsValue` when none is available.
    Necessary,

    /// The argument cannot be given a value; the parser reports
    /// `ParseError::ForbiddenValue` when one is supplied with `=`.
    Forbidden,
}
/// The description of one argument that the parser should recognise.
#[derive(Debug, PartialEq)]
pub struct Arg {
    /// The byte of the short form (`-l`), if the argument has one.
    pub short: Option<ShortArg>,

    /// The name of the long form (`--long`), without the dashes.
    pub long: LongArg,

    /// Whether the argument must, or must not, be given a value.
    pub takes_value: TakesValue,
}
impl fmt::Display for Arg {
    /// Formats the argument as its long form, followed by the short form in
    /// parentheses when there is one: `--long (-l)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self.short {
            Some(letter) => write!(f, "--{} (-{})", self.long, letter as char),
            None         => write!(f, "--{}", self.long),
        }
    }
}
/// The complete, statically-allocated list of arguments to parse against.
#[derive(Debug, PartialEq)]
pub struct Args(pub &'static [&'static Arg]);
impl Args {
    /// Finds the first argument whose short form is the given byte, or
    /// returns `ParseError::UnknownShortArgument` carrying that byte.
    fn lookup_short(&self, short: ShortArg) -> Result<&Arg, ParseError> {
        for candidate in self.0 {
            if candidate.short == Some(short) {
                return Ok(*candidate);
            }
        }

        Err(ParseError::UnknownShortArgument { attempt: short })
    }

    /// Finds the first argument whose long name equals the given string, or
    /// returns `ParseError::UnknownArgument` owning a copy of that string.
    fn lookup_long(&self, long: &OsStr) -> Result<&Arg, ParseError> {
        for candidate in self.0 {
            if candidate.long == long {
                return Ok(*candidate);
            }
        }

        Err(ParseError::UnknownArgument { attempt: long.to_os_string() })
    }
}
/// The result of parsing: every flag that was recognised, plus the strings
/// that were not flags. Everything borrows from the input strings.
#[derive(Debug, PartialEq)]
pub struct Matches<'args> {

    /// Each flag in the order it was given, paired with its value if it had
    /// one. Long and short flags share this one vector because it is usually
    /// the occurrence nearest the end that should count.
    pub flags: Vec<(Flag, Option<&'args OsStr>)>,

    /// The strings that were not parsed as flags or as flag values.
    pub frees: Vec<&'args OsStr>,
}
impl<'a> Matches<'a> {
pub fn has(&self, arg: &Arg) -> bool {
self.flags.iter().rev()
.find(|tuple| tuple.1.is_none() && tuple.0.matches(arg))
.is_some()
}
pub fn get(&self, arg: &Arg) -> Option<&OsStr> {
self.flags.iter().rev()
.find(|tuple| tuple.1.is_some() && tuple.0.matches(arg))
.map(|tuple| tuple.1.unwrap())
}
pub fn count(&self, arg: &Arg) -> usize {
self.flags.iter()
.filter(|tuple| tuple.0.matches(arg))
.count()
}
}
/// The ways in which parsing the command-line strings can fail.
#[derive(Debug, PartialEq)]
pub enum ParseError {

    /// A flag that needs a value was given without one.
    NeedsValue { flag: Flag },

    /// A flag that cannot take a value was given one with `=`.
    ForbiddenValue { flag: Flag },

    /// A short flag byte that does not belong to any known argument.
    UnknownShortArgument { attempt: ShortArg },

    /// A long flag name that does not belong to any known argument.
    UnknownArgument { attempt: OsString },
}
// It's technically possible for ParseError::UnknownArgument to borrow its
// OsStr rather than owning it, but that would give ParseError a lifetime,
// which would give Misfire a lifetime, which gets used everywhere. And this
// only happens when an error occurs, so it's not really worth it.
/// Parses the command-line strings in `inputs` against the known `args`,
/// producing the flags and free strings that were found, or the first
/// error encountered.
///
/// - Everything after a lone `--` is a free string, as is a lone `-` and
///   anything that does not start with a dash.
/// - `--name=value` and `--name value` both give a value to a long flag.
/// - `-abc` is a cluster of short flags. If one of them needs a value, the
///   rest of the cluster is its value, or failing that the next input.
/// - `-abc=value` gives the value to the last flag of the cluster; none of
///   the flags before it may need a value.
///
/// The strings are inspected as raw bytes through the Unix `OsStrExt`
/// extension trait, so nothing has to be valid UTF-8.
pub fn parse<'args, I>(args: &Args, inputs: I) -> Result<Matches<'args>, ParseError>
where I: IntoIterator<Item=&'args OsString> {
    use std::os::unix::ffi::OsStrExt;
    use self::TakesValue::*;

    let mut matches = Matches {
        flags: Vec::new(),
        frees: Vec::new(),
    };

    // Cleared once a lone `--` is seen, after which nothing is a flag.
    let mut options_allowed = true;

    // Not a `for` loop: flags that need a value pull it from this iterator.
    let mut remaining = inputs.into_iter();
    while let Some(input) = remaining.next() {
        let bytes = input.as_bytes();

        // Free strings: anything after `--`, anything without a leading
        // dash, and the lone dash itself.
        if !options_allowed || !bytes.starts_with(b"-") || bytes.len() == 1 {
            matches.frees.push(input.as_os_str());
            continue;
        }

        if input == "--" {
            options_allowed = false;
            continue;
        }

        // Long flags: `--name`, `--name=value`, or `--name value`.
        if bytes.starts_with(b"--") {
            let name = OsStr::from_bytes(&bytes[2..]);

            match split_on_equals(name) {
                Some((name, value)) => {
                    let found = args.lookup_long(name)?;
                    let flag = Flag::Long(found.long);

                    match found.takes_value {
                        Forbidden => return Err(ParseError::ForbiddenValue { flag }),
                        Necessary => matches.flags.push((flag, Some(value))),
                    }
                }
                None => {
                    let found = args.lookup_long(name)?;
                    let flag = Flag::Long(found.long);

                    let value = match found.takes_value {
                        Forbidden => None,
                        Necessary => match remaining.next() {
                            Some(next) => Some(next.as_os_str()),
                            None       => return Err(ParseError::NeedsValue { flag }),
                        },
                    };

                    matches.flags.push((flag, value));
                }
            }

            continue;
        }

        // Short flags: everything after the single dash is a cluster.
        let cluster = &bytes[1..];

        if let Some((letters, value)) = split_on_equals(OsStr::from_bytes(cluster)) {
            // `split_on_equals` never returns an empty left-hand side, so
            // there is always a last letter to give the value to.
            let (&last, leading) = letters.as_bytes().split_last().unwrap();

            for &letter in leading {
                let found = args.lookup_short(letter)?;
                let flag = Flag::Short(letter);

                match found.takes_value {
                    Forbidden => matches.flags.push((flag, None)),
                    Necessary => return Err(ParseError::NeedsValue { flag }),
                }
            }

            let found = args.lookup_short(last)?;
            let flag = Flag::Short(last);

            match found.takes_value {
                Forbidden => return Err(ParseError::ForbiddenValue { flag }),
                Necessary => matches.flags.push((flag, Some(value))),
            }
        }
        else {
            for (offset, &letter) in cluster.iter().enumerate() {
                let found = args.lookup_short(letter)?;
                let flag = Flag::Short(letter);

                match found.takes_value {
                    Forbidden => matches.flags.push((flag, None)),
                    Necessary => {
                        // Whatever follows the letter in this cluster is
                        // its value, which ends the cluster.
                        let attached = &cluster[offset + 1 ..];
                        if !attached.is_empty() {
                            matches.flags.push((flag, Some(OsStr::from_bytes(attached))));
                            break;
                        }

                        // Otherwise the value has to be the next input.
                        match remaining.next() {
                            Some(next) => matches.flags.push((flag, Some(next.as_os_str()))),
                            None       => return Err(ParseError::NeedsValue { flag }),
                        }
                    }
                }
            }
        }
    }

    Ok(matches)
}
/// Splits a string around its first `=` byte, returning the parts before
/// and after it. Returns `None` if there is no `=`, or if either part would
/// be empty.
fn split_on_equals(input: &OsStr) -> Option<(&OsStr, &OsStr)> {
    use std::os::unix::ffi::OsStrExt;

    let bytes = input.as_bytes();

    let index = match bytes.iter().position(|&byte| byte == b'=') {
        Some(index) => index,
        None        => return None,
    };

    // Skip over the `=` itself when taking the right-hand side.
    let before = &bytes[.. index];
    let after  = &bytes[index + 1 ..];

    if before.is_empty() || after.is_empty() {
        return None;
    }

    Some((OsStr::from_bytes(before), OsStr::from_bytes(after)))
}
/// Creates an owned `OsString` from a string literal (used in tests).
#[cfg(test)]
fn os(input: &'static str) -> OsString {
    OsString::from(input)
}
#[cfg(test)]
mod split_test {
    use super::{split_on_equals, os};

    /// Generates one test per line: either the input must not split at all,
    /// or it must split into exactly the two given halves.
    macro_rules! test_split {
        ($name:ident: $input:expr => None) => {
            #[test]
            fn $name() {
                let input = os($input);
                assert!(split_on_equals(&input).is_none());
            }
        };

        ($name:ident: $input:expr => $before:expr, $after:expr) => {
            #[test]
            fn $name() {
                let (input, before, after) = (os($input), os($before), os($after));
                assert_eq!(split_on_equals(&input), Some((&*before, &*after)));
            }
        };
    }

    // Inputs with a missing `=` or a missing half
    test_split!(empty:   ""      => None);
    test_split!(letter:  "a"     => None);
    test_split!(just:    "="     => None);
    test_split!(intro:   "=bbb"  => None);
    test_split!(denou:   "aaa="  => None);

    // Inputs that split
    test_split!(equals:  "aaa=bbb"          => "aaa",    "bbb");
    test_split!(sort:    "--sort=size"      => "--sort", "size");
    test_split!(more:    "this=that=other"  => "this",   "that=other");
}
#[cfg(test)]
mod parse_test {
    use super::*;

    /// Generates a test that parses the given array of `OsString`s against
    /// `TEST_ARGS` and compares the outcome with the expected result.
    macro_rules! test {
        ($name:ident: $input:expr => $result:expr) => {
            #[test]
            fn $name() {
                let bits = $input;

                // `parse` wants an iterator of *references*. Calling
                // `.into_iter()` on an array only yields references in the
                // older editions (since Rust 2021 it yields owned values),
                // so borrow explicitly with `.iter()`.
                let results = parse(&Args(TEST_ARGS), bits.iter());
                assert_eq!(results, $result);
            }
        };
    }

    static TEST_ARGS: &[&Arg] = &[
        &Arg { short: Some(b'l'), long: "long",    takes_value: TakesValue::Forbidden },
        &Arg { short: Some(b'v'), long: "verbose", takes_value: TakesValue::Forbidden },
        &Arg { short: Some(b'c'), long: "count",   takes_value: TakesValue::Necessary }
    ];

    // Just filenames
    test!(empty: [] => Ok(Matches { frees: vec![], flags: vec![] }));
    test!(one_arg: [os("exa")] => Ok(Matches { frees: vec![ &*os("exa") ], flags: vec![] }));

    // Dashes and double dashes
    test!(one_dash: [os("-")] => Ok(Matches { frees: vec![ &*os("-") ], flags: vec![] }));
    test!(two_dashes: [os("--")] => Ok(Matches { frees: vec![], flags: vec![] }));
    test!(two_file: [os("--"), os("file")] => Ok(Matches { frees: vec![ &*os("file") ], flags: vec![] }));
    test!(two_arg_l: [os("--"), os("--long")] => Ok(Matches { frees: vec![ &*os("--long") ], flags: vec![] }));
    test!(two_arg_s: [os("--"), os("-l")] => Ok(Matches { frees: vec![ &*os("-l") ], flags: vec![] }));

    // Long args
    test!(long: [os("--long")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Long("long"), None) ] }));
    test!(long_then: [os("--long"), os("4")] => Ok(Matches { frees: vec![ &*os("4") ], flags: vec![ (Flag::Long("long"), None) ] }));
    test!(long_two: [os("--long"), os("--verbose")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Long("long"), None), (Flag::Long("verbose"), None) ] }));

    // Long args with values
    test!(bad_equals: [os("--long=equals")] => Err(ParseError::ForbiddenValue { flag: Flag::Long("long") }));
    test!(no_arg: [os("--count")] => Err(ParseError::NeedsValue { flag: Flag::Long("count") }));
    test!(arg_equals: [os("--count=4")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Long("count"), Some(&*os("4"))) ] }));
    test!(arg_then: [os("--count"), os("4")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Long("count"), Some(&*os("4"))) ] }));

    // Short args
    test!(short: [os("-l")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Short(b'l'), None) ] }));
    test!(short_then: [os("-l"), os("4")] => Ok(Matches { frees: vec![ &*os("4") ], flags: vec![ (Flag::Short(b'l'), None) ] }));
    test!(short_two: [os("-lv")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Short(b'l'), None), (Flag::Short(b'v'), None) ] }));
    test!(mixed: [os("-v"), os("--long")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Short(b'v'), None), (Flag::Long("long"), None) ] }));

    // Short args with values
    test!(bad_short: [os("-l=equals")] => Err(ParseError::ForbiddenValue { flag: Flag::Short(b'l') }));
    test!(short_none: [os("-c")] => Err(ParseError::NeedsValue { flag: Flag::Short(b'c') }));
    test!(short_arg_eq: [os("-c=4")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Short(b'c'), Some(&*os("4"))) ] }));
    test!(short_arg_then: [os("-c"), os("4")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Short(b'c'), Some(&*os("4"))) ] }));
    test!(short_two_together: [os("-lctwo")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Short(b'l'), None), (Flag::Short(b'c'), Some(&*os("two"))) ] }));
    test!(short_two_equals: [os("-lc=two")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Short(b'l'), None), (Flag::Short(b'c'), Some(&*os("two"))) ] }));
    test!(short_two_next: [os("-lc"), os("two")] => Ok(Matches { frees: vec![], flags: vec![ (Flag::Short(b'l'), None), (Flag::Short(b'c'), Some(&*os("two"))) ] }));

    // Unknown args
    test!(unknown_long: [os("--quiet")] => Err(ParseError::UnknownArgument { attempt: os("quiet") }));
    test!(unknown_long_eq: [os("--quiet=shhh")] => Err(ParseError::UnknownArgument { attempt: os("quiet") }));
    test!(unknown_short: [os("-q")] => Err(ParseError::UnknownShortArgument { attempt: b'q' }));
    test!(unknown_short_2nd: [os("-lq")] => Err(ParseError::UnknownShortArgument { attempt: b'q' }));
    test!(unknown_short_eq: [os("-q=shhh")] => Err(ParseError::UnknownShortArgument { attempt: b'q' }));
    test!(unknown_short_2nd_eq: [os("-lq=shhh")] => Err(ParseError::UnknownShortArgument { attempt: b'q' }));
}
#[cfg(test)]
mod matches_test {
    use super::*;

    /// Generates a test that builds a `Matches` from the given flags (with
    /// no free strings) and checks what `has` reports for an argument.
    macro_rules! test {
        ($name:ident: $input:expr, has $param:expr => $result:expr) => {
            #[test]
            fn $name() {
                let matches = Matches { frees: Vec::new(), flags: $input.to_vec() };
                assert_eq!(matches.has(&$param), $result);
            }
        };
    }

    static VERBOSE: Arg = Arg { short: Some(b'v'), long: "verbose", takes_value: TakesValue::Forbidden };
    static COUNT:   Arg = Arg { short: Some(b'c'), long: "count",   takes_value: TakesValue::Necessary };

    static TEST_ARGS: &[&Arg] = &[ &VERBOSE, &COUNT ];

    // Flags without values, looked up with `has`
    test!(short_never: [], has VERBOSE => false);
    test!(short_once: [(Flag::Short(b'v'), None)], has VERBOSE => true);
    test!(short_twice: [(Flag::Short(b'v'), None), (Flag::Short(b'v'), None)], has VERBOSE => true);
    test!(long_once: [(Flag::Long("verbose"), None)], has VERBOSE => true);
    test!(long_twice: [(Flag::Long("verbose"), None), (Flag::Long("verbose"), None)], has VERBOSE => true);
    test!(long_mixed: [(Flag::Long("verbose"), None), (Flag::Short(b'v'), None)], has VERBOSE => true);

    /// A single value-carrying flag is returned by `get`.
    #[test]
    fn only_count() {
        let everything = os("everything");

        let matches = Matches {
            frees: Vec::new(),
            flags: vec![ (Flag::Short(b'c'), Some(&*everything)) ],
        };

        assert_eq!(matches.get(&COUNT), Some(&*everything));
    }

    /// When a flag is given twice, `get` returns the value nearest the end.
    #[test]
    fn rightmost_count() {
        let everything = os("everything");
        let nothing    = os("nothing");

        let matches = Matches {
            frees: Vec::new(),
            flags: vec![ (Flag::Short(b'c'), Some(&*everything)),
                         (Flag::Short(b'c'), Some(&*nothing)) ],
        };

        assert_eq!(matches.get(&COUNT), Some(&*nothing));
    }

    /// With no flags at all, `has` reports nothing.
    #[test]
    fn no_count() {
        let matches = Matches { frees: Vec::new(), flags: Vec::new() };
        assert_eq!(matches.has(&COUNT), false);
    }
}