exa/src/fs/feature/git.rs

337 lines
12 KiB
Rust
Raw Normal View History

//! Getting the Git status of files and directories.
use std::ffi::OsStr;
#[cfg(target_family = "unix")]
use std::os::unix::ffi::OsStrExt;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use log::*;
2018-12-07 23:43:31 +00:00
use crate::fs::fields as f;
2015-06-08 20:33:39 +00:00
/// A **Git cache** collects the Git repositories found for the paths the
/// user asked about, together with the paths that turned out to have none.
///
/// Plain vectors are used instead of hash maps: hashing isn't worth its
/// overhead when the expected number of Git repositories per exa invocation
/// is 0 or 1.
pub struct GitCache {

    /// Every Git repository discovered so far.
    repos: Vec<GitRepo>,

    /// Paths for which repository discovery has already failed, kept so
    /// they are not queried a second time.
    misses: Vec<PathBuf>,
}
impl GitCache {

    /// Returns whether any cached repository cares about the given path.
    pub fn has_anything_for(&self, index: &Path) -> bool {
        self.repos.iter().any(|repo| repo.has_path(index))
    }

    /// Returns the Git status of the given path.
    ///
    /// The first cached repository that cares about the path is searched;
    /// `prefix_lookup` is forwarded to that search unchanged. When no
    /// repository cares about the path, the default status is returned.
    pub fn get(&self, index: &Path, prefix_lookup: bool) -> f::Git {
        let owner = self.repos.iter().find(|repo| repo.has_path(index));

        match owner {
            Some(repo) => repo.search(index, prefix_lookup),
            None => Default::default(),
        }
    }
}
use std::iter::FromIterator;
impl FromIterator<PathBuf> for GitCache {

    /// Builds a cache by running Git discovery for each path in turn.
    ///
    /// A path is skipped when it has already been recorded as a miss, or
    /// when a repository found earlier already covers it. Otherwise a
    /// repository is searched for above the path:
    ///
    /// - on failure, the path is recorded as a miss;
    /// - if the repository found shares its working directory with one
    ///   already in the cache, the path is added to that repository's
    ///   extra paths instead of storing a duplicate;
    /// - otherwise the new repository is added to the cache.
    fn from_iter<I>(iter: I) -> Self
    where I: IntoIterator<Item=PathBuf>
    {
        let iter = iter.into_iter();
        let mut git = Self {
            repos: Vec::with_capacity(iter.size_hint().0),
            misses: Vec::new(),
        };

        for path in iter {
            if git.misses.contains(&path) {
                debug!("Skipping {:?} because it already came back Gitless", path);
                continue;
            }

            if git.repos.iter().any(|e| e.has_path(&path)) {
                debug!("Skipping {:?} because we already queried it", path);
                continue;
            }

            match GitRepo::discover(path) {
                Ok(found) => {
                    // Two different input paths can resolve to the same
                    // repository; the working directory identifies it.
                    match git.repos.iter_mut().find(|e| e.has_workdir(&found.workdir)) {
                        Some(existing) => {
                            debug!("Adding to existing repo (workdir matches with {:?})", existing.workdir);
                            existing.extra_paths.push(found.original_path);
                        }
                        None => {
                            debug!("Discovered new Git repo");
                            git.repos.push(found);
                        }
                    }
                }
                Err(miss) => {
                    git.misses.push(miss);
                }
            }
        }

        git
    }
}
2017-08-26 22:53:47 +00:00
/// A **Git repository** is one we've discovered somewhere on the filesystem.
pub struct GitRepo {

    /// What can be queried about the repository: either the `git2`
    /// repository itself, or the statuses cached from an earlier query.
    contents: Mutex<GitContents>,

    /// The repository's working directory.
    /// Two repositories with the same working directory are treated as the
    /// same repository.
    workdir: PathBuf,

    /// The path whose lookup led to this repository being discovered.
    /// It matters just as much as the entries in `extra_paths` (and is
    /// checked before them); it is a separate field so that there is no
    /// need to deal with a non-empty Vec.
    original_path: PathBuf,

    /// Further paths whose lookup ended up at this same repository.
    extra_paths: Vec<PathBuf>,
}
/// A repository's queried state.
enum GitContents {

    /// The repository has not been queried yet: all the interesting Git
    /// stuff still goes through the `git2` handle.
    Before {
        repo: git2::Repository,
    },

    /// Placeholder that `GitRepo::search` swaps in so that the repository
    /// can be moved out of the `Before` variant while its statuses are
    /// being collected.
    Processing,

    /// The statuses extracted from the repository, available only once the
    /// query has actually been performed.
    After {
        statuses: Git,
    },
}
2017-08-28 17:40:52 +00:00
impl GitRepo {
/// Searches through this repository for a path (to a file or directory,
/// depending on the prefix-lookup flag) and returns its Git status.
///
/// Actually querying the `git2` repository for the mapping of paths to
/// Git statuses is only done once, and gets cached so we dont need to
/// re-query the entire repository the times after that.
///
/// The temporary `Processing` enum variant is used after the `git2`
/// repository is moved out, but before the results have been moved in!
2020-10-10 14:57:40 +00:00
/// See <https://stackoverflow.com/q/45985827/3484614>
fn search(&self, index: &Path, prefix_lookup: bool) -> f::Git {
use std::mem::replace;
let mut contents = self.contents.lock().unwrap();
if let GitContents::After { ref statuses } = *contents {
debug!("Git repo {:?} has been found in cache", &self.workdir);
return statuses.status(index, prefix_lookup);
}
debug!("Querying Git repo {:?} for the first time", &self.workdir);
let repo = replace(&mut *contents, GitContents::Processing).inner_repo();
2018-06-19 12:58:03 +00:00
let statuses = repo_to_statuses(&repo, &self.workdir);
let result = statuses.status(index, prefix_lookup);
let _processing = replace(&mut *contents, GitContents::After { statuses });
result
}
/// Whether this repository has the given working directory.
fn has_workdir(&self, path: &Path) -> bool {
self.workdir == path
}
/// Whether this repository cares about the given path at all.
fn has_path(&self, path: &Path) -> bool {
path.starts_with(&self.original_path) || self.extra_paths.iter().any(|e| path.starts_with(e))
}
/// Searches for a Git repository at any point above the given path.
/// Returns the original buffer if none is found.
fn discover(path: PathBuf) -> Result<Self, PathBuf> {
2017-08-28 17:40:52 +00:00
info!("Searching for Git repository above {:?}", path);
let repo = match git2::Repository::discover(&path) {
Ok(r) => r,
Err(e) => {
error!("Error discovering Git repositories: {:?}", e);
return Err(path);
2017-08-28 17:40:52 +00:00
}
};
2017-08-28 17:40:52 +00:00
if let Some(workdir) = repo.workdir() {
let workdir = workdir.to_path_buf();
let contents = Mutex::new(GitContents::Before { repo });
Ok(Self { contents, workdir, original_path: path, extra_paths: Vec::new() })
}
else {
warn!("Repository has no workdir?");
Err(path)
}
2017-08-28 17:40:52 +00:00
}
}
impl GitContents {

    /// Consumes the value and hands back the `git2` repository inside it.
    ///
    /// This assumes the repository has not been queried yet, so that the
    /// value is still the `Before` variant. It exists because the whole
    /// enum variant gets replaced when a repository is queried.
    ///
    /// # Panics
    ///
    /// Panics if the value is any variant other than `Before`.
    fn inner_repo(self) -> git2::Repository {
        match self {
            Self::Before { repo } => repo,
            Self::Processing | Self::After { .. } => {
                unreachable!("Tried to extract a non-Repository")
            }
        }
    }
}
/// Iterates through a repositorys statuses, consuming it and returning the
/// mapping of files to their Git status.
/// We will have already used the working directory at this point, so it gets
/// passed in rather than deriving it from the `Repository` again.
2018-06-19 12:58:03 +00:00
fn repo_to_statuses(repo: &git2::Repository, workdir: &Path) -> Git {
let mut statuses = Vec::new();
info!("Getting Git statuses for repo with workdir {:?}", workdir);
match repo.statuses(None) {
Ok(es) => {
for e in es.iter() {
#[cfg(target_family = "unix")]
let path = workdir.join(Path::new(OsStr::from_bytes(e.path_bytes())));
// TODO: handle non Unix systems better:
// https://github.com/ogham/exa/issues/698
#[cfg(not(target_family = "unix"))]
let path = workdir.join(Path::new(e.path().unwrap()));
let elem = (path, e.status());
statuses.push(elem);
}
}
Err(e) => {
2021-04-01 08:39:12 +00:00
error!("Error looking up Git statuses: {:?}", e);
}
}
Git { statuses }
}
// The `repo.statuses` call above takes a long time. exa debug output:
//
// 20.311276 INFO:exa::fs::feature::git: Getting Git statuses for repo with workdir "/vagrant/"
// 20.799610 DEBUG:exa::output::table: Getting Git status for file "./Cargo.toml"
//
// Even inserting another logging line immediately afterwards doesn't make it
// look any faster.
/// Container of Git statuses for all the files in this folder's Git
/// repository.
struct Git {

    /// Pairs of a file's path and the Git status flags reported for it.
    statuses: Vec<(PathBuf, git2::Status)>,
}
impl Git {

    /// Returns the status of the given path, treating it as a directory
    /// when `prefix_lookup` is set and as a file otherwise.
    ///
    /// A "prefix lookup" reports the aggregate status of every path that
    /// starts with the given prefix (in other words, a directory).
    fn status(&self, index: &Path, prefix_lookup: bool) -> f::Git {
        if prefix_lookup {
            self.dir_status(index)
        }
        else {
            self.file_status(index)
        }
    }

    /// Returns the user-facing status of a file.
    ///
    /// Entries recorded for exactly this file are combined. An entry that
    /// is purely "ignored" also counts when it is the file itself or one of
    /// its parent directories.
    fn file_status(&self, file: &Path) -> f::Git {
        let target = reorient(file);
        let mut combined = git2::Status::empty();

        for (entry_path, entry_status) in &self.statuses {
            let applies = if *entry_status == git2::Status::IGNORED {
                target.starts_with(entry_path)
            }
            else {
                *entry_path == target
            };

            if applies {
                combined = combined | *entry_status;
            }
        }

        f::Git {
            staged: index_status(combined),
            unstaged: working_tree_status(combined),
        }
    }

    /// Returns the combined, user-facing status of a directory.
    ///
    /// Statuses aggregate upwards: the directory takes on the status of
    /// every entry found underneath it (so it counts as modified if any
    /// file under it is modified). The ignored status works the other way
    /// round: it applies when the directory itself, or one of its parent
    /// directories, is ignored.
    fn dir_status(&self, dir: &Path) -> f::Git {
        let target = reorient(dir);
        let mut combined = git2::Status::empty();

        for (entry_path, entry_status) in &self.statuses {
            let applies = if *entry_status == git2::Status::IGNORED {
                target.starts_with(entry_path)
            }
            else {
                entry_path.starts_with(&target)
            };

            if applies {
                combined = combined | *entry_status;
            }
        }

        f::Git {
            staged: index_status(combined),
            unstaged: working_tree_status(combined),
        }
    }
}
/// Converts a path to an absolute path based on the current directory.
///
/// Paths need to be absolute for them to be compared properly, otherwise
/// you'd ask a repo about "./README.md" but it only knows about
/// "/vagrant/README.md", prefixed by the workdir.
///
/// The path is joined onto the current directory (or onto "." when the
/// current directory cannot be determined) and then canonicalised. If
/// canonicalisation fails, the joined path is returned as it is.
fn reorient(path: &Path) -> PathBuf {
    use std::env::current_dir;

    let joined = match current_dir() {
        Ok(dir) => dir.join(path),
        Err(_) => Path::new(".").join(path),
    };

    // Canonicalising fails for paths that don't exist on disk; fall back
    // to the plain joined path in that case.
    joined.canonicalize().unwrap_or(joined)
}
/// The status to display if the file has been modified, but not staged.
///
/// The flags are tested in a fixed order and the first one present wins:
/// new, modified, deleted, renamed, type-changed, ignored, conflicted.
/// When none of them is set, the file counts as not modified.
fn working_tree_status(status: git2::Status) -> f::GitStatus {
    if status.contains(git2::Status::WT_NEW) {
        f::GitStatus::New
    }
    else if status.contains(git2::Status::WT_MODIFIED) {
        f::GitStatus::Modified
    }
    else if status.contains(git2::Status::WT_DELETED) {
        f::GitStatus::Deleted
    }
    else if status.contains(git2::Status::WT_RENAMED) {
        f::GitStatus::Renamed
    }
    else if status.contains(git2::Status::WT_TYPECHANGE) {
        f::GitStatus::TypeChange
    }
    else if status.contains(git2::Status::IGNORED) {
        f::GitStatus::Ignored
    }
    else if status.contains(git2::Status::CONFLICTED) {
        f::GitStatus::Conflicted
    }
    else {
        f::GitStatus::NotModified
    }
}
/// The status to display if the file has been modified and the change
/// has been staged.
///
/// The index flags are tested in a fixed order and the first one present
/// wins: new, modified, deleted, renamed, type-changed. When none of them
/// is set, the file counts as not modified.
fn index_status(status: git2::Status) -> f::GitStatus {
    if status.contains(git2::Status::INDEX_NEW) {
        f::GitStatus::New
    }
    else if status.contains(git2::Status::INDEX_MODIFIED) {
        f::GitStatus::Modified
    }
    else if status.contains(git2::Status::INDEX_DELETED) {
        f::GitStatus::Deleted
    }
    else if status.contains(git2::Status::INDEX_RENAMED) {
        f::GitStatus::Renamed
    }
    else if status.contains(git2::Status::INDEX_TYPECHANGE) {
        f::GitStatus::TypeChange
    }
    else {
        f::GitStatus::NotModified
    }
}