syncthing/lib/ignore/ignore.go
Simon Frei 605fd6d726
fix(ignore): ensure normalization of patterns and paths match (fixes #9597) (#9717)
In ignores, normalize the input when parsing it.
When scanning, normalize earlier such that the path is already
normalized when checking ignores. This requires splitting normalization
of the string from normalization of the file, as we don't want to
attempt the latter if the file is ignored.

Closes #9598

---------

Co-authored-by: Jakob Borg <jakob@kastelo.net>
2024-09-28 17:16:44 +02:00

634 lines
15 KiB
Go

// Copyright (C) 2014 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package ignore
import (
"bufio"
"bytes"
"crypto/sha256"
"errors"
"fmt"
"io"
"path/filepath"
"strings"
"time"
"github.com/gobwas/glob"
"golang.org/x/text/unicode/norm"
"github.com/syncthing/syncthing/lib/build"
"github.com/syncthing/syncthing/lib/fs"
"github.com/syncthing/syncthing/lib/ignore/ignoreresult"
"github.com/syncthing/syncthing/lib/osutil"
"github.com/syncthing/syncthing/lib/sync"
)
// A ParseError signifies an error with contents of an ignore file,
// including I/O errors on included files. An I/O error on the root level
// ignore file is not a ParseError.
type ParseError struct {
inner error
}
func (e *ParseError) Error() string {
return fmt.Sprintf("parse error: %v", e.inner)
}
func (e *ParseError) Unwrap() error {
return e.inner
}
func IsParseError(err error) bool {
var e *ParseError
return errors.As(err, &e)
}
func parseError(err error) error {
if err == nil {
return nil
}
return &ParseError{err}
}
type Pattern struct {
pattern string
match glob.Glob
result ignoreresult.R
}
func (p Pattern) String() string {
ret := p.pattern
if !p.result.IsIgnored() {
ret = "!" + ret
}
if p.result.IsCaseFolded() {
ret = "(?i)" + ret
}
if p.result.IsDeletable() {
ret = "(?d)" + ret
}
return ret
}
func (p Pattern) allowsSkippingIgnoredDirs() bool {
if p.result.IsIgnored() {
return true
}
if p.pattern[0] != '/' {
return false
}
// A "/**" at the end is allowed and doesn't have any bearing on the
// below checks; remove it before checking.
pattern := strings.TrimSuffix(p.pattern, "/**")
if len(pattern) == 0 {
return true
}
if strings.Contains(pattern[1:], "/") {
return false
}
// Double asterisk everywhere in the path except at the end is bad
return !strings.Contains(strings.TrimSuffix(pattern, "**"), "**")
}
// The ChangeDetector is responsible for determining if files have changed
// on disk. It gets told to Remember() files (name and modtime) and will
// then get asked if a file has been Seen() (i.e., Remember() has been
// called on it) and if any of the files have Changed(). To forget all
// files, call Reset().
type ChangeDetector interface {
Remember(fs fs.Filesystem, name string, modtime time.Time)
Seen(fs fs.Filesystem, name string) bool
Changed() bool
Reset()
}
type Matcher struct {
fs fs.Filesystem
lines []string // exact lines read from .stignore
patterns []Pattern // patterns including those from included files
withCache bool
matches *cache
curHash string
stop chan struct{}
changeDetector ChangeDetector
mut sync.Mutex
}
// An Option can be passed to New()
type Option func(*Matcher)
// WithCache enables or disables lookup caching. The default is disabled.
func WithCache(v bool) Option {
return func(m *Matcher) {
m.withCache = v
}
}
// WithChangeDetector sets a custom ChangeDetector. The default is to simply
// use the on disk modtime for comparison.
func WithChangeDetector(cd ChangeDetector) Option {
return func(m *Matcher) {
m.changeDetector = cd
}
}
func New(fs fs.Filesystem, opts ...Option) *Matcher {
m := &Matcher{
fs: fs,
stop: make(chan struct{}),
mut: sync.NewMutex(),
}
for _, opt := range opts {
opt(m)
}
if m.changeDetector == nil {
m.changeDetector = newModtimeChecker()
}
if m.withCache {
go m.clean(2 * time.Hour)
}
return m
}
// Load and parse a file. The returned error may be of type *ParseError in
// which case a file was loaded from disk but the patterns could not be
// parsed. In this case the contents of the file are nonetheless available
// in the Lines() method.
func (m *Matcher) Load(file string) error {
m.mut.Lock()
defer m.mut.Unlock()
if m.changeDetector.Seen(m.fs, file) && !m.changeDetector.Changed() {
return nil
}
fd, info, err := loadIgnoreFile(m.fs, file)
if err != nil {
m.parseLocked(&bytes.Buffer{}, file)
return err
}
defer fd.Close()
m.changeDetector.Reset()
err = m.parseLocked(fd, file)
// If we failed to parse, don't cache, as next time Load is called
// we'll pretend it's all good.
if err == nil {
m.changeDetector.Remember(m.fs, file, info.ModTime())
}
return err
}
// Load and parse an io.Reader. See Load() for notes on the returned error.
func (m *Matcher) Parse(r io.Reader, file string) error {
m.mut.Lock()
defer m.mut.Unlock()
return m.parseLocked(r, file)
}
func (m *Matcher) parseLocked(r io.Reader, file string) error {
lines, patterns, err := parseIgnoreFile(m.fs, r, file, m.changeDetector, make(map[string]struct{}))
// Error is saved and returned at the end. We process the patterns
// (possibly blank) anyway.
m.lines = lines
newHash := hashPatterns(patterns)
if newHash == m.curHash {
// We've already loaded exactly these patterns.
return err
}
m.curHash = newHash
m.patterns = patterns
if m.withCache {
m.matches = newCache()
}
return err
}
// Match matches the patterns plus temporary and internal files.
//
// The "file" parameter must be in the OS' native unicode format (NFD on macos,
// NFC everywhere else). This is always the case in real usage in syncthing, as
// we ensure native unicode normalisation on all entry points (scanning and from
// protocol) - so no need to normalize when calling this, except e.g. in tests.
func (m *Matcher) Match(file string) (result ignoreresult.R) {
switch {
case fs.IsTemporary(file):
return ignoreresult.IgnoreAndSkip
case fs.IsInternal(file):
return ignoreresult.IgnoreAndSkip
case file == ".":
return ignoreresult.NotIgnored
}
m.mut.Lock()
defer m.mut.Unlock()
if len(m.patterns) == 0 {
return ignoreresult.NotIgnored
}
file = filepath.ToSlash(file)
if m.matches != nil {
// Check the cache for a known result.
res, ok := m.matches.get(file)
if ok {
return res
}
// Update the cache with the result at return time
defer func() {
m.matches.set(file, result)
}()
}
// Check all the patterns for a match. Track whether the patterns so far
// allow skipping matched directories or not. As soon as we hit an
// exclude pattern (with some exceptions), we can't skip directories
// anymore.
var lowercaseFile string
canSkipDir := true
for _, pattern := range m.patterns {
if canSkipDir && !pattern.allowsSkippingIgnoredDirs() {
canSkipDir = false
}
res := pattern.result
if canSkipDir {
res = res.WithSkipDir()
}
if pattern.result.IsCaseFolded() {
if lowercaseFile == "" {
lowercaseFile = strings.ToLower(file)
}
if pattern.match.Match(lowercaseFile) {
return res
}
} else if pattern.match.Match(file) {
return res
}
}
// Default to not matching.
return ignoreresult.NotIgnored
}
// Lines return a list of the unprocessed lines in .stignore at last load
func (m *Matcher) Lines() []string {
m.mut.Lock()
defer m.mut.Unlock()
return m.lines
}
// Patterns return a list of the loaded patterns, as they've been parsed
func (m *Matcher) Patterns() []string {
m.mut.Lock()
defer m.mut.Unlock()
patterns := make([]string, len(m.patterns))
for i, pat := range m.patterns {
patterns[i] = pat.String()
}
return patterns
}
func (m *Matcher) String() string {
return fmt.Sprintf("Matcher/%v@%p", m.Patterns(), m)
}
func (m *Matcher) Hash() string {
m.mut.Lock()
defer m.mut.Unlock()
return m.curHash
}
func (m *Matcher) Stop() {
close(m.stop)
}
func (m *Matcher) clean(d time.Duration) {
t := time.NewTimer(d / 2)
for {
select {
case <-m.stop:
return
case <-t.C:
m.mut.Lock()
if m.matches != nil {
m.matches.clean(d)
}
t.Reset(d / 2)
m.mut.Unlock()
}
}
}
func hashPatterns(patterns []Pattern) string {
h := sha256.New()
for _, pat := range patterns {
h.Write([]byte(pat.String()))
h.Write([]byte("\n"))
}
return fmt.Sprintf("%x", h.Sum(nil))
}
func loadIgnoreFile(fs fs.Filesystem, file string) (fs.File, fs.FileInfo, error) {
fd, err := fs.Open(file)
if err != nil {
return fd, nil, err
}
info, err := fd.Stat()
if err != nil {
fd.Close()
}
return fd, info, err
}
func loadParseIncludeFile(filesystem fs.Filesystem, file string, cd ChangeDetector, linesSeen map[string]struct{}) ([]Pattern, error) {
// Allow escaping the folders filesystem.
// TODO: Deprecate, somehow?
if filesystem.Type() == fs.FilesystemTypeBasic {
uri := filesystem.URI()
joined := filepath.Join(uri, file)
if !fs.IsParent(joined, uri) {
filesystem = fs.NewFilesystem(filesystem.Type(), filepath.Dir(joined))
file = filepath.Base(joined)
}
}
if cd.Seen(filesystem, file) {
return nil, errors.New("multiple include")
}
fd, info, err := loadIgnoreFile(filesystem, file)
if err != nil {
// isNotExist is considered "ok" in a sense of that a folder doesn't have to act
// upon it. This is because it is allowed for .stignore to not exist. However,
// included ignore files are not allowed to be missing and these errors should be
// acted upon on. So we don't preserve the error chain here and manually set an
// error instead, if the file is missing.
if fs.IsNotExist(err) {
err = errors.New("file not found")
}
return nil, err
}
defer fd.Close()
cd.Remember(filesystem, file, info.ModTime())
_, patterns, err := parseIgnoreFile(filesystem, fd, file, cd, linesSeen)
return patterns, err
}
func parseLine(line string) ([]Pattern, error) {
// We use native normalization internally, thus the patterns must match
// that to avoid false negative matches.
line = nativeUnicodeNorm(line)
pattern := Pattern{
result: ignoreresult.Ignored,
}
// Allow prefixes to be specified in any order, but only once.
var seenPrefix [3]bool
for {
if strings.HasPrefix(line, "!") && !seenPrefix[0] {
seenPrefix[0] = true
line = line[1:]
pattern.result = pattern.result.ToggleIgnored()
} else if strings.HasPrefix(line, "(?i)") && !seenPrefix[1] {
seenPrefix[1] = true
pattern.result = pattern.result.WithFoldCase()
line = line[4:]
} else if strings.HasPrefix(line, "(?d)") && !seenPrefix[2] {
seenPrefix[2] = true
pattern.result = pattern.result.WithDeletable()
line = line[4:]
} else {
break
}
}
if line == "" {
return nil, parseError(errors.New("missing pattern"))
}
if pattern.result.IsCaseFolded() {
line = strings.ToLower(line)
}
pattern.pattern = line
var err error
if strings.HasPrefix(line, "/") {
// Pattern is rooted in the current dir only
pattern.match, err = glob.Compile(line[1:], '/')
return []Pattern{pattern}, parseError(err)
}
patterns := make([]Pattern, 2)
if strings.HasPrefix(line, "**/") {
// Add the pattern as is, and without **/ so it matches in current dir
pattern.match, err = glob.Compile(line, '/')
if err != nil {
return nil, parseError(err)
}
patterns[0] = pattern
line = line[3:]
pattern.pattern = line
pattern.match, err = glob.Compile(line, '/')
if err != nil {
return nil, parseError(err)
}
patterns[1] = pattern
return patterns, nil
}
// Path name or pattern, add it so it matches files both in
// current directory and subdirs.
pattern.match, err = glob.Compile(line, '/')
if err != nil {
return nil, parseError(err)
}
patterns[0] = pattern
line = "**/" + line
pattern.pattern = line
pattern.match, err = glob.Compile(line, '/')
if err != nil {
return nil, parseError(err)
}
patterns[1] = pattern
return patterns, nil
}
func nativeUnicodeNorm(s string) string {
if build.IsDarwin || build.IsIOS {
return norm.NFD.String(s)
}
return norm.NFC.String(s)
}
func parseIgnoreFile(fs fs.Filesystem, fd io.Reader, currentFile string, cd ChangeDetector, linesSeen map[string]struct{}) ([]string, []Pattern, error) {
var patterns []Pattern
addPattern := func(line string) error {
newPatterns, err := parseLine(line)
if err != nil {
return fmt.Errorf("invalid pattern %q in ignore file: %w", line, err)
}
patterns = append(patterns, newPatterns...)
return nil
}
scanner := bufio.NewScanner(fd)
var lines []string
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
lines = append(lines, line)
}
if err := scanner.Err(); err != nil {
return nil, nil, err
}
var err error
for _, line := range lines {
if _, ok := linesSeen[line]; ok {
continue
}
linesSeen[line] = struct{}{}
switch {
case line == "":
continue
case strings.HasPrefix(line, "//"):
continue
}
line = filepath.ToSlash(line)
switch {
case strings.HasPrefix(line, "#include"):
fields := strings.SplitN(line, " ", 2)
if len(fields) != 2 {
err = parseError(errors.New("failed to parse #include line: no file?"))
break
}
includeRel := strings.TrimSpace(fields[1])
if includeRel == "" {
err = parseError(errors.New("failed to parse #include line: no file?"))
break
}
includeFile := filepath.Join(filepath.Dir(currentFile), includeRel)
var includePatterns []Pattern
if includePatterns, err = loadParseIncludeFile(fs, includeFile, cd, linesSeen); err == nil {
patterns = append(patterns, includePatterns...)
} else {
// Wrap the error, as if the include does not exist, we get a
// IsNotExists(err) == true error, which we use to check
// existence of the .stignore file, and just end up assuming
// there is none, rather than a broken include.
err = parseError(fmt.Errorf("failed to load include file %s: %w", includeFile, err))
}
case strings.HasSuffix(line, "/**"):
err = addPattern(line)
case strings.HasSuffix(line, "/"):
err = addPattern(line + "**")
default:
err = addPattern(line)
if err == nil {
err = addPattern(line + "/**")
}
}
if err != nil {
return lines, nil, err
}
}
return lines, patterns, nil
}
// WriteIgnores is a convenience function to avoid code duplication
func WriteIgnores(filesystem fs.Filesystem, path string, content []string) error {
if len(content) == 0 {
err := filesystem.Remove(path)
if fs.IsNotExist(err) {
return nil
}
return err
}
fd, err := osutil.CreateAtomicFilesystem(filesystem, path)
if err != nil {
return err
}
wr := osutil.LineEndingsWriter(fd)
for _, line := range content {
fmt.Fprintln(wr, line)
}
if err := fd.Close(); err != nil {
return err
}
filesystem.Hide(path)
return nil
}
type modtimeCheckerKey struct {
fs fs.Filesystem
name string
}
// modtimeChecker is the default implementation of ChangeDetector
type modtimeChecker struct {
modtimes map[modtimeCheckerKey]time.Time
}
func newModtimeChecker() *modtimeChecker {
return &modtimeChecker{
modtimes: map[modtimeCheckerKey]time.Time{},
}
}
func (c *modtimeChecker) Remember(fs fs.Filesystem, name string, modtime time.Time) {
c.modtimes[modtimeCheckerKey{fs, name}] = modtime
}
func (c *modtimeChecker) Seen(fs fs.Filesystem, name string) bool {
_, ok := c.modtimes[modtimeCheckerKey{fs, name}]
return ok
}
func (c *modtimeChecker) Reset() {
c.modtimes = map[modtimeCheckerKey]time.Time{}
}
func (c *modtimeChecker) Changed() bool {
for key, modtime := range c.modtimes {
info, err := key.fs.Stat(key.name)
if err != nil {
return true
}
if !info.ModTime().Equal(modtime) {
return true
}
}
return false
}