syncthing/lib/fs/folding_test.go
greatroar 3e24d82513
lib/fs: Optimize UnicodeLowercase (#6979)
Most notably, it now detects all-lowercase files and returns these
as-is. The tests have been expanded with two cases and are now used
as a benchmark (admittedly a rather trivial one).

name                           old time/op    new time/op    delta
UnicodeLowercaseMaybeChange-8    4.59µs ± 2%    4.57µs ± 1%    ~     (p=0.197 n=10+10)
UnicodeLowercaseNoChange-8       3.26µs ± 1%    3.09µs ± 1%  -5.27%  (p=0.000 n=9+10)
2020-09-11 09:16:10 +02:00

77 lines
2.2 KiB
Go

// Copyright (C) 2017 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package fs
import (
"testing"
)
// caseCases is the shared table for the UnicodeLowercase test and
// benchmarks in this file. Each entry is a pair: element 0 is the string
// passed to UnicodeLowercase and element 1 is the result it is expected to
// return.
var caseCases = [][2]string{
// Empty and already-lowercase inputs come back unchanged.
{"", ""},
{"hej", "hej"},
// ASCII letters are lowercased; punctuation is left alone.
{"HeJ!@#", "hej!@#"},
// Western Europe diacritical stuff is trivial.
{"ÜBERRÄKSMÖRGÅS", "überräksmörgås"},
// As are ligatures.
{"Æglefinus", "æglefinus"},
{"IJssel", "ijssel"},
// Cyrillic seems regular as well.
{"Привет", "привет"},
// Greek has multiple lower case characters for things depending on
// context; we should always choose the same one.
{"Ὀδυσσεύς", "ὀδυσσεύσ"},
{"ὈΔΥΣΣΕΎΣ", "ὀδυσσεύσ"},
// German ß doesn't really have an upper case variant, and we
// shouldn't mess things up when lower casing it either. We don't
// attempt to make ß equivalent to "ss".
{"Reichwaldstraße", "reichwaldstraße"},
// The Turks do their thing with the Is. Like the Greek example,
// we pick just the one canonicalized "i", although you can argue
// with this. From what I understand most operating systems don't
// get this right anyway.
{"İI", "ii"},
// Arabic doesn't do case folding.
{"العَرَبِيَّة", "العَرَبِيَّة"},
// Neither does Hebrew.
{"עברית", "עברית"},
// Nor Chinese, in any variant.
{"汉语/漢語 or 中文", "汉语/漢語 or 中文"},
// Nor katakana, as far as I can tell.
{"チャーハン", "チャーハン"},
// Some special Unicode characters, however, are folded by OSes.
{"\u212A", "k"},
}
// TestUnicodeLowercase runs UnicodeLowercase on the first element of every
// caseCases pair and reports an error for each pair whose result differs
// from the second element. All mismatches are reported; the test does not
// stop at the first one.
func TestUnicodeLowercase(t *testing.T) {
	for idx := range caseCases {
		input, want := caseCases[idx][0], caseCases[idx][1]
		if got := UnicodeLowercase(input); got != want {
			t.Errorf("UnicodeLowercase(%q) => %q, expected %q", input, got, want)
		}
	}
}
// BenchmarkUnicodeLowercaseMaybeChange times one full pass of
// UnicodeLowercase over the input side (element 0) of every caseCases pair
// per benchmark iteration, with allocation reporting enabled.
func BenchmarkUnicodeLowercaseMaybeChange(b *testing.B) {
	b.ReportAllocs()
	for remaining := b.N; remaining > 0; remaining-- {
		for idx := range caseCases {
			UnicodeLowercase(caseCases[idx][0])
		}
	}
}
// BenchmarkUnicodeLowercaseNoChange times one full pass of UnicodeLowercase
// over the expected-output side (element 1) of every caseCases pair per
// benchmark iteration, with allocation reporting enabled. These are the
// strings the test expects UnicodeLowercase to produce.
func BenchmarkUnicodeLowercaseNoChange(b *testing.B) {
	b.ReportAllocs()
	for remaining := b.N; remaining > 0; remaining-- {
		for idx := range caseCases {
			UnicodeLowercase(caseCases[idx][1])
		}
	}
}