diff --git a/.drone.yml b/.drone.yml
index e8f9ba8d269..b5d6294004f 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -7,7 +7,7 @@ pipeline:
image: joomlaprojects/docker-phpcs
commands:
- echo $(date)
- - /root/.composer/vendor/bin/phpcs --report=full --extensions=php -p --standard=build/phpcs/Joomla .
+ - /root/.composer/vendor/bin/phpcs --report=full --extensions=php -p --encoding=utf-8 --standard=build/phpcs/Joomla .
- echo $(date)
initdb:
diff --git a/.gitignore b/.gitignore
index b1b4ecee9eb..1280353a22d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -221,6 +221,11 @@ Desktop.ini
/libraries/vendor/simplepie/simplepie/build
/libraries/vendor/simplepie/simplepie/idn/ReadMe.txt
/libraries/vendor/simplepie/simplepie/composer.json
+/libraries/vendor/wamania/php-stemmer/.gitignore
+/libraries/vendor/wamania/php-stemmer/README.md
+/libraries/vendor/wamania/php-stemmer/composer.json
+/libraries/vendor/wamania/php-stemmer/phpunit.xml.dist
+/libraries/vendor/wamania/php-stemmer/test
/libraries/vendor/zendframework/zend-diactoros/.coveralls.yml
/libraries/vendor/zendframework/zend-diactoros/CHANGELOG.md
/libraries/vendor/zendframework/zend-diactoros/composer.json
diff --git a/administrator/components/com_finder/config.xml b/administrator/components/com_finder/config.xml
index cb4a383edfa..cb8b6a995a5 100644
--- a/administrator/components/com_finder/config.xml
+++ b/administrator/components/com_finder/config.xml
@@ -269,29 +269,6 @@
default="0.3"
/>
-
-
-
-
-
-
-
-
-
-
-
tokenise($input);
+ $terms = array_filter($terms);
/*
* If we have to handle the input as a phrase, that means we don't
@@ -158,14 +77,14 @@ class FinderIndexerHelper
if ($phrase === true && count($terms) > 1)
{
// Create tokens from the phrase.
- $tokens[] = new FinderIndexerToken($terms, $lang);
+ $tokens[] = new FinderIndexerToken($terms, $language->language, $language->spacer);
}
else
{
// Create tokens from the terms.
for ($i = 0, $n = count($terms); $i < $n; $i++)
{
- $tokens[] = new FinderIndexerToken($terms[$i], $lang);
+ $tokens[] = new FinderIndexerToken($terms[$i], $language->language);
}
// Create two and three word phrase tokens from the individual words.
@@ -179,7 +98,7 @@ class FinderIndexerHelper
if ($i2 < $n && isset($tokens[$i2]))
{
// Tokenize the two word phrase.
- $token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term), $lang, $lang === 'zh' ? '' : ' ');
+ $token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term), $language->language, $language->spacer);
$token->derived = true;
// Add the token to the stack.
@@ -190,7 +109,7 @@ class FinderIndexerHelper
if ($i3 < $n && isset($tokens[$i3]))
{
// Tokenize the three word phrase.
- $token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term, $tokens[$i3]->term), $lang, $lang === 'zh' ? '' : ' ');
+ $token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term, $tokens[$i3]->term), $language->language, $language->spacer);
$token->derived = true;
// Add the token to the stack.
@@ -199,22 +118,13 @@ class FinderIndexerHelper
}
}
- if ($store)
- {
- $cache[$store] = count($tokens) > 1 ? $tokens : array_shift($tokens);
+ $cache[$store] = $tokens;
- return $cache[$store];
- }
- else
- {
- return count($tokens) > 1 ? $tokens : array_shift($tokens);
- }
+ return $cache[$store];
}
/**
- * Method to get the base word of a token. This method uses the public
- * {@link FinderIndexerHelper::$stemmer} object if it is set. If no stemmer is set,
- * the original token is returned.
+ * Method to get the base word of a token.
*
* @param string $token The token to stem.
* @param string $lang The language of the token.
@@ -225,31 +135,9 @@ class FinderIndexerHelper
*/
public static function stem($token, $lang)
{
- // Trim apostrophes at either end of the token.
- $token = trim($token, '\'');
+ $language = FinderIndexerLanguage::getInstance($lang);
- // Trim everything after any apostrophe in the token.
- if ($res = explode('\'', $token))
- {
- $token = $res[0];
- }
-
- if (static::$stemmerOK === true)
- {
- return static::$stemmer->stem($token, $lang);
- }
- else
- {
- // Stem the token if we have a valid stemmer to use.
- if (static::$stemmer instanceof FinderIndexerStemmer)
- {
- static::$stemmerOK = true;
-
- return static::$stemmer->stem($token, $lang);
- }
- }
-
- return $token;
+ return $language->stem($token);
}
/**
diff --git a/administrator/components/com_finder/helpers/indexer/indexer.php b/administrator/components/com_finder/helpers/indexer/indexer.php
index b6d38108bac..7181f3b3581 100644
--- a/administrator/components/com_finder/helpers/indexer/indexer.php
+++ b/administrator/components/com_finder/helpers/indexer/indexer.php
@@ -12,8 +12,8 @@ defined('_JEXEC') or die;
use Joomla\String\StringHelper;
JLoader::register('FinderIndexerHelper', __DIR__ . '/helper.php');
+JLoader::register('FinderIndexerLanguage', __DIR__ . '/language.php');
JLoader::register('FinderIndexerParser', __DIR__ . '/parser.php');
-JLoader::register('FinderIndexerStemmer', __DIR__ . '/stemmer.php');
JLoader::register('FinderIndexerTaxonomy', __DIR__ . '/taxonomy.php');
JLoader::register('FinderIndexerToken', __DIR__ . '/token.php');
@@ -213,12 +213,6 @@ abstract class FinderIndexer
static::$profiler = JProfiler::getInstance('FinderIndexer');
}
- // Setup the stemmer.
- if ($data->options->get('stem', 1) && $data->options->get('stemmer', 'porter_en'))
- {
- FinderIndexerHelper::$stemmer = FinderIndexerStemmer::getInstance($data->options->get('stemmer', 'porter_en'));
- }
-
// Set the state.
static::$state = $data;
@@ -471,6 +465,11 @@ abstract class FinderIndexer
// Tokenize the input.
$tokens = FinderIndexerHelper::tokenize($input, $lang);
+ if (count($tokens) == 0)
+ {
+ return $count;
+ }
+
// Add the tokens to the database.
$count += $this->addTokensToDb($tokens, $context);
diff --git a/administrator/components/com_finder/helpers/indexer/language.php b/administrator/components/com_finder/helpers/indexer/language.php
new file mode 100644
index 00000000000..bd7055ca9d7
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language.php
@@ -0,0 +1,146 @@
+language = $locale;
+ }
+
+ return $instances[$language];
+ }
+
+	/**
+	 * Method to tokenise a text string.
+	 *
+	 * The input is lower-cased, punctuation that is not part of a term is replaced by spaces, the typographic
+	 * single quotation marks are folded into the ASCII apostrophe and the result is split on single spaces.
+	 * An input without any usable characters yields an array holding one empty string, because explode() is
+	 * applied to the empty string.
+	 *
+	 * @param   string  $input  The input to tokenise.
+	 *
+	 * @return  array  An array of term strings.
+	 *
+	 * @since   __DEPLOY_VERSION__
+	 */
+	public function tokenise($input)
+	{
+		// Left and right single quotation marks plus the ASCII apostrophe; used as a character class in step 7.
+		$quotes = "‘’'";
+
+		/*
+		 * Parsing the string input into terms is a multi-step process.
+		 *
+		 * Regexes:
+		 *  1. Remove everything except letters, numbers, quotes, apostrophe, plus, dash, period, and comma.
+		 *  2. Remove plus, dash, period, and comma characters located before letter characters.
+		 *  3. Remove plus, dash, period, and comma characters located after other characters.
+		 *  4. Remove plus, period, and comma characters enclosed in alphabetical characters. Ungreedy.
+		 *  5. Remove orphaned apostrophe, plus, dash, period, and comma characters.
+		 *  6. Remove orphaned quote characters.
+		 *  7. Replace the assorted single quotation marks with the ASCII standard single quotation.
+		 *  8. Remove multiple space characters and replaces with a single space.
+		 *
+		 * Note: inside the character classes "+-." is a range from "+" to ".", which covers "+", ",", "-" and ".".
+		 */
+		$input = StringHelper::strtolower($input);
+		$input = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,]+#mui', ' ', $input);
+
+		// Keep the letters (second group); only the leading punctuation is dropped.
+		$input = preg_replace('#(^|\s)[+-.,]+([\pL\pM]+)#mui', ' $2', $input);
+		$input = preg_replace('#([\pL\pM\pN]+)[+-.,]+(\s|$)#mui', '$1 ', $input);
+		$input = preg_replace('#([\pL\pM]+)[+.,]+([\pL\pM]+)#muiU', '$1 $2', $input);
+		$input = preg_replace('#(^|\s)[\'+-.,]+(\s|$)#mui', ' ', $input);
+		$input = preg_replace('#(^|\s)[\p{Pi}\p{Pf}]+(\s|$)#mui', ' ', $input);
+		$input = preg_replace('#[' . $quotes . ']+#mui', '\'', $input);
+		$input = preg_replace('#\s+#mui', ' ', $input);
+		$input = trim($input);
+
+		// Explode the normalized string to get the terms.
+		$terms = explode(' ', $input);
+
+		return $terms;
+	}
+
+	/**
+	 * Method to stem a token.
+	 *
+	 * This implementation performs no stemming and hands the token back unchanged.
+	 *
+	 * @param   string  $token  The token to stem.
+	 *
+	 * @return  string  The stemmed token.
+	 *
+	 * @since   __DEPLOY_VERSION__
+	 */
+	public function stem($token)
+	{
+		// No stemming rules here: the stem is the token itself.
+		$stem = $token;
+
+		return $stem;
+	}
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/da.php b/administrator/components/com_finder/helpers/indexer/language/da.php
new file mode 100644
index 00000000000..8a9630af9dd
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/da.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\Danish;
+ }
+
+	/**
+	 * Method to stem a token.
+	 *
+	 * The work is delegated to the stemmer object stored in $this->stemmer.
+	 *
+	 * @param   string  $token  The token to stem.
+	 *
+	 * @return  string  The stemmed token.
+	 *
+	 * @since   __DEPLOY_VERSION__
+	 */
+	public function stem($token)
+	{
+		$stemmer = $this->stemmer;
+
+		return $stemmer->stem($token);
+	}
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/de.php b/administrator/components/com_finder/helpers/indexer/language/de.php
new file mode 100644
index 00000000000..cd5e97e609a
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/de.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\German;
+ }
+
+	/**
+	 * Method to stem a token.
+	 *
+	 * The work is delegated to the stemmer object stored in $this->stemmer.
+	 *
+	 * @param   string  $token  The token to stem.
+	 *
+	 * @return  string  The stemmed token.
+	 *
+	 * @since   __DEPLOY_VERSION__
+	 */
+	public function stem($token)
+	{
+		$stemmer = $this->stemmer;
+
+		return $stemmer->stem($token);
+	}
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/el.php b/administrator/components/com_finder/helpers/indexer/language/el.php
new file mode 100644
index 00000000000..49e5b477eea
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/el.php
@@ -0,0 +1,1015 @@
+. This is
+ * derivative work, based on the Greek stemmer for Drupal, see
+ * https://github.com/magaras/greek_stemmer/blob/master/mod_stemmer.php
+ */
+
+defined('_JEXEC') or die;
+
+/**
+ * Greek language support class for the Finder indexer package.
+ *
+ * @since __DEPLOY_VERSION__
+ */
+class FinderIndexerLanguageel extends FinderIndexerLanguage
+{
+ /**
+ * Language locale of the class
+ *
+ * @var string
+ * @since __DEPLOY_VERSION__
+ */
+ public $language = 'el';
+
+	/**
+	 * Method to tokenise a text string. Punctuation that is particular to Greek text is first mapped to ASCII
+	 * punctuation (or removed), then the result is handed to the parent tokeniser.
+	 *
+	 * Reference: http://www.teicrete.gr/users/kutrulis/Glosika/Stixi.htm
+	 *
+	 * @param   string  $input  The input to tokenise.
+	 *
+	 * @return  array  An array of term strings.
+	 *
+	 * @since   __DEPLOY_VERSION__
+	 */
+	public function tokenise($input)
+	{
+		// Each entry is array(search, replacement). Entries are applied one after the other, in this order.
+		$substitutions = array(
+			// Greek calligraphic double quotes (various styles) become dumb double quotes
+			array(['“', '”', '„', '«' ,'»'], '"'),
+
+			// Greek calligraphic single quotes (various styles) become dumb single quotes
+			array(['‘','’','‚'], "'"),
+
+			// The middle dot (ano teleia) becomes a comma, adequate for the purpose of stemming
+			array('·', ','),
+
+			// Dot and dash (τελεία και παύλα), used to denote the end of a context at the end of a paragraph
+			array('.–', '.'),
+
+			// Ellipsis, two styles (separate dots or single glyph)
+			array(['...', '…'], '.'),
+
+			// Cross. Marks the death date of a person. Removed.
+			array('†', ''),
+
+			// Star. Reference, supposition word (in philology), birth date of a person. Removed.
+			array('*', ''),
+
+			// Paragraph. Indicates change of subject.
+			array('§', '.'),
+
+			// Plus/minus. Shows approximation. Not relevant for the stemmer, hence its conversion to a space.
+			array('±', ' '),
+		);
+
+		foreach ($substitutions as $substitution)
+		{
+			$input = str_replace($substitution[0], $substitution[1], $input);
+		}
+
+		return parent::tokenise($input);
+	}
+
+	/**
+	 * Method to stem a token.
+	 *
+	 * The token is converted with toUpperCase(), checked against a stop-word list and then run through the
+	 * suffix rules below. Steps S1 to S10 return as soon as one of them matches; steps 1 to 7 are applied
+	 * one after the other to the same token. Every result is passed through toLowerCase() together with the
+	 * case map filled in by toUpperCase().
+	 *
+	 * All suffixes appended by the rules are written with Greek letters only, so a stem never mixes Greek and
+	 * Latin look-alike characters.
+	 *
+	 * @param   string  $token  The token to stem.
+	 *
+	 * @return  string  The stemmed token.
+	 *
+	 * @since   __DEPLOY_VERSION__
+	 */
+	public function stem($token)
+	{
+		$token = $this->toUpperCase($token, $w_CASE);
+
+		// Stop-word removal
+		$stop_words = '/^(ΕΚΟ|ΑΒΑ|ΑΓΑ|ΑΓΗ|ΑΓΩ|ΑΔΗ|ΑΔΩ|ΑΕ|ΑΕΙ|ΑΘΩ|ΑΙ|ΑΙΚ|ΑΚΗ|ΑΚΟΜΑ|ΑΚΟΜΗ|ΑΚΡΙΒΩΣ|ΑΛΑ|ΑΛΗΘΕΙΑ|ΑΛΗΘΙΝΑ|ΑΛΛΑΧΟΥ|ΑΛΛΙΩΣ|ΑΛΛΙΩΤΙΚΑ|'
+				. 'ΑΛΛΟΙΩΣ|ΑΛΛΟΙΩΤΙΚΑ|ΑΛΛΟΤΕ|ΑΛΤ|ΑΛΩ|ΑΜΑ|ΑΜΕ|ΑΜΕΣΑ|ΑΜΕΣΩΣ|ΑΜΩ|ΑΝ|ΑΝΑ|ΑΝΑΜΕΣΑ|ΑΝΑΜΕΤΑΞΥ|ΑΝΕΥ|ΑΝΤΙ|ΑΝΤΙΠΕΡΑ|ΑΝΤΙΣ|ΑΝΩ|ΑΝΩΤΕΡΩ|ΑΞΑΦΝΑ|'
+				. 'ΑΠ|ΑΠΕΝΑΝΤΙ|ΑΠΟ|ΑΠΟΨΕ|ΑΠΩ|ΑΡΑ|ΑΡΑΓΕ|ΑΡΕ|ΑΡΚ|ΑΡΚΕΤΑ|ΑΡΛ|ΑΡΜ|ΑΡΤ|ΑΡΥ|ΑΡΩ|ΑΣ|ΑΣΑ|ΑΣΟ|ΑΤΑ|ΑΤΕ|ΑΤΗ|ΑΤΙ|ΑΤΜ|ΑΤΟ|ΑΥΡΙΟ|ΑΦΗ|ΑΦΟΤΟΥ|ΑΦΟΥ|'
+				. 'ΑΧ|ΑΧΕ|ΑΧΟ|ΑΨΑ|ΑΨΕ|ΑΨΗ|ΑΨΥ|ΑΩΕ|ΑΩΟ|ΒΑΝ|ΒΑΤ|ΒΑΧ|ΒΕΑ|ΒΕΒΑΙΟΤΑΤΑ|ΒΗΞ|ΒΙΑ|ΒΙΕ|ΒΙΗ|ΒΙΟ|ΒΟΗ|ΒΟΩ|ΒΡΕ|ΓΑ|ΓΑΒ|ΓΑΡ|ΓΕΝ|ΓΕΣ||ΓΗ|ΓΗΝ|ΓΙ|ΓΙΑ|'
+				. 'ΓΙΕ|ΓΙΝ|ΓΙΟ|ΓΚΙ|ΓΙΑΤΙ|ΓΚΥ|ΓΟΗ|ΓΟΟ|ΓΡΗΓΟΡΑ|ΓΡΙ|ΓΡΥ|ΓΥΗ|ΓΥΡΩ|ΔΑ|ΔΕ|ΔΕΗ|ΔΕΙ|ΔΕΝ|ΔΕΣ|ΔΗ|ΔΗΘΕΝ|ΔΗΛΑΔΗ|ΔΗΩ|ΔΙ|ΔΙΑ|ΔΙΑΡΚΩΣ|ΔΙΟΛΟΥ|ΔΙΣ|'
+				. 'ΔΙΧΩΣ|ΔΟΛ|ΔΟΝ|ΔΡΑ|ΔΡΥ|ΔΡΧ|ΔΥΕ|ΔΥΟ|ΔΩ|ΕΑΜ|ΕΑΝ|ΕΑΡ|ΕΘΗ|ΕΙ|ΕΙΔΕΜΗ|ΕΙΘΕ|ΕΙΜΑΙ|ΕΙΜΑΣΤΕ|ΕΙΝΑΙ|ΕΙΣ|ΕΙΣΑΙ|ΕΙΣΑΣΤΕ|ΕΙΣΤΕ|ΕΙΤΕ|ΕΙΧΑ|ΕΙΧΑΜΕ|'
+				. 'ΕΙΧΑΝ|ΕΙΧΑΤΕ|ΕΙΧΕ|ΕΙΧΕΣ|ΕΚ|ΕΚΕΙ|ΕΛΑ|ΕΛΙ|ΕΜΠ|ΕΝ|ΕΝΤΕΛΩΣ|ΕΝΤΟΣ|ΕΝΤΩΜΕΤΑΞΥ|ΕΝΩ|ΕΞ|ΕΞΑΦΝΑ|ΕΞΙ|ΕΞΙΣΟΥ|ΕΞΩ|ΕΟΚ|ΕΠΑΝΩ|ΕΠΕΙΔΗ|ΕΠΕΙΤΑ|ΕΠΗ|'
+				. 'ΕΠΙ|ΕΠΙΣΗΣ|ΕΠΟΜΕΝΩΣ|ΕΡΑ|ΕΣ|ΕΣΑΣ|ΕΣΕ|ΕΣΕΙΣ|ΕΣΕΝΑ|ΕΣΗ|ΕΣΤΩ|ΕΣΥ|ΕΣΩ|ΕΤΙ|ΕΤΣΙ|ΕΥ|ΕΥΑ|ΕΥΓΕ|ΕΥΘΥΣ|ΕΥΤΥΧΩΣ|ΕΦΕ|ΕΦΕΞΗΣ|ΕΦΤ|ΕΧΕ|ΕΧΕΙ|'
+				. 'ΕΧΕΙΣ|ΕΧΕΤΕ|ΕΧΘΕΣ|ΕΧΟΜΕ|ΕΧΟΥΜΕ|ΕΧΟΥΝ|ΕΧΤΕΣ|ΕΧΩ|ΕΩΣ|ΖΕΑ|ΖΕΗ|ΖΕΙ|ΖΕΝ|ΖΗΝ|ΖΩ|Η|ΗΔΗ|ΗΔΥ|ΗΘΗ|ΗΛΟ|ΗΜΙ|ΗΠΑ|ΗΣΑΣΤΕ|ΗΣΟΥΝ|ΗΤΑ|ΗΤΑΝ|ΗΤΑΝΕ|'
+				. 'ΗΤΟΙ|ΗΤΤΟΝ|ΗΩ|ΘΑ|ΘΥΕ|ΘΩΡ|Ι|ΙΑ|ΙΒΟ|ΙΔΗ|ΙΔΙΩΣ|ΙΕ|ΙΙ|ΙΙΙ|ΙΚΑ|ΙΛΟ|ΙΜΑ|ΙΝΑ|ΙΝΩ|ΙΞΕ|ΙΞΟ|ΙΟ|ΙΟΙ|ΙΣΑ|ΙΣΑΜΕ|ΙΣΕ|ΙΣΗ|ΙΣΙΑ|ΙΣΟ|ΙΣΩΣ|ΙΩΒ|ΙΩΝ|'
+				. 'ΙΩΣ|ΙΑΝ|ΚΑΘ|ΚΑΘΕ|ΚΑΘΕΤΙ|ΚΑΘΟΛΟΥ|ΚΑΘΩΣ|ΚΑΙ|ΚΑΝ|ΚΑΠΟΤΕ|ΚΑΠΟΥ|ΚΑΠΩΣ|ΚΑΤ|ΚΑΤΑ|ΚΑΤΙ|ΚΑΤΙΤΙ|ΚΑΤΟΠΙΝ|ΚΑΤΩ|ΚΑΩ|ΚΒΟ|ΚΕΑ|ΚΕΙ|ΚΕΝ|ΚΙ|ΚΙΜ|'
+				. 'ΚΙΟΛΑΣ|ΚΙΤ|ΚΙΧ|ΚΚΕ|ΚΛΙΣΕ|ΚΛΠ|ΚΟΚ|ΚΟΝΤΑ|ΚΟΧ|ΚΤΛ|ΚΥΡ|ΚΥΡΙΩΣ|ΚΩ|ΚΩΝ|ΛΑ|ΛΕΑ|ΛΕΝ|ΛΕΟ|ΛΙΑ|ΛΙΓΑΚΙ|ΛΙΓΟΥΛΑΚΙ|ΛΙΓΟ|ΛΙΓΩΤΕΡΟ|ΛΙΟ|ΛΙΡ|ΛΟΓΩ|'
+				. 'ΛΟΙΠΑ|ΛΟΙΠΟΝ|ΛΟΣ|ΛΣ|ΛΥΩ|ΜΑ|ΜΑΖΙ|ΜΑΚΑΡΙ|ΜΑΛΙΣΤΑ|ΜΑΛΛΟΝ|ΜΑΝ|ΜΑΞ|ΜΑΣ|ΜΑΤ|ΜΕ|ΜΕΘΑΥΡΙΟ|ΜΕΙ|ΜΕΙΟΝ|ΜΕΛ|ΜΕΛΕΙ|ΜΕΛΛΕΤΑΙ|ΜΕΜΙΑΣ|ΜΕΝ|ΜΕΣ|'
+				. 'ΜΕΣΑ|ΜΕΤ|ΜΕΤΑ|ΜΕΤΑΞΥ|ΜΕΧΡΙ|ΜΗ|ΜΗΔΕ|ΜΗΝ|ΜΗΠΩΣ|ΜΗΤΕ|ΜΙ|ΜΙΞ|ΜΙΣ|ΜΜΕ|ΜΝΑ|ΜΟΒ|ΜΟΛΙΣ|ΜΟΛΟΝΟΤΙ|ΜΟΝΑΧΑ|ΜΟΝΟΜΙΑΣ|ΜΙΑ|ΜΟΥ|ΜΠΑ|ΜΠΟΡΕΙ|'
+				. 'ΜΠΟΡΟΥΝ|ΜΠΡΑΒΟ|ΜΠΡΟΣ|ΜΠΩ|ΜΥ|ΜΥΑ|ΜΥΝ|ΝΑ|ΝΑΕ|ΝΑΙ|ΝΑΟ|ΝΔ|ΝΕΐ|ΝΕΑ|ΝΕΕ|ΝΕΟ|ΝΙ|ΝΙΑ|ΝΙΚ|ΝΙΛ|ΝΙΝ|ΝΙΟ|ΝΤΑ|ΝΤΕ|ΝΤΙ|ΝΤΟ|ΝΥΝ|ΝΩΕ|ΝΩΡΙΣ|ΞΑΝΑ|'
+				. 'ΞΑΦΝΙΚΑ|ΞΕΩ|ΞΙ|Ο|ΟΑ|ΟΑΠ|ΟΔΟ|ΟΕ|ΟΖΟ|ΟΗΕ|ΟΙ|ΟΙΑ|ΟΙΗ|ΟΚΑ|ΟΛΟΓΥΡΑ|ΟΛΟΝΕΝ|ΟΛΟΤΕΛΑ|ΟΛΩΣΔΙΟΛΟΥ|ΟΜΩΣ|ΟΝ|ΟΝΕ|ΟΝΟ|ΟΠΑ|ΟΠΕ|ΟΠΗ|ΟΠΟ|'
+				. 'ΟΠΟΙΑΔΗΠΟΤΕ|ΟΠΟΙΑΝΔΗΠΟΤΕ|ΟΠΟΙΑΣΔΗΠΟΤΕ|ΟΠΟΙΔΗΠΟΤΕ|ΟΠΟΙΕΣΔΗΠΟΤΕ|ΟΠΟΙΟΔΗΠΟΤΕ|ΟΠΟΙΟΝΔΗΠΟΤΕ|ΟΠΟΙΟΣΔΗΠΟΤΕ|ΟΠΟΙΟΥΔΗΠΟΤΕ|ΟΠΟΙΟΥΣΔΗΠΟΤΕ|'
+				. 'ΟΠΟΙΩΝΔΗΠΟΤΕ|ΟΠΟΤΕΔΗΠΟΤΕ|ΟΠΟΥ|ΟΠΟΥΔΗΠΟΤΕ|ΟΠΩΣ|ΟΡΑ|ΟΡΕ|ΟΡΗ|ΟΡΟ|ΟΡΦ|ΟΡΩ|ΟΣΑ|ΟΣΑΔΗΠΟΤΕ|ΟΣΕ|ΟΣΕΣΔΗΠΟΤΕ|ΟΣΗΔΗΠΟΤΕ|ΟΣΗΝΔΗΠΟΤΕ|'
+				. 'ΟΣΗΣΔΗΠΟΤΕ|ΟΣΟΔΗΠΟΤΕ|ΟΣΟΙΔΗΠΟΤΕ|ΟΣΟΝΔΗΠΟΤΕ|ΟΣΟΣΔΗΠΟΤΕ|ΟΣΟΥΔΗΠΟΤΕ|ΟΣΟΥΣΔΗΠΟΤΕ|ΟΣΩΝΔΗΠΟΤΕ|ΟΤΑΝ|ΟΤΕ|ΟΤΙ|ΟΤΙΔΗΠΟΤΕ|ΟΥ|ΟΥΔΕ|ΟΥΚ|ΟΥΣ|'
+				. 'ΟΥΤΕ|ΟΥΦ|ΟΧΙ|ΟΨΑ|ΟΨΕ|ΟΨΗ|ΟΨΙ|ΟΨΟ|ΠΑ|ΠΑΛΙ|ΠΑΝ|ΠΑΝΤΟΤΕ|ΠΑΝΤΟΥ|ΠΑΝΤΩΣ|ΠΑΠ|ΠΑΡ|ΠΑΡΑ|ΠΕΙ|ΠΕΡ|ΠΕΡΑ|ΠΕΡΙ|ΠΕΡΙΠΟΥ|ΠΕΡΣΙ|ΠΕΡΥΣΙ|ΠΕΣ|ΠΙ|'
+				. 'ΠΙΑ|ΠΙΘΑΝΟΝ|ΠΙΚ|ΠΙΟ|ΠΙΣΩ|ΠΙΤ|ΠΙΩ|ΠΛΑΙ|ΠΛΕΟΝ|ΠΛΗΝ|ΠΛΩ|ΠΜ|ΠΟΑ|ΠΟΕ|ΠΟΛ|ΠΟΛΥ|ΠΟΠ|ΠΟΤΕ|ΠΟΥ|ΠΟΥΘΕ|ΠΟΥΘΕΝΑ|ΠΡΕΠΕΙ|ΠΡΙ|ΠΡΙΝ|ΠΡΟ|'
+				. 'ΠΡΟΚΕΙΜΕΝΟΥ|ΠΡΟΚΕΙΤΑΙ|ΠΡΟΠΕΡΣΙ|ΠΡΟΣ|ΠΡΟΤΟΥ|ΠΡΟΧΘΕΣ|ΠΡΟΧΤΕΣ|ΠΡΩΤΥΤΕΡΑ|ΠΥΑ|ΠΥΞ|ΠΥΟ|ΠΥΡ|ΠΧ|ΠΩ|ΠΩΛ|ΠΩΣ|ΡΑ|ΡΑΙ|ΡΑΠ|ΡΑΣ|ΡΕ|ΡΕΑ|ΡΕΕ|ΡΕΙ|'
+				. 'ΡΗΣ|ΡΘΩ|ΡΙΟ|ΡΟ|ΡΟΐ|ΡΟΕ|ΡΟΖ|ΡΟΗ|ΡΟΘ|ΡΟΙ|ΡΟΚ|ΡΟΛ|ΡΟΝ|ΡΟΣ|ΡΟΥ|ΣΑΙ|ΣΑΝ|ΣΑΟ|ΣΑΣ|ΣΕ|ΣΕΙΣ|ΣΕΚ|ΣΕΞ|ΣΕΡ|ΣΕΤ|ΣΕΦ|ΣΗΜΕΡΑ|ΣΙ|ΣΙΑ|ΣΙΓΑ|ΣΙΚ|'
+				. 'ΣΙΧ|ΣΚΙ|ΣΟΙ|ΣΟΚ|ΣΟΛ|ΣΟΝ|ΣΟΣ|ΣΟΥ|ΣΡΙ|ΣΤΑ|ΣΤΗ|ΣΤΗΝ|ΣΤΗΣ|ΣΤΙΣ|ΣΤΟ|ΣΤΟΝ|ΣΤΟΥ|ΣΤΟΥΣ|ΣΤΩΝ|ΣΥ|ΣΥΓΧΡΟΝΩΣ|ΣΥΝ|ΣΥΝΑΜΑ|ΣΥΝΕΠΩΣ|ΣΥΝΗΘΩΣ|'
+				. 'ΣΧΕΔΟΝ|ΣΩΣΤΑ|ΤΑ|ΤΑΔΕ|ΤΑΚ|ΤΑΝ|ΤΑΟ|ΤΑΥ|ΤΑΧΑ|ΤΑΧΑΤΕ|ΤΕ|ΤΕΙ|ΤΕΛ|ΤΕΛΙΚΑ|ΤΕΛΙΚΩΣ|ΤΕΣ|ΤΕΤ|ΤΖΟ|ΤΗ|ΤΗΛ|ΤΗΝ|ΤΗΣ|ΤΙ|ΤΙΚ|ΤΙΜ|ΤΙΠΟΤΑ|ΤΙΠΟΤΕ|'
+				. 'ΤΙΣ|ΤΝΤ|ΤΟ|ΤΟΙ|ΤΟΚ|ΤΟΜ|ΤΟΝ|ΤΟΠ|ΤΟΣ|ΤΟΣ?Ν|ΤΟΣΑ|ΤΟΣΕΣ|ΤΟΣΗ|ΤΟΣΗΝ|ΤΟΣΗΣ|ΤΟΣΟ|ΤΟΣΟΙ|ΤΟΣΟΝ|ΤΟΣΟΣ|ΤΟΣΟΥ|ΤΟΣΟΥΣ|ΤΟΤΕ|ΤΟΥ|ΤΟΥΛΑΧΙΣΤΟ|'
+				. 'ΤΟΥΛΑΧΙΣΤΟΝ|ΤΟΥΣ|ΤΣ|ΤΣΑ|ΤΣΕ|ΤΥΧΟΝ|ΤΩ|ΤΩΝ|ΤΩΡΑ|ΥΑΣ|ΥΒΑ|ΥΒΟ|ΥΙΕ|ΥΙΟ|ΥΛΑ|ΥΛΗ|ΥΝΙ|ΥΠ|ΥΠΕΡ|ΥΠΟ|ΥΠΟΨΗ|ΥΠΟΨΙΝ|ΥΣΤΕΡΑ|ΥΦΗ|ΥΨΗ|ΦΑ|ΦΑΐ|ΦΑΕ|'
+				. 'ΦΑΝ|ΦΑΞ|ΦΑΣ|ΦΑΩ|ΦΕΖ|ΦΕΙ|ΦΕΤΟΣ|ΦΕΥ|ΦΙ|ΦΙΛ|ΦΙΣ|ΦΟΞ|ΦΠΑ|ΦΡΙ|ΧΑ|ΧΑΗ|ΧΑΛ|ΧΑΝ|ΧΑΦ|ΧΕ|ΧΕΙ|ΧΘΕΣ|ΧΙ|ΧΙΑ|ΧΙΛ|ΧΙΟ|ΧΛΜ|ΧΜ|ΧΟΗ|ΧΟΛ|ΧΡΩ|ΧΤΕΣ|'
+				. 'ΧΩΡΙΣ|ΧΩΡΙΣΤΑ|ΨΕΣ|ΨΗΛΑ|ΨΙ|ΨΙΤ|Ω|ΩΑ|ΩΑΣ|ΩΔΕ|ΩΕΣ|ΩΘΩ|ΩΜΑ|ΩΜΕ|ΩΝ|ΩΟ|ΩΟΝ|ΩΟΥ|ΩΣ|ΩΣΑΝ|ΩΣΗ|ΩΣΟΤΟΥ|ΩΣΠΟΥ|ΩΣΤΕ|ΩΣΤΟΣΟ|ΩΤΑ|ΩΧ|ΩΩΝ)$/';
+
+		// Stop words are not stemmed at all.
+		if (preg_match($stop_words, $token))
+		{
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Vowels
+		$v = '(Α|Ε|Η|Ι|Ο|Υ|Ω)';
+
+		// Vowels without Y
+		$v2 = '(Α|Ε|Η|Ι|Ο|Ω)';
+
+		// Stays true only while none of the steps that clear it has matched; step 6 strips its suffix list only then.
+		$test1 = true;
+
+		// Step S1. 14 stems
+		$re       = '/^(.+?)(ΙΖΑ|ΙΖΕΣ|ΙΖΕ|ΙΖΑΜΕ|ΙΖΑΤΕ|ΙΖΑΝ|ΙΖΑΝΕ|ΙΖΩ|ΙΖΕΙΣ|ΙΖΕΙ|ΙΖΟΥΜΕ|ΙΖΕΤΕ|ΙΖΟΥΝ|ΙΖΟΥΝΕ)$/';
+		$exceptS1 = '/^(ΑΝΑΜΠΑ|ΕΜΠΑ|ΕΠΑ|ΞΑΝΑΠΑ|ΠΑ|ΠΕΡΙΠΑ|ΑΘΡΟ|ΣΥΝΑΘΡΟ|ΔΑΝΕ)$/';
+		$exceptS2 = '/^(ΜΑΡΚ|ΚΟΡΝ|ΑΜΠΑΡ|ΑΡΡ|ΒΑΘΥΡΙ|ΒΑΡΚ|Β|ΒΟΛΒΟΡ|ΓΚΡ|ΓΛΥΚΟΡ|ΓΛΥΚΥΡ|ΙΜΠ|Λ|ΛΟΥ|ΜΑΡ|Μ|ΠΡ|ΜΠΡ|ΠΟΛΥΡ|Π|Ρ|ΠΙΠΕΡΟΡ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				// Greek capital iota, not the Latin letter "I"
+				$token = $token . 'Ι';
+			}
+
+			if (preg_match($exceptS2, $token))
+			{
+				// Greek capital iota and zeta, no Latin look-alikes
+				$token = $token . 'ΙΖ';
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step S2. 7 stems
+		$re       = '/^(.+?)(ΩΘΗΚΑ|ΩΘΗΚΕΣ|ΩΘΗΚΕ|ΩΘΗΚΑΜΕ|ΩΘΗΚΑΤΕ|ΩΘΗΚΑΝ|ΩΘΗΚΑΝΕ)$/';
+		$exceptS1 = '/^(ΑΛ|ΒΙ|ΕΝ|ΥΨ|ΛΙ|ΖΩ|Σ|Χ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				$token = $token . 'ΩΝ';
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step S3. 7 stems
+		$re       = '/^(.+?)(ΙΣΑ|ΙΣΕΣ|ΙΣΕ|ΙΣΑΜΕ|ΙΣΑΤΕ|ΙΣΑΝ|ΙΣΑΝΕ)$/';
+		$exceptS1 = '/^(ΑΝΑΜΠΑ|ΑΘΡΟ|ΕΜΠΑ|ΕΣΕ|ΕΣΩΚΛΕ|ΕΠΑ|ΞΑΝΑΠΑ|ΕΠΕ|ΠΕΡΙΠΑ|ΑΘΡΟ|ΣΥΝΑΘΡΟ|ΔΑΝΕ|ΚΛΕ|ΧΑΡΤΟΠΑ|ΕΞΑΡΧΑ|ΜΕΤΕΠΕ|ΑΠΟΚΛΕ|ΑΠΕΚΛΕ|ΕΚΛΕ|ΠΕ|ΠΕΡΙΠΑ)$/';
+		$exceptS2 = '/^(ΑΝ|ΑΦ|ΓΕ|ΓΙΓΑΝΤΟΑΦ|ΓΚΕ|ΔΗΜΟΚΡΑΤ|ΚΟΜ|ΓΚ|Μ|Π|ΠΟΥΚΑΜ|ΟΛΟ|ΛΑΡ)$/';
+
+		if ($token === 'ΙΣΑ')
+		{
+			// Lower-cased like every other result of this method
+			return $this->toLowerCase('ΙΣ', $w_CASE);
+		}
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				$token = $token . 'Ι';
+			}
+
+			if (preg_match($exceptS2, $token))
+			{
+				$token = $token . 'ΙΣ';
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step S4. 7 stems
+		$re       = '/^(.+?)(ΙΣΩ|ΙΣΕΙΣ|ΙΣΕΙ|ΙΣΟΥΜΕ|ΙΣΕΤΕ|ΙΣΟΥΝ|ΙΣΟΥΝΕ)$/';
+		$exceptS1 = '/^(ΑΝΑΜΠΑ|ΕΜΠΑ|ΕΣΕ|ΕΣΩΚΛΕ|ΕΠΑ|ΞΑΝΑΠΑ|ΕΠΕ|ΠΕΡΙΠΑ|ΑΘΡΟ|ΣΥΝΑΘΡΟ|ΔΑΝΕ|ΚΛΕ|ΧΑΡΤΟΠΑ|ΕΞΑΡΧΑ|ΜΕΤΕΠΕ|ΑΠΟΚΛΕ|ΑΠΕΚΛΕ|ΕΚΛΕ|ΠΕ|ΠΕΡΙΠΑ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				$token = $token . 'Ι';
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step S5. 11 stems
+		$re       = '/^(.+?)(ΙΣΤΟΣ|ΙΣΤΟΥ|ΙΣΤΟ|ΙΣΤΕ|ΙΣΤΟΙ|ΙΣΤΩΝ|ΙΣΤΟΥΣ|ΙΣΤΗ|ΙΣΤΗΣ|ΙΣΤΑ|ΙΣΤΕΣ)$/';
+		$exceptS1 = '/^(Μ|Π|ΑΠ|ΑΡ|ΗΔ|ΚΤ|ΣΚ|ΣΧ|ΥΨ|ΦΑ|ΧΡ|ΧΤ|ΑΚΤ|ΑΟΡ|ΑΣΧ|ΑΤΑ|ΑΧΝ|ΑΧΤ|ΓΕΜ|ΓΥΡ|ΕΜΠ|ΕΥΠ|ΕΧΘ|ΗΦΑ|ΚΑΘ|ΚΑΚ|ΚΥΛ|ΛΥΓ|ΜΑΚ|ΜΕΓ|ΤΑΧ|ΦΙΛ|ΧΩΡ)$/';
+		$exceptS2 = '/^(ΔΑΝΕ|ΣΥΝΑΘΡΟ|ΚΛΕ|ΣΕ|ΕΣΩΚΛΕ|ΑΣΕ|ΠΛΕ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				$token = $token . 'ΙΣΤ';
+			}
+
+			if (preg_match($exceptS2, $token))
+			{
+				$token = $token . 'Ι';
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step S6. 6 stems
+		$re       = '/^(.+?)(ΙΣΜΟ|ΙΣΜΟΙ|ΙΣΜΟΣ|ΙΣΜΟΥ|ΙΣΜΟΥΣ|ΙΣΜΩΝ)$/';
+		$exceptS1 = '/^(ΑΓΝΩΣΤΙΚ|ΑΤΟΜΙΚ|ΓΝΩΣΤΙΚ|ΕΘΝΙΚ|ΕΚΛΕΚΤΙΚ|ΣΚΕΠΤΙΚ|ΤΟΠΙΚ)$/';
+		$exceptS2 = '/^(ΣΕ|ΜΕΤΑΣΕ|ΜΙΚΡΟΣΕ|ΕΓΚΛΕ|ΑΠΟΚΛΕ)$/';
+		$exceptS3 = '/^(ΔΑΝΕ|ΑΝΤΙΔΑΝΕ)$/';
+		$exceptS4 = '/^(ΑΛΕΞΑΝΔΡΙΝ|ΒΥΖΑΝΤΙΝ|ΘΕΑΤΡΙΝ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				$token = str_replace('ΙΚ', "", $token);
+			}
+
+			if (preg_match($exceptS2, $token))
+			{
+				$token = $token . "ΙΣΜ";
+			}
+
+			if (preg_match($exceptS3, $token))
+			{
+				$token = $token . "Ι";
+			}
+
+			if (preg_match($exceptS4, $token))
+			{
+				$token = str_replace('ΙΝ', "", $token);
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step S7. 4 stems
+		$re       = '/^(.+?)(ΑΡΑΚΙ|ΑΡΑΚΙΑ|ΟΥΔΑΚΙ|ΟΥΔΑΚΙΑ)$/';
+		$exceptS1 = '/^(Σ|Χ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				// Greek capital alpha, not the Latin letter "A"
+				$token = $token . "ΑΡΑΚ";
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step S8. 8 stems
+		$re       = '/^(.+?)(ΑΚΙ|ΑΚΙΑ|ΙΤΣΑ|ΙΤΣΑΣ|ΙΤΣΕΣ|ΙΤΣΩΝ|ΑΡΑΚΙ|ΑΡΑΚΙΑ)$/';
+		$exceptS1 = '/^(ΑΝΘΡ|ΒΑΜΒ|ΒΡ|ΚΑΙΜ|ΚΟΝ|ΚΟΡ|ΛΑΒΡ|ΛΟΥΛ|ΜΕΡ|ΜΟΥΣΤ|ΝΑΓΚΑΣ|ΠΛ|Ρ|ΡΥ|Σ|ΣΚ|ΣΟΚ|ΣΠΑΝ|ΤΖ|ΦΑΡΜ|Χ|'
+				. 'ΚΑΠΑΚ|ΑΛΙΣΦ|ΑΜΒΡ|ΑΝΘΡ|Κ|ΦΥΛ|ΚΑΤΡΑΠ|ΚΛΙΜ|ΜΑΛ|ΣΛΟΒ|Φ|ΣΦ|ΤΣΕΧΟΣΛΟΒ)$/';
+		$exceptS2 = '/^(Β|ΒΑΛ|ΓΙΑΝ|ΓΛ|Ζ|ΗΓΟΥΜΕΝ|ΚΑΡΔ|ΚΟΝ|ΜΑΚΡΥΝ|ΝΥΦ|ΠΑΤΕΡ|Π|ΣΚ|ΤΟΣ|ΤΡΙΠΟΛ)$/';
+
+		// For words like ΠΛΟΥΣΙΟΚΟΡΙΤΣΑ, ΠΑΛΙΟΚΟΡΙΤΣΑ etc
+		$exceptS3 = '/(ΚΟΡ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				$token = $token . "ΑΚ";
+			}
+
+			if (preg_match($exceptS2, $token))
+			{
+				$token = $token . "ΙΤΣ";
+			}
+
+			if (preg_match($exceptS3, $token))
+			{
+				$token = $token . "ΙΤΣ";
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step S9. 3 stems
+		$re       = '/^(.+?)(ΙΔΙΟ|ΙΔΙΑ|ΙΔΙΩΝ)$/';
+		$exceptS1 = '/^(ΑΙΦΝ|ΙΡ|ΟΛΟ|ΨΑΛ)$/';
+		$exceptS2 = '/(Ε|ΠΑΙΧΝ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				$token = $token . "ΙΔ";
+			}
+
+			if (preg_match($exceptS2, $token))
+			{
+				$token = $token . "ΙΔ";
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step S10. 4 stems
+		$re       = '/^(.+?)(ΙΣΚΟΣ|ΙΣΚΟΥ|ΙΣΚΟ|ΙΣΚΕ)$/';
+		$exceptS1 = '/^(Δ|ΙΒ|ΜΗΝ|Ρ|ΦΡΑΓΚ|ΛΥΚ|ΟΒΕΛ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+
+			if (preg_match($exceptS1, $token))
+			{
+				$token = $token . "ΙΣΚ";
+			}
+
+			return $this->toLowerCase($token, $w_CASE);
+		}
+
+		// Step 1
+		// step1list is used in Step 1. It maps each suffix matched below to its replacement.
+		$step1list = array(
+			"ΦΑΓΙΑ"      => "ΦΑ",
+			"ΦΑΓΙΟΥ"     => "ΦΑ",
+			"ΦΑΓΙΩΝ"     => "ΦΑ",
+			"ΣΚΑΓΙΑ"     => "ΣΚΑ",
+			"ΣΚΑΓΙΟΥ"    => "ΣΚΑ",
+			"ΣΚΑΓΙΩΝ"    => "ΣΚΑ",
+			"ΟΛΟΓΙΟΥ"    => "ΟΛΟ",
+			"ΟΛΟΓΙΑ"     => "ΟΛΟ",
+			"ΟΛΟΓΙΩΝ"    => "ΟΛΟ",
+			"ΣΟΓΙΟΥ"     => "ΣΟ",
+			"ΣΟΓΙΑ"      => "ΣΟ",
+			"ΣΟΓΙΩΝ"     => "ΣΟ",
+			"ΤΑΤΟΓΙΑ"    => "ΤΑΤΟ",
+			"ΤΑΤΟΓΙΟΥ"   => "ΤΑΤΟ",
+			"ΤΑΤΟΓΙΩΝ"   => "ΤΑΤΟ",
+			"ΚΡΕΑΣ"      => "ΚΡΕ",
+			"ΚΡΕΑΤΟΣ"    => "ΚΡΕ",
+			"ΚΡΕΑΤΑ"     => "ΚΡΕ",
+			"ΚΡΕΑΤΩΝ"    => "ΚΡΕ",
+			"ΠΕΡΑΣ"      => "ΠΕΡ",
+			"ΠΕΡΑΤΟΣ"    => "ΠΕΡ",
+
+			// Added by Spyros. Also at $re in step1
+			"ΠΕΡΑΤΗ"     => "ΠΕΡ",
+			"ΠΕΡΑΤΑ"     => "ΠΕΡ",
+			"ΠΕΡΑΤΩΝ"    => "ΠΕΡ",
+			"ΤΕΡΑΣ"      => "ΤΕΡ",
+			"ΤΕΡΑΤΟΣ"    => "ΤΕΡ",
+			"ΤΕΡΑΤΑ"     => "ΤΕΡ",
+			"ΤΕΡΑΤΩΝ"    => "ΤΕΡ",
+			"ΦΩΣ"        => "ΦΩ",
+			"ΦΩΤΟΣ"      => "ΦΩ",
+			"ΦΩΤΑ"       => "ΦΩ",
+			"ΦΩΤΩΝ"      => "ΦΩ",
+			"ΚΑΘΕΣΤΩΣ"   => "ΚΑΘΕΣΤ",
+			"ΚΑΘΕΣΤΩΤΟΣ" => "ΚΑΘΕΣΤ",
+			"ΚΑΘΕΣΤΩΤΑ"  => "ΚΑΘΕΣΤ",
+			"ΚΑΘΕΣΤΩΤΩΝ" => "ΚΑΘΕΣΤ",
+			"ΓΕΓΟΝΟΣ"    => "ΓΕΓΟΝ",
+			"ΓΕΓΟΝΟΤΟΣ"  => "ΓΕΓΟΝ",
+			"ΓΕΓΟΝΟΤΑ"   => "ΓΕΓΟΝ",
+			"ΓΕΓΟΝΟΤΩΝ"  => "ΓΕΓΟΝ",
+		);
+
+		$re = '/(.*)(ΦΑΓΙΑ|ΦΑΓΙΟΥ|ΦΑΓΙΩΝ|ΣΚΑΓΙΑ|ΣΚΑΓΙΟΥ|ΣΚΑΓΙΩΝ|ΟΛΟΓΙΟΥ|ΟΛΟΓΙΑ|ΟΛΟΓΙΩΝ|ΣΟΓΙΟΥ|ΣΟΓΙΑ|ΣΟΓΙΩΝ|ΤΑΤΟΓΙΑ|ΤΑΤΟΓΙΟΥ|ΤΑΤΟΓΙΩΝ|ΚΡΕΑΣ|ΚΡΕΑΤΟΣ|'
+				. 'ΚΡΕΑΤΑ|ΚΡΕΑΤΩΝ|ΠΕΡΑΣ|ΠΕΡΑΤΟΣ|ΠΕΡΑΤΗ|ΠΕΡΑΤΑ|ΠΕΡΑΤΩΝ|ΤΕΡΑΣ|ΤΕΡΑΤΟΣ|ΤΕΡΑΤΑ|ΤΕΡΑΤΩΝ|ΦΩΣ|ΦΩΤΟΣ|ΦΩΤΑ|ΦΩΤΩΝ|ΚΑΘΕΣΤΩΣ|ΚΑΘΕΣΤΩΤΟΣ|'
+				. 'ΚΑΘΕΣΤΩΤΑ|ΚΑΘΕΣΤΩΤΩΝ|ΓΕΓΟΝΟΣ|ΓΕΓΟΝΟΤΟΣ|ΓΕΓΟΝΟΤΑ|ΓΕΓΟΝΟΤΩΝ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$stem   = $match[1];
+			$suffix = $match[2];
+			$token  = $stem . (array_key_exists($suffix, $step1list) ? $step1list[$suffix] : '');
+			$test1  = false;
+		}
+
+		// Step 2a. 2 stems
+		$re = '/^(.+?)(ΑΔΕΣ|ΑΔΩΝ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+			$re    = '/(ΟΚ|ΜΑΜ|ΜΑΝ|ΜΠΑΜΠ|ΠΑΤΕΡ|ΓΙΑΓΙ|ΝΤΑΝΤ|ΚΥΡ|ΘΕΙ|ΠΕΘΕΡ)$/';
+
+			if (!preg_match($re, $token))
+			{
+				$token = $token . "ΑΔ";
+			}
+		}
+
+		// Step 2b. 2 stems
+		$re = '/^(.+?)(ΕΔΕΣ|ΕΔΩΝ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token  = $match[1];
+			$exept2 = '/(ΟΠ|ΙΠ|ΕΜΠ|ΥΠ|ΓΗΠ|ΔΑΠ|ΚΡΑΣΠ|ΜΙΛ)$/';
+
+			if (preg_match($exept2, $token))
+			{
+				$token = $token . 'ΕΔ';
+			}
+		}
+
+		// Step 2c
+		$re = '/^(.+?)(ΟΥΔΕΣ|ΟΥΔΩΝ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token  = $match[1];
+			$exept3 = '/(ΑΡΚ|ΚΑΛΙΑΚ|ΠΕΤΑΛ|ΛΙΧ|ΠΛΕΞ|ΣΚ|Σ|ΦΛ|ΦΡ|ΒΕΛ|ΛΟΥΛ|ΧΝ|ΣΠ|ΤΡΑΓ|ΦΕ)$/';
+
+			if (preg_match($exept3, $token))
+			{
+				$token = $token . 'ΟΥΔ';
+			}
+		}
+
+		// Step 2d
+		$re = '/^(.+?)(ΕΩΣ|ΕΩΝ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token  = $match[1];
+			$test1  = false;
+			$exept4 = '/^(Θ|Δ|ΕΛ|ΓΑΛ|Ν|Π|ΙΔ|ΠΑΡ)$/';
+
+			if (preg_match($exept4, $token))
+			{
+				$token = $token . 'Ε';
+			}
+		}
+
+		// Step 3
+		$re = '/^(.+?)(ΙΑ|ΙΟΥ|ΙΩΝ)$/';
+
+		if (preg_match($re, $token, $fp))
+		{
+			$stem  = $fp[1];
+			$token = $stem;
+			$re    = '/' . $v . '$/';
+			$test1 = false;
+
+			if (preg_match($re, $token))
+			{
+				$token = $stem . 'Ι';
+			}
+		}
+
+		// Step 4
+		$re = '/^(.+?)(ΙΚΑ|ΙΚΟ|ΙΚΟΥ|ΙΚΩΝ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token  = $match[1];
+			$test1  = false;
+			$re     = '/' . $v . '$/';
+			$exept5 = '/^(ΑΛ|ΑΔ|ΕΝΔ|ΑΜΑΝ|ΑΜΜΟΧΑΛ|ΗΘ|ΑΝΗΘ|ΑΝΤΙΔ|ΦΥΣ|ΒΡΩΜ|ΓΕΡ|ΕΞΩΔ|ΚΑΛΠ|ΚΑΛΛΙΝ|ΚΑΤΑΔ|ΜΟΥΛ|ΜΠΑΝ|ΜΠΑΓΙΑΤ|ΜΠΟΛ|ΜΠΟΣ|ΝΙΤ|ΞΙΚ|ΣΥΝΟΜΗΛ|ΠΕΤΣ|'
+					. 'ΠΙΤΣ|ΠΙΚΑΝΤ|ΠΛΙΑΤΣ|ΠΟΣΤΕΛΝ|ΠΡΩΤΟΔ|ΣΕΡΤ|ΣΥΝΑΔ|ΤΣΑΜ|ΥΠΟΔ|ΦΙΛΟΝ|ΦΥΛΟΔ|ΧΑΣ)$/';
+
+			if (preg_match($re, $token) || preg_match($exept5, $token))
+			{
+				$token = $token . 'ΙΚ';
+			}
+		}
+
+		// Step 5a
+		$re  = '/^(.+?)(ΑΜΕ)$/';
+		$re2 = '/^(.+?)(ΑΓΑΜΕ|ΗΣΑΜΕ|ΟΥΣΑΜΕ|ΗΚΑΜΕ|ΗΘΗΚΑΜΕ)$/';
+
+		if ($token === "ΑΓΑΜΕ")
+		{
+			$token = "ΑΓΑΜ";
+		}
+
+		if (preg_match($re2, $token, $match))
+		{
+			$token = $match[1];
+			$test1 = false;
+		}
+
+		if (preg_match($re, $token, $match))
+		{
+			$token  = $match[1];
+			$test1  = false;
+			$exept6 = '/^(ΑΝΑΠ|ΑΠΟΘ|ΑΠΟΚ|ΑΠΟΣΤ|ΒΟΥΒ|ΞΕΘ|ΟΥΛ|ΠΕΘ|ΠΙΚΡ|ΠΟΤ|ΣΙΧ|Χ)$/';
+
+			if (preg_match($exept6, $token))
+			{
+				$token = $token . "ΑΜ";
+			}
+		}
+
+		// Step 5b
+		$re2 = '/^(.+?)(ΑΝΕ)$/';
+		$re3 = '/^(.+?)(ΑΓΑΝΕ|ΗΣΑΝΕ|ΟΥΣΑΝΕ|ΙΟΝΤΑΝΕ|ΙΟΤΑΝΕ|ΙΟΥΝΤΑΝΕ|ΟΝΤΑΝΕ|ΟΤΑΝΕ|ΟΥΝΤΑΝΕ|ΗΚΑΝΕ|ΗΘΗΚΑΝΕ)$/';
+
+		if (preg_match($re3, $token, $match))
+		{
+			$token = $match[1];
+			$test1 = false;
+			$re3   = '/^(ΤΡ|ΤΣ)$/';
+
+			if (preg_match($re3, $token))
+			{
+				$token = $token . "ΑΓΑΝ";
+			}
+		}
+
+		if (preg_match($re2, $token, $match))
+		{
+			$token  = $match[1];
+			$test1  = false;
+			$re2    = '/' . $v2 . '$/';
+			$exept7 = '/^(ΒΕΤΕΡ|ΒΟΥΛΚ|ΒΡΑΧΜ|Γ|ΔΡΑΔΟΥΜ|Θ|ΚΑΛΠΟΥΖ|ΚΑΣΤΕΛ|ΚΟΡΜΟΡ|ΛΑΟΠΛ|ΜΩΑΜΕΘ|Μ|ΜΟΥΣΟΥΛΜ|Ν|ΟΥΛ|Π|ΠΕΛΕΚ|ΠΛ|ΠΟΛΙΣ|ΠΟΡΤΟΛ|ΣΑΡΑΚΑΤΣ|ΣΟΥΛΤ|'
+					. 'ΤΣΑΡΛΑΤ|ΟΡΦ|ΤΣΙΓΓ|ΤΣΟΠ|ΦΩΤΟΣΤΕΦ|Χ|ΨΥΧΟΠΛ|ΑΓ|ΟΡΦ|ΓΑΛ|ΓΕΡ|ΔΕΚ|ΔΙΠΛ|ΑΜΕΡΙΚΑΝ|ΟΥΡ|ΠΙΘ|ΠΟΥΡΙΤ|Σ|ΖΩΝΤ|ΙΚ|ΚΑΣΤ|ΚΟΠ|ΛΙΧ|ΛΟΥΘΗΡ|ΜΑΙΝΤ|'
+					. 'ΜΕΛ|ΣΙΓ|ΣΠ|ΣΤΕΓ|ΤΡΑΓ|ΤΣΑΓ|Φ|ΕΡ|ΑΔΑΠ|ΑΘΙΓΓ|ΑΜΗΧ|ΑΝΙΚ|ΑΝΟΡΓ|ΑΠΗΓ|ΑΠΙΘ|ΑΤΣΙΓΓ|ΒΑΣ|ΒΑΣΚ|ΒΑΘΥΓΑΛ|ΒΙΟΜΗΧ|ΒΡΑΧΥΚ|ΔΙΑΤ|ΔΙΑΦ|ΕΝΟΡΓ|'
+					. 'ΘΥΣ|ΚΑΠΝΟΒΙΟΜΗΧ|ΚΑΤΑΓΑΛ|ΚΛΙΒ|ΚΟΙΛΑΡΦ|ΛΙΒ|ΜΕΓΛΟΒΙΟΜΗΧ|ΜΙΚΡΟΒΙΟΜΗΧ|ΝΤΑΒ|ΞΗΡΟΚΛΙΒ|ΟΛΙΓΟΔΑΜ|ΟΛΟΓΑΛ|ΠΕΝΤΑΡΦ|ΠΕΡΗΦ|ΠΕΡΙΤΡ|ΠΛΑΤ|'
+					. 'ΠΟΛΥΔΑΠ|ΠΟΛΥΜΗΧ|ΣΤΕΦ|ΤΑΒ|ΤΕΤ|ΥΠΕΡΗΦ|ΥΠΟΚΟΠ|ΧΑΜΗΛΟΔΑΠ|ΨΗΛΟΤΑΒ)$/';
+
+			if (preg_match($re2, $token) || preg_match($exept7, $token))
+			{
+				$token = $token . "ΑΝ";
+			}
+		}
+
+		// Step 5c
+		$re3 = '/^(.+?)(ΕΤΕ)$/';
+		$re4 = '/^(.+?)(ΗΣΕΤΕ)$/';
+
+		if (preg_match($re4, $token, $match))
+		{
+			$token = $match[1];
+			$test1 = false;
+		}
+
+		if (preg_match($re3, $token, $match))
+		{
+			$token  = $match[1];
+			$test1  = false;
+			$re3    = '/' . $v2 . '$/';
+			$exept8 = '/(ΟΔ|ΑΙΡ|ΦΟΡ|ΤΑΘ|ΔΙΑΘ|ΣΧ|ΕΝΔ|ΕΥΡ|ΤΙΘ|ΥΠΕΡΘ|ΡΑΘ|ΕΝΘ|ΡΟΘ|ΣΘ|ΠΥΡ|ΑΙΝ|ΣΥΝΔ|ΣΥΝ|ΣΥΝΘ|ΧΩΡ|ΠΟΝ|ΒΡ|ΚΑΘ|ΕΥΘ|ΕΚΘ|ΝΕΤ|ΡΟΝ|ΑΡΚ|ΒΑΡ|ΒΟΛ|ΩΦΕΛ)$/';
+			$exept9 = '/^(ΑΒΑΡ|ΒΕΝ|ΕΝΑΡ|ΑΒΡ|ΑΔ|ΑΘ|ΑΝ|ΑΠΛ|ΒΑΡΟΝ|ΝΤΡ|ΣΚ|ΚΟΠ|ΜΠΟΡ|ΝΙΦ|ΠΑΓ|ΠΑΡΑΚΑΛ|ΣΕΡΠ|ΣΚΕΛ|ΣΥΡΦ|ΤΟΚ|Υ|Δ|ΕΜ|ΘΑΡΡ|Θ)$/';
+
+			if (preg_match($re3, $token) || preg_match($exept8, $token) || preg_match($exept9, $token))
+			{
+				$token = $token . "ΕΤ";
+			}
+		}
+
+		// Step 5d
+		$re = '/^(.+?)(ΟΝΤΑΣ|ΩΝΤΑΣ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept10 = '/^(ΑΡΧ)$/';
+			$exept11 = '/(ΚΡΕ)$/';
+
+			if (preg_match($exept10, $token))
+			{
+				$token = $token . "ΟΝΤ";
+			}
+
+			if (preg_match($exept11, $token))
+			{
+				$token = $token . "ΩΝΤ";
+			}
+		}
+
+		// Step 5e
+		$re = '/^(.+?)(ΟΜΑΣΤΕ|ΙΟΜΑΣΤΕ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept11 = '/^(ΟΝ)$/';
+
+			if (preg_match($exept11, $token))
+			{
+				$token = $token . "ΟΜΑΣΤ";
+			}
+		}
+
+		// Step 5f
+		$re  = '/^(.+?)(ΕΣΤΕ)$/';
+		$re2 = '/^(.+?)(ΙΕΣΤΕ)$/';
+
+		if (preg_match($re2, $token, $match))
+		{
+			$token = $match[1];
+			$test1 = false;
+			$re2   = '/^(Π|ΑΠ|ΣΥΜΠ|ΑΣΥΜΠ|ΑΚΑΤΑΠ|ΑΜΕΤΑΜΦ)$/';
+
+			if (preg_match($re2, $token))
+			{
+				$token = $token . "ΙΕΣΤ";
+			}
+		}
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept12 = '/^(ΑΛ|ΑΡ|ΕΚΤΕΛ|Ζ|Μ|Ξ|ΠΑΡΑΚΑΛ|ΑΡ|ΠΡΟ|ΝΙΣ)$/';
+
+			if (preg_match($exept12, $token))
+			{
+				$token = $token . "ΕΣΤ";
+			}
+		}
+
+		// Step 5g
+		$re  = '/^(.+?)(ΗΚΑ|ΗΚΕΣ|ΗΚΕ)$/';
+		$re2 = '/^(.+?)(ΗΘΗΚΑ|ΗΘΗΚΕΣ|ΗΘΗΚΕ)$/';
+
+		if (preg_match($re2, $token, $match))
+		{
+			$token = $match[1];
+			$test1 = false;
+		}
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept13 = '/(ΣΚΩΛ|ΣΚΟΥΛ|ΝΑΡΘ|ΣΦ|ΟΘ|ΠΙΘ)$/';
+			$exept14 = '/^(ΔΙΑΘ|Θ|ΠΑΡΑΚΑΤΑΘ|ΠΡΟΣΘ|ΣΥΝΘ|)$/';
+
+			if (preg_match($exept13, $token) || preg_match($exept14, $token))
+			{
+				$token = $token . "ΗΚ";
+			}
+		}
+
+		// Step 5h
+		$re = '/^(.+?)(ΟΥΣΑ|ΟΥΣΕΣ|ΟΥΣΕ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept15 = '/^(ΦΑΡΜΑΚ|ΧΑΔ|ΑΓΚ|ΑΝΑΡΡ|ΒΡΟΜ|ΕΚΛΙΠ|ΛΑΜΠΙΔ|ΛΕΧ|Μ|ΠΑΤ|Ρ|Λ|ΜΕΔ|ΜΕΣΑΖ|ΥΠΟΤΕΙΝ|ΑΜ|ΑΙΘ|ΑΝΗΚ|ΔΕΣΠΟΖ|ΕΝΔΙΑΦΕΡ|ΔΕ|ΔΕΥΤΕΡΕΥ|ΚΑΘΑΡΕΥ|ΠΛΕ|ΤΣΑ)$/';
+			$exept16 = '/(ΠΟΔΑΡ|ΒΛΕΠ|ΠΑΝΤΑΧ|ΦΡΥΔ|ΜΑΝΤΙΛ|ΜΑΛΛ|ΚΥΜΑΤ|ΛΑΧ|ΛΗΓ|ΦΑΓ|ΟΜ|ΠΡΩΤ)$/';
+
+			if (preg_match($exept15, $token) || preg_match($exept16, $token))
+			{
+				$token = $token . "ΟΥΣ";
+			}
+		}
+
+		// Step 5i
+		$re = '/^(.+?)(ΑΓΑ|ΑΓΕΣ|ΑΓΕ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept17 = '/^(ΨΟΦ|ΝΑΥΛΟΧ)$/';
+			$exept20 = '/(ΚΟΛΛ)$/';
+			$exept18 = '/^(ΑΒΑΣΤ|ΠΟΛΥΦ|ΑΔΗΦ|ΠΑΜΦ|Ρ|ΑΣΠ|ΑΦ|ΑΜΑΛ|ΑΜΑΛΛΙ|ΑΝΥΣΤ|ΑΠΕΡ|ΑΣΠΑΡ|ΑΧΑΡ|ΔΕΡΒΕΝ|ΔΡΟΣΟΠ|ΞΕΦ|ΝΕΟΠ|ΝΟΜΟΤ|ΟΛΟΠ|ΟΜΟΤ|ΠΡΟΣΤ|ΠΡΟΣΩΠΟΠ|'
+					. 'ΣΥΜΠ|ΣΥΝΤ|Τ|ΥΠΟΤ|ΧΑΡ|ΑΕΙΠ|ΑΙΜΟΣΤ|ΑΝΥΠ|ΑΠΟΤ|ΑΡΤΙΠ|ΔΙΑΤ|ΕΝ|ΕΠΙΤ|ΚΡΟΚΑΛΟΠ|ΣΙΔΗΡΟΠ|Λ|ΝΑΥ|ΟΥΛΑΜ|ΟΥΡ|Π|ΤΡ|Μ)$/';
+			$exept19 = '/(ΟΦ|ΠΕΛ|ΧΟΡΤ|ΛΛ|ΣΦ|ΡΠ|ΦΡ|ΠΡ|ΛΟΧ|ΣΜΗΝ)$/';
+
+			if ((preg_match($exept18, $token) || preg_match($exept19, $token))
+				&& !(preg_match($exept17, $token) || preg_match($exept20, $token)))
+			{
+				$token = $token . "ΑΓ";
+			}
+		}
+
+		// Step 5j
+		$re = '/^(.+?)(ΗΣΕ|ΗΣΟΥ|ΗΣΑ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept21 = '/^(Ν|ΧΕΡΣΟΝ|ΔΩΔΕΚΑΝ|ΕΡΗΜΟΝ|ΜΕΓΑΛΟΝ|ΕΠΤΑΝ)$/';
+
+			if (preg_match($exept21, $token))
+			{
+				$token = $token . "ΗΣ";
+			}
+		}
+
+		// Step 5k
+		$re = '/^(.+?)(ΗΣΤΕ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept22 = '/^(ΑΣΒ|ΣΒ|ΑΧΡ|ΧΡ|ΑΠΛ|ΑΕΙΜΝ|ΔΥΣΧΡ|ΕΥΧΡ|ΚΟΙΝΟΧΡ|ΠΑΛΙΜΨ)$/';
+
+			if (preg_match($exept22, $token))
+			{
+				$token = $token . "ΗΣΤ";
+			}
+		}
+
+		// Step 5l
+		$re = '/^(.+?)(ΟΥΝΕ|ΗΣΟΥΝΕ|ΗΘΟΥΝΕ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept23 = '/^(Ν|Ρ|ΣΠΙ|ΣΤΡΑΒΟΜΟΥΤΣ|ΚΑΚΟΜΟΥΤΣ|ΕΞΩΝ)$/';
+
+			if (preg_match($exept23, $token))
+			{
+				$token = $token . "ΟΥΝ";
+			}
+		}
+
+		// Step 5m
+		$re = '/^(.+?)(ΟΥΜΕ|ΗΣΟΥΜΕ|ΗΘΟΥΜΕ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token   = $match[1];
+			$test1   = false;
+			$exept24 = '/^(ΠΑΡΑΣΟΥΣ|Φ|Χ|ΩΡΙΟΠΛ|ΑΖ|ΑΛΛΟΣΟΥΣ|ΑΣΟΥΣ)$/';
+
+			if (preg_match($exept24, $token))
+			{
+				$token = $token . "ΟΥΜ";
+			}
+		}
+
+		// Step 6
+		$re  = '/^(.+?)(ΜΑΤΑ|ΜΑΤΩΝ|ΜΑΤΟΣ)$/';
+		$re2 = '/^(.+?)(Α|ΑΓΑΤΕ|ΑΓΑΝ|ΑΕΙ|ΑΜΑΙ|ΑΝ|ΑΣ|ΑΣΑΙ|ΑΤΑΙ|ΑΩ|Ε|ΕΙ|ΕΙΣ|ΕΙΤΕ|ΕΣΑΙ|ΕΣ|ΕΤΑΙ|Ι|ΙΕΜΑΙ|ΙΕΜΑΣΤΕ|ΙΕΤΑΙ|ΙΕΣΑΙ|ΙΕΣΑΣΤΕ|ΙΟΜΑΣΤΑΝ|ΙΟΜΟΥΝ|'
+				. 'ΙΟΜΟΥΝΑ|ΙΟΝΤΑΝ|ΙΟΝΤΟΥΣΑΝ|ΙΟΣΑΣΤΑΝ|ΙΟΣΑΣΤΕ|ΙΟΣΟΥΝ|ΙΟΣΟΥΝΑ|ΙΟΤΑΝ|ΙΟΥΜΑ|ΙΟΥΜΑΣΤΕ|ΙΟΥΝΤΑΙ|ΙΟΥΝΤΑΝ|Η|ΗΔΕΣ|ΗΔΩΝ|ΗΘΕΙ|ΗΘΕΙΣ|ΗΘΕΙΤΕ|'
+				. 'ΗΘΗΚΑΤΕ|ΗΘΗΚΑΝ|ΗΘΟΥΝ|ΗΘΩ|ΗΚΑΤΕ|ΗΚΑΝ|ΗΣ|ΗΣΑΝ|ΗΣΑΤΕ|ΗΣΕΙ|ΗΣΕΣ|ΗΣΟΥΝ|ΗΣΩ|Ο|ΟΙ|ΟΜΑΙ|ΟΜΑΣΤΑΝ|ΟΜΟΥΝ|ΟΜΟΥΝΑ|ΟΝΤΑΙ|ΟΝΤΑΝ|ΟΝΤΟΥΣΑΝ|ΟΣ|'
+				. 'ΟΣΑΣΤΑΝ|ΟΣΑΣΤΕ|ΟΣΟΥΝ|ΟΣΟΥΝΑ|ΟΤΑΝ|ΟΥ|ΟΥΜΑΙ|ΟΥΜΑΣΤΕ|ΟΥΝ|ΟΥΝΤΑΙ|ΟΥΝΤΑΝ|ΟΥΣ|ΟΥΣΑΝ|ΟΥΣΑΤΕ|Υ|ΥΣ|Ω|ΩΝ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1] . "ΜΑ";
+		}
+
+		// The generic endings are stripped only when no earlier step cleared $test1.
+		if ($test1 && preg_match($re2, $token, $match))
+		{
+			$token = $match[1];
+		}
+
+		// Step 7 (ΠΑΡΑΘΕΤΙΚΑ)
+		$re = '/^(.+?)(ΕΣΤΕΡ|ΕΣΤΑΤ|ΟΤΕΡ|ΟΤΑΤ|ΥΤΕΡ|ΥΤΑΤ|ΩΤΕΡ|ΩΤΑΤ)$/';
+
+		if (preg_match($re, $token, $match))
+		{
+			$token = $match[1];
+		}
+
+		return $this->toLowerCase($token, $w_CASE);
+	}
+
+ /**
+  * Converts a Greek token to uppercase while stripping accents (tonos) and diaeresis (dialytika).
+  *
+  * Characters without an entry in the conversion table (digits, Latin letters, letters that are already
+  * uppercase, ...) are copied through unchanged. For every converted character a rule code is stored in
+  * $w_CASE at the position of that character, so that toLowerCase() can restore the original glyph:
+  *
+  *  - 0: the character was not converted
+  *  - 1: plain lowercase letter
+  *  - 2: vowel with tonos, or final sigma
+  *  - 3: vowel with dialytika
+  *  - 4: vowel with dialytika and tonos
+  *
+  * @param   string  $token    Token to process
+  * @param   array   &$w_CASE  Map of uppercase rules; overwritten with one rule code per character of $token
+  *
+  * @return  string  The uppercase token without accents and diaeresis
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ protected function toUpperCase($token, &$w_CASE)
+ {
+     // Always pass the encoding explicitly so the result does not depend on mb_internal_encoding().
+     $length = mb_strlen($token, 'UTF-8');
+     $w_CASE = array_fill(0, $length, 0);
+
+     // Lowercase glyph => unaccented uppercase letter.
+     $caseConvert = array(
+         "α" => 'Α',
+         "β" => 'Β',
+         "γ" => 'Γ',
+         "δ" => 'Δ',
+         "ε" => 'Ε',
+         "ζ" => 'Ζ',
+         "η" => 'Η',
+         "θ" => 'Θ',
+         "ι" => 'Ι',
+         "κ" => 'Κ',
+         "λ" => 'Λ',
+         "μ" => 'Μ',
+         "ν" => 'Ν',
+         "ξ" => 'Ξ',
+         "ο" => 'Ο',
+         "π" => 'Π',
+         "ρ" => 'Ρ',
+         "σ" => 'Σ',
+         "τ" => 'Τ',
+         "υ" => 'Υ',
+         "φ" => 'Φ',
+         "χ" => 'Χ',
+         "ψ" => 'Ψ',
+         "ω" => 'Ω',
+         "ά" => 'Α',
+         "έ" => 'Ε',
+         "ή" => 'Η',
+         "ί" => 'Ι',
+         "ό" => 'Ο',
+         "ύ" => 'Υ',
+         "ώ" => 'Ω',
+         "ς" => 'Σ',
+         "ϊ" => 'Ι',
+         // Upsilon with dialytika is an upsilon: mapping it to iota made toLowerCase() restore it as "ϊ".
+         "ϋ" => 'Υ',
+         "ΐ" => 'Ι',
+         "ΰ" => 'Υ',
+     );
+
+     // Rule codes for glyphs that are more than a plain lowercase letter (which gets code 1).
+     $caseRules = array(
+         'ά' => 2,
+         'έ' => 2,
+         'ή' => 2,
+         'ί' => 2,
+         'ό' => 2,
+         'ύ' => 2,
+         'ώ' => 2,
+         'ς' => 2,
+         'ϊ' => 3,
+         'ϋ' => 3,
+         'ΐ' => 4,
+         'ΰ' => 4,
+     );
+
+     $newToken = '';
+
+     for ($i = 0; $i < $length; $i++)
+     {
+         $char = mb_substr($token, $i, 1, 'UTF-8');
+
+         // Not a lowercase Greek letter: keep it as it is, its rule code stays 0.
+         if (!isset($caseConvert[$char]))
+         {
+             $newToken .= $char;
+
+             continue;
+         }
+
+         $newToken  .= $caseConvert[$char];
+         $w_CASE[$i] = isset($caseRules[$char]) ? $caseRules[$char] : 1;
+     }
+
+     return $newToken;
+ }
+
+ /**
+  * Converts the suppressed uppercase token back to lowercase, using the $w_CASE map to add back the accents,
+  * diaeresis and handle the special case of final sigma (different lowercase glyph than the regular sigma, only
+  * used at the end of words).
+  *
+  * Rule codes read from $w_CASE, per character position:
+  *
+  *  - missing or 0: the character is copied unchanged
+  *  - 1: plain lowercase letter
+  *  - 2: vowel with accent (tonos), or final sigma
+  *  - 3: vowel with diaeresis (dialytika)
+  *  - 4: vowel with both diaeresis and accent
+  *
+  * Any other code leaves the character unchanged. If a code of 2, 3 or 4 is found on a letter that has no such
+  * accented form, the letter is simply lowercased instead of being dropped from the result.
+  *
+  * @param   string  $token   Token to process
+  * @param   array   $w_CASE  Map of lowercase rules
+  *
+  * @return  string  The lowercase token
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ protected function toLowerCase($token, $w_CASE)
+ {
+     // Restoration tables, keyed by rule code.
+     $charMaps = array(
+         2 => array(
+             'Α' => 'ά',
+             'Ε' => 'έ',
+             'Η' => 'ή',
+             'Ι' => 'ί',
+             'Ο' => 'ό',
+             'Υ' => 'ύ',
+             'Ω' => 'ώ',
+             'Σ' => 'ς',
+         ),
+         3 => array(
+             'Ι' => 'ϊ',
+             'Υ' => 'ϋ',
+         ),
+         4 => array(
+             'Ι' => 'ΐ',
+             'Υ' => 'ΰ',
+         ),
+     );
+
+     $newToken = '';
+
+     // Always pass the encoding explicitly so the result does not depend on mb_internal_encoding().
+     $length = mb_strlen($token, 'UTF-8');
+
+     for ($i = 0; $i < $length; $i++)
+     {
+         $char = mb_substr($token, $i, 1, 'UTF-8');
+
+         // Is $w_CASE not set at this position? We assume no case conversion ever took place.
+         $rule = isset($w_CASE[$i]) ? (int) $w_CASE[$i] : 0;
+
+         if ($rule === 1)
+         {
+             // Unaccented letter
+             $newToken .= mb_strtolower($char, 'UTF-8');
+         }
+         elseif (isset($charMaps[$rule]))
+         {
+             // The letter at this position may no longer be the one the rule was recorded for (presumably
+             // when a stemming step rewrote it); fall back to plain lowercase rather than losing the letter.
+             $newToken .= isset($charMaps[$rule][$char]) ? $charMaps[$rule][$char] : mb_strtolower($char, 'UTF-8');
+         }
+         else
+         {
+             // Rule 0 (not converted) or an unknown rule: keep the character as it is.
+             $newToken .= $char;
+         }
+     }
+
+     return $newToken;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/en.php b/administrator/components/com_finder/helpers/indexer/language/en.php
new file mode 100644
index 00000000000..43891ba0b2d
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/en.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\English;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/es.php b/administrator/components/com_finder/helpers/indexer/language/es.php
new file mode 100644
index 00000000000..6ce871c64e8
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/es.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\Spanish;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/fr.php b/administrator/components/com_finder/helpers/indexer/language/fr.php
new file mode 100644
index 00000000000..5b28b92ebf3
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/fr.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\French;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/it.php b/administrator/components/com_finder/helpers/indexer/language/it.php
new file mode 100644
index 00000000000..44248c74f5c
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/it.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\Italian;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/nl.php b/administrator/components/com_finder/helpers/indexer/language/nl.php
new file mode 100644
index 00000000000..c57a06a5568
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/nl.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\Dutch;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/nn.php b/administrator/components/com_finder/helpers/indexer/language/nn.php
new file mode 100644
index 00000000000..d996f25f56c
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/nn.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\Norwegian;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/pt.php b/administrator/components/com_finder/helpers/indexer/language/pt.php
new file mode 100644
index 00000000000..5075c1ce612
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/pt.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\Portuguese;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/ro.php b/administrator/components/com_finder/helpers/indexer/language/ro.php
new file mode 100644
index 00000000000..954eb2bd8ba
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/ro.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\Romanian;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/ru.php b/administrator/components/com_finder/helpers/indexer/language/ru.php
new file mode 100644
index 00000000000..87548c236f2
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/ru.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\Russian;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/sv.php b/administrator/components/com_finder/helpers/indexer/language/sv.php
new file mode 100644
index 00000000000..6cb9295fbeb
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/sv.php
@@ -0,0 +1,58 @@
+stemmer = new \Wamania\Snowball\Swedish;
+ }
+
+ /**
+  * Reduces a token to its stem by delegating to the stemmer object held in $this->stemmer.
+  *
+  * @param   string  $token  The token to stem.
+  *
+  * @return  string  The stemmed token.
+  *
+  * @since   __DEPLOY_VERSION__
+  */
+ public function stem($token)
+ {
+     // All the work is done by the wrapped stemmer.
+     $stemmed = $this->stemmer->stem($token);
+
+     return $stemmed;
+ }
+}
diff --git a/administrator/components/com_finder/helpers/indexer/language/zh.php b/administrator/components/com_finder/helpers/indexer/language/zh.php
new file mode 100644
index 00000000000..c0797e54582
--- /dev/null
+++ b/administrator/components/com_finder/helpers/indexer/language/zh.php
@@ -0,0 +1,74 @@
+language = !empty($options['language']) ? $options['language'] : FinderIndexerHelper::getDefaultLanguage();
- $this->language = FinderIndexerHelper::getPrimaryLanguage($this->language);
// Get the matching mode.
$this->mode = 'AND';
@@ -995,7 +1002,7 @@ class FinderIndexerQuery
{
// Tokenize the current term.
$token = FinderIndexerHelper::tokenize($terms[$i], $lang, true);
- $token = $this->getTokenData($token);
+ $token = $this->getTokenData(array_shift($token));
// Set the required flag.
$token->required = true;
@@ -1009,7 +1016,7 @@ class FinderIndexerQuery
// Tokenize the term after the next term (current plus two).
$other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true);
- $other = $this->getTokenData($other);
+ $other = $this->getTokenData(array_shift($other));
// Set the required flag.
$other->required = true;
@@ -1147,7 +1154,7 @@ class FinderIndexerQuery
// Tokenize the next term (current plus one).
$other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true);
- $other = $this->getTokenData($other);
+ $other = $this->getTokenData(array_shift($other));
// Set the required flag.
$other->required = false;
@@ -1187,7 +1194,7 @@ class FinderIndexerQuery
{
// Tokenize the phrase.
$token = FinderIndexerHelper::tokenize($phrases[$i], $lang, true);
- $token = $this->getTokenData($token);
+ $token = $this->getTokenData(array_shift($token));
// Set the required flag.
$token->required = true;
diff --git a/administrator/components/com_finder/helpers/indexer/stemmer.php b/administrator/components/com_finder/helpers/indexer/stemmer.php
deleted file mode 100644
index 15ecc7cbb55..00000000000
--- a/administrator/components/com_finder/helpers/indexer/stemmer.php
+++ /dev/null
@@ -1,83 +0,0 @@
-clean($adapter, 'cmd');
- $path = __DIR__ . '/stemmer/' . $adapter . '.php';
- $class = 'FinderIndexerStemmer' . ucfirst($adapter);
-
- // Check if a stemmer exists for the adapter.
- if (!file_exists($path))
- {
- // Throw invalid adapter exception.
- throw new Exception(JText::sprintf('COM_FINDER_INDEXER_INVALID_STEMMER', $adapter));
- }
-
- // Instantiate the stemmer.
- JLoader::register($class, $path);
- $instances[$adapter] = new $class;
-
- return $instances[$adapter];
- }
-
- /**
- * Method to stem a token and return the root.
- *
- * @param string $token The token to stem.
- * @param string $lang The language of the token.
- *
- * @return string The root token.
- *
- * @since 2.5
- */
- abstract public function stem($token, $lang);
-}
diff --git a/administrator/components/com_finder/helpers/indexer/stemmer/fr.php b/administrator/components/com_finder/helpers/indexer/stemmer/fr.php
deleted file mode 100644
index 8aa080efc5f..00000000000
--- a/administrator/components/com_finder/helpers/indexer/stemmer/fr.php
+++ /dev/null
@@ -1,265 +0,0 @@
-cache[$lang][$token]))
- {
- // Stem the token.
- $result = self::getStem($token);
-
- // Add the token to the cache.
- $this->cache[$lang][$token] = $result;
- }
-
- return $this->cache[$lang][$token];
- }
-
- /**
- * French stemmer rules variables.
- *
- * @return array The rules
- *
- * @since 3.0
- */
- protected static function getStemRules()
- {
- if (self::$stemRules)
- {
- return self::$stemRules;
- }
-
- $vars = array();
-
- // French accented letters in ISO-8859-1 encoding
- $vars['accents'] = chr(224) . chr(226) . chr(232) . chr(233) . chr(234) . chr(235) . chr(238) . chr(239)
- . chr(244) . chr(251) . chr(249) . chr(231);
-
- // The rule patterns include all accented words for french language
- $vars['rule_pattern'] = '/^([a-z' . $vars['accents'] . ']*)(\*){0,1}(\d)([a-z' . $vars['accents'] . ']*)([.|>])/';
-
- // French vowels (including y) in ISO-8859-1 encoding
- $vars['vowels'] = chr(97) . chr(224) . chr(226) . chr(101) . chr(232) . chr(233) . chr(234) . chr(235)
- . chr(105) . chr(238) . chr(239) . chr(111) . chr(244) . chr(117) . chr(251) . chr(249) . chr(121);
-
- // The French rules in ISO-8859-1 encoding
- $vars['rules'] = array(
- 'esre1>', 'esio1>', 'siol1.', 'siof0.', 'sioe0.', 'sio3>', 'st1>', 'sf1>', 'sle1>', 'slo1>', 's' . chr(233) . '1>', chr(233) . 'tuae5.',
- chr(233) . 'tuae2.', 'tnia0.', 'tniv1.', 'tni3>', 'suor1.', 'suo0.', 'sdrail5.', 'sdrai4.', 'er' . chr(232) . 'i1>', 'sesue3x>',
- 'esuey5i.', 'esue2x>', 'se1>', 'er' . chr(232) . 'g3.', 'eca1>', 'esiah0.', 'esi1>', 'siss2.', 'sir2>', 'sit2>', 'egan' . chr(233) . '1.',
- 'egalli6>', 'egass1.', 'egas0.', 'egat3.', 'ega3>', 'ette4>', 'ett2>', 'etio1.', 'tio' . chr(231) . '4c.', 'tio0.', 'et1>', 'eb1>',
- 'snia1>', 'eniatnau8>', 'eniatn4.', 'enia1>', 'niatnio3.', 'niatg3.', 'e' . chr(233) . '1>', chr(233) . 'hcat1.', chr(233) . 'hca4.',
- chr(233) . 'tila5>', chr(233) . 'tici5.', chr(233) . 'tir1.', chr(233) . 'ti3>', chr(233) . 'gan1.', chr(233) . 'ga3>',
- chr(233) . 'tehc1.', chr(233) . 'te3>', chr(233) . 'it0.', chr(233) . '1>', 'eire4.', 'eirue5.', 'eio1.', 'eia1.', 'ei1>', 'eng1.',
- 'xuaessi7.', 'xuae1>', 'uaes0.', 'uae3.', 'xuave2l.', 'xuav2li>', 'xua3la>', 'ela1>', 'lart2.', 'lani2>', 'la' . chr(233) . '2>',
- 'siay4i.', 'siassia7.', 'siarv1*.', 'sia1>', 'tneiayo6i.', 'tneiay6i.', 'tneiassia9.', 'tneiareio7.', 'tneia5>', 'tneia4>', 'tiario4.',
- 'tiarim3.', 'tiaria3.', 'tiaris3.', 'tiari5.', 'tiarve6>', 'tiare5>', 'iare4>', 'are3>', 'tiay4i.', 'tia3>', 'tnay4i.',
- 'em' . chr(232) . 'iu5>', 'em' . chr(232) . 'i4>', 'tnaun3.', 'tnauqo3.', 'tnau4>', 'tnaf0.', 'tnat' . chr(233) . '2>', 'tna3>', 'tno3>',
- 'zeiy4i.', 'zey3i.', 'zeire5>', 'zeird4.', 'zeirio4.', 'ze2>', 'ssiab0.', 'ssia4.', 'ssi3.', 'tnemma6>', 'tnemesuey9i.', 'tnemesue8>',
- 'tnemevi7.', 'tnemessia5.', 'tnemessi8.', 'tneme5>', 'tnemia4.', 'tnem' . chr(233) . '5>', 'el2l>', 'lle3le>', 'let' . chr(244) . '0.',
- 'lepp0.', 'le2>', 'srei1>', 'reit3.', 'reila2.', 'rei3>', 'ert' . chr(226) . 'e5.', 'ert' . chr(226) . chr(233) . '1.',
- 'ert' . chr(226) . '4.', 'drai4.', 'erdro0.', 'erute5.', 'ruta0.', 'eruta1.', 'erutiov1.', 'erub3.', 'eruh3.', 'erul3.', 'er2r>', 'nn1>',
- 'r' . chr(232) . 'i3.', 'srev0.', 'sr1>', 'rid2>', 're2>', 'xuei4.', 'esuei5.', 'lbati3.', 'lba3>', 'rueis0.', 'ruehcn4.', 'ecirta6.',
- 'ruetai6.', 'rueta5.', 'rueir0.', 'rue3>', 'esseti6.', 'essere6>', 'esserd1.', 'esse4>', 'essiab1.', 'essia5.', 'essio1.', 'essi4.',
- 'essal4.', 'essa1>', 'ssab1.', 'essurp1.', 'essu4.', 'essi1.', 'ssor1.', 'essor2.', 'esso1>', 'ess2>', 'tio3.', 'r' . chr(232) . 's2re.',
- 'r' . chr(232) . '0e.', 'esn1.', 'eu1>', 'sua0.', 'su1>', 'utt1>', 'tu' . chr(231) . '3c.', 'u' . chr(231) . '2c.', 'ur1.', 'ehcn2>',
- 'ehcu1>', 'snorr3.', 'snoru3.', 'snorua3.', 'snorv3.', 'snorio4.', 'snori5.', 'snore5>', 'snortt4>', 'snort' . chr(238) . 'a7.', 'snort3.',
- 'snor4.', 'snossi6.', 'snoire6.', 'snoird5.', 'snoitai7.', 'snoita6.', 'snoits1>', 'noits0.', 'snoi4>', 'noitaci7>', 'noitai6.', 'noita5.',
- 'noitu4.', 'noi3>', 'snoya0.', 'snoy4i.', 'sno' . chr(231) . 'a1.', 'sno' . chr(231) . 'r1.', 'snoe4.', 'snosiar1>', 'snola1.', 'sno3>',
- 'sno1>', 'noll2.', 'tnennei4.', 'ennei2>', 'snei1>', 'sne' . chr(233) . '1>', 'enne' . chr(233) . '5e.', 'ne' . chr(233) . '3e.', 'neic0.',
- 'neiv0.', 'nei3.', 'sc1.', 'sd1.', 'sg1.', 'sni1.', 'tiu0.', 'ti2.', 'sp1>', 'sna1>', 'sue1.', 'enn2>', 'nong2.', 'noss2.', 'rioe4.',
- 'riot0.', 'riorc1.', 'riovec5.', 'rio3.', 'ric2.', 'ril2.', 'tnerim3.', 'tneris3>', 'tneri5.', 't' . chr(238) . 'a3.', 'riss2.',
- 't' . chr(238) . '2.', 't' . chr(226) . '2>', 'ario2.', 'arim1.', 'ara1.', 'aris1.', 'ari3.', 'art1>', 'ardn2.', 'arr1.', 'arua1.',
- 'aro1.', 'arv1.', 'aru1.', 'ar2.', 'rd1.', 'ud1.', 'ul1.', 'ini1.', 'rin2.', 'tnessiab3.', 'tnessia7.', 'tnessi6.', 'tnessni4.', 'sini2.',
- 'sl1.', 'iard3.', 'iario3.', 'ia2>', 'io0.', 'iule2.', 'i1>', 'sid2.', 'sic2.', 'esoi4.', 'ed1.', 'ai2>', 'a1>', 'adr1.',
- 'tner' . chr(232) . '5>', 'evir1.', 'evio4>', 'evi3.', 'fita4.', 'fi2>', 'enie1.', 'sare4>', 'sari4>', 'sard3.', 'sart2>', 'sa2.',
- 'tnessa6>', 'tnessu6>', 'tnegna3.', 'tnegi3.', 'tneg0.', 'tneru5>', 'tnemg0.', 'tnerni4.', 'tneiv1.', 'tne3>', 'une1.', 'en1>', 'nitn2.',
- 'ecnay5i.', 'ecnal1.', 'ecna4.', 'ec1>', 'nn1.', 'rit2>', 'rut2>', 'rud2.', 'ugn1>', 'eg1>', 'tuo0.', 'tul2>', 't' . chr(251) . '2>',
- 'ev1>', 'v' . chr(232) . '2ve>', 'rtt1>', 'emissi6.', 'em1.', 'ehc1.', 'c' . chr(233) . 'i2c' . chr(232) . '.', 'libi2l.', 'llie1.',
- 'liei4i.', 'xuev1.', 'xuey4i.', 'xueni5>', 'xuell4.', 'xuere5.', 'xue3>', 'rb' . chr(233) . '3rb' . chr(232) . '.', 'tur2.',
- 'rir' . chr(233) . '4re.', 'rir2.', 'c' . chr(226) . '2ca.', 'snu1.', 'rt' . chr(238) . 'a4.', 'long2.', 'vec2.', chr(231) . '1c>',
- 'ssilp3.', 'silp2.', 't' . chr(232) . 'hc2te.', 'n' . chr(232) . 'm2ne.', 'llepp1.', 'tan2.', 'rv' . chr(232) . '3rve.',
- 'rv' . chr(233) . '3rve.', 'r' . chr(232) . '2re.', 'r' . chr(233) . '2re.', 't' . chr(232) . '2te.', 't' . chr(233) . '2te.', 'epp1.',
- 'eya2i.', 'ya1i.', 'yo1i.', 'esu1.', 'ugi1.', 'tt1.', 'end0.'
- );
-
- self::$stemRules = $vars;
-
- return self::$stemRules;
- }
-
- /**
- * Returns the number of the first rule from the rule number
- * that can be applied to the given reversed input.
- * returns -1 if no rule can be applied, ie the stem has been found
- *
- * @param string $reversed_input The input to check in reversed order
- * @param integer $rule_number The rule number to check
- *
- * @return integer Number of the first rule
- *
- * @since 3.0
- */
- private static function getFirstRule($reversed_input, $rule_number)
- {
- $vars = static::getStemRules();
-
- $nb_rules = count($vars['rules']);
-
- for ($i = $rule_number; $i < $nb_rules; $i++)
- {
- // Gets the letters from the current rule
- $rule = $vars['rules'][$i];
- $rule = preg_replace($vars['rule_pattern'], "\\1", $rule);
-
- if (strncasecmp(utf8_decode($rule), $reversed_input, strlen(utf8_decode($rule))) == 0)
- {
- return $i;
- }
- }
-
- return -1;
- }
-
- /**
- * Check the acceptability of a stem for French language
- *
- * @param string $reversed_stem The stem to check in reverse form
- *
- * @return boolean True if stem is acceptable
- *
- * @since 3.0
- */
- private static function check($reversed_stem)
- {
- $vars = static::getStemRules();
-
- if (preg_match('/[' . $vars['vowels'] . ']$/', utf8_encode($reversed_stem)))
- {
- // If the form starts with a vowel then at least two letters must remain after stemming (e.g.: "etaient" --> "et")
- return (strlen($reversed_stem) > 2);
- }
- else
- {
- // If the reversed stem starts with a consonant then at least two letters must remain after stemming
- if (strlen($reversed_stem) <= 2)
- {
- return false;
- }
-
- // And at least one of these must be a vowel or "y"
- return preg_match('/[' . $vars['vowels'] . ']/', utf8_encode($reversed_stem));
- }
- }
-
- /**
- * Paice/Husk stemmer which returns a stem for the given $input
- *
- * @param string $input The word for which we want the stem in UTF-8
- *
- * @return string The stem
- *
- * @since 3.0
- */
- private static function getStem($input)
- {
- $vars = static::getStemRules();
-
- $intact = true;
- $reversed_input = strrev(utf8_decode($input));
- $rule_number = 0;
-
- // This loop goes through the rules' array until it finds an ending one (ending by '.') or the last one ('end0.')
- while (true)
- {
- $rule_number = self::getFirstRule($reversed_input, $rule_number);
-
- if ($rule_number == -1)
- {
- // No other rule can be applied => the stem has been found
- break;
- }
-
- $rule = $vars['rules'][$rule_number];
- preg_match($vars['rule_pattern'], $rule, $matches);
-
- if ($matches[2] != '*' || $intact)
- {
- $reversed_stem = utf8_decode($matches[4]) . substr($reversed_input, $matches[3], strlen($reversed_input) - $matches[3]);
-
- if (self::check($reversed_stem))
- {
- $reversed_input = $reversed_stem;
-
- if ($matches[5] == '.')
- {
- break;
- }
- }
- else
- {
- // Go to another rule
- $rule_number++;
- }
- }
- else
- {
- // Go to another rule
- $rule_number++;
- }
- }
-
- return utf8_encode(strrev($reversed_input));
- }
-}
diff --git a/administrator/components/com_finder/helpers/indexer/stemmer/porter_en.php b/administrator/components/com_finder/helpers/indexer/stemmer/porter_en.php
deleted file mode 100644
index e1649958f2b..00000000000
--- a/administrator/components/com_finder/helpers/indexer/stemmer/porter_en.php
+++ /dev/null
@@ -1,446 +0,0 @@
-cache[$lang][$token]))
- {
- // Stem the token.
- $result = $token;
- $result = self::step1ab($result);
- $result = self::step1c($result);
- $result = self::step2($result);
- $result = self::step3($result);
- $result = self::step4($result);
- $result = self::step5($result);
-
- // Add the token to the cache.
- $this->cache[$lang][$token] = $result;
- }
-
- return $this->cache[$lang][$token];
- }
-
- /**
- * Step 1
- *
- * @param string $word The token to stem.
- *
- * @return string
- *
- * @since 2.5
- */
- private static function step1ab($word)
- {
- // Part a
- if (substr($word, -1) == 's')
- {
- self::replace($word, 'sses', 'ss')
- || self::replace($word, 'ies', 'i')
- || self::replace($word, 'ss', 'ss')
- || self::replace($word, 's', '');
- }
-
- // Part b
- if (substr($word, -2, 1) != 'e' || !self::replace($word, 'eed', 'ee', 0))
- {
- // First rule
- $v = self::$regex_vowel;
-
- // Words ending with ing and ed
- // Note use of && and OR, for precedence reasons
- if (preg_match("#$v+#", substr($word, 0, -3)) && self::replace($word, 'ing', '')
- || preg_match("#$v+#", substr($word, 0, -2)) && self::replace($word, 'ed', ''))
- {
- // If one of above two test successful
- if (!self::replace($word, 'at', 'ate') && !self::replace($word, 'bl', 'ble') && !self::replace($word, 'iz', 'ize'))
- {
- // Double consonant ending
- if (self::doubleConsonant($word) && substr($word, -2) != 'll' && substr($word, -2) != 'ss' && substr($word, -2) != 'zz')
- {
- $word = substr($word, 0, -1);
- }
- elseif (self::m($word) == 1 && self::cvc($word))
- {
- $word .= 'e';
- }
- }
- }
- }
-
- return $word;
- }
-
- /**
- * Step 1c
- *
- * @param string $word The token to stem.
- *
- * @return string
- *
- * @since 2.5
- */
- private static function step1c($word)
- {
- $v = self::$regex_vowel;
-
- if (substr($word, -1) == 'y' && preg_match("#$v+#", substr($word, 0, -1)))
- {
- self::replace($word, 'y', 'i');
- }
-
- return $word;
- }
-
- /**
- * Step 2
- *
- * @param string $word The token to stem.
- *
- * @return string
- *
- * @since 2.5
- */
- private static function step2($word)
- {
- switch (substr($word, -2, 1))
- {
- case 'a':
- self::replace($word, 'ational', 'ate', 0)
- || self::replace($word, 'tional', 'tion', 0);
- break;
- case 'c':
- self::replace($word, 'enci', 'ence', 0)
- || self::replace($word, 'anci', 'ance', 0);
- break;
- case 'e':
- self::replace($word, 'izer', 'ize', 0);
- break;
- case 'g':
- self::replace($word, 'logi', 'log', 0);
- break;
- case 'l':
- self::replace($word, 'entli', 'ent', 0)
- || self::replace($word, 'ousli', 'ous', 0)
- || self::replace($word, 'alli', 'al', 0)
- || self::replace($word, 'bli', 'ble', 0)
- || self::replace($word, 'eli', 'e', 0);
- break;
- case 'o':
- self::replace($word, 'ization', 'ize', 0)
- || self::replace($word, 'ation', 'ate', 0)
- || self::replace($word, 'ator', 'ate', 0);
- break;
- case 's':
- self::replace($word, 'iveness', 'ive', 0)
- || self::replace($word, 'fulness', 'ful', 0)
- || self::replace($word, 'ousness', 'ous', 0)
- || self::replace($word, 'alism', 'al', 0);
- break;
- case 't':
- self::replace($word, 'biliti', 'ble', 0)
- || self::replace($word, 'aliti', 'al', 0)
- || self::replace($word, 'iviti', 'ive', 0);
- break;
- }
-
- return $word;
- }
-
- /**
- * Step 3
- *
- * @param string $word The token to stem.
- *
- * @return string
- *
- * @since 2.5
- */
- private static function step3($word)
- {
- switch (substr($word, -2, 1))
- {
- case 'a':
- self::replace($word, 'ical', 'ic', 0);
- break;
- case 's':
- self::replace($word, 'ness', '', 0);
- break;
- case 't':
- self::replace($word, 'icate', 'ic', 0)
- || self::replace($word, 'iciti', 'ic', 0);
- break;
- case 'u':
- self::replace($word, 'ful', '', 0);
- break;
- case 'v':
- self::replace($word, 'ative', '', 0);
- break;
- case 'z':
- self::replace($word, 'alize', 'al', 0);
- break;
- }
-
- return $word;
- }
-
- /**
- * Step 4
- *
- * @param string $word The token to stem.
- *
- * @return string
- *
- * @since 2.5
- */
- private static function step4($word)
- {
- switch (substr($word, -2, 1))
- {
- case 'a':
- self::replace($word, 'al', '', 1);
- break;
- case 'c':
- self::replace($word, 'ance', '', 1)
- || self::replace($word, 'ence', '', 1);
- break;
- case 'e':
- self::replace($word, 'er', '', 1);
- break;
- case 'i':
- self::replace($word, 'ic', '', 1);
- break;
- case 'l':
- self::replace($word, 'able', '', 1)
- || self::replace($word, 'ible', '', 1);
- break;
- case 'n':
- self::replace($word, 'ant', '', 1)
- || self::replace($word, 'ement', '', 1)
- || self::replace($word, 'ment', '', 1)
- || self::replace($word, 'ent', '', 1);
- break;
- case 'o':
- if (substr($word, -4) == 'tion' || substr($word, -4) == 'sion')
- {
- self::replace($word, 'ion', '', 1);
- }
- else
- {
- self::replace($word, 'ou', '', 1);
- }
- break;
- case 's':
- self::replace($word, 'ism', '', 1);
- break;
- case 't':
- self::replace($word, 'ate', '', 1)
- || self::replace($word, 'iti', '', 1);
- break;
- case 'u':
- self::replace($word, 'ous', '', 1);
- break;
- case 'v':
- self::replace($word, 'ive', '', 1);
- break;
- case 'z':
- self::replace($word, 'ize', '', 1);
- break;
- }
-
- return $word;
- }
-
- /**
- * Step 5
- *
- * @param string $word The token to stem.
- *
- * @return string
- *
- * @since 2.5
- */
- private static function step5($word)
- {
- // Part a
- if (substr($word, -1) == 'e')
- {
- if (self::m(substr($word, 0, -1)) > 1)
- {
- self::replace($word, 'e', '');
- }
- elseif (self::m(substr($word, 0, -1)) == 1)
- {
- if (!self::cvc(substr($word, 0, -1)))
- {
- self::replace($word, 'e', '');
- }
- }
- }
-
- // Part b
- if (self::m($word) > 1 && self::doubleConsonant($word) && substr($word, -1) == 'l')
- {
- $word = substr($word, 0, -1);
- }
-
- return $word;
- }
-
- /**
- * Replaces the first string with the second, at the end of the string. If third
- * arg is given, then the preceding string must match that m count at least.
- *
- * @param string &$str String to check
- * @param string $check Ending to check for
- * @param string $repl Replacement string
- * @param integer $m Optional minimum number of m() to meet
- *
- * @return boolean Whether the $check string was at the end
- * of the $str string. True does not necessarily mean
- * that it was replaced.
- *
- * @since 2.5
- */
- private static function replace(&$str, $check, $repl, $m = null)
- {
- $len = 0 - strlen($check);
-
- if (substr($str, $len) == $check)
- {
- $substr = substr($str, 0, $len);
-
- if (is_null($m) || self::m($substr) > $m)
- {
- $str = $substr . $repl;
- }
-
- return true;
- }
-
- return false;
- }
-
- /**
- * m() measures the number of consonant sequences in $str. if c is
- * a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
- * presence,
- *
- * gives 0
- * vc gives 1
- * vcvc gives 2
- * vcvcvc gives 3
- *
- * @param string $str The string to return the m count for
- *
- * @return integer The m count
- *
- * @since 2.5
- */
- private static function m($str)
- {
- $c = self::$regex_consonant;
- $v = self::$regex_vowel;
-
- $str = preg_replace("#^$c+#", '', $str);
- $str = preg_replace("#$v+$#", '', $str);
-
- preg_match_all("#($v+$c+)#", $str, $matches);
-
- return count($matches[1]);
- }
-
- /**
- * Returns true/false as to whether the given string contains two
- * of the same consonant next to each other at the end of the string.
- *
- * @param string $str String to check
- *
- * @return boolean Result
- *
- * @since 2.5
- */
- private static function doubleConsonant($str)
- {
- $c = self::$regex_consonant;
-
- return preg_match("#$c{2}$#", $str, $matches) && $matches[0]{0} == $matches[0]{1};
- }
-
- /**
- * Checks for ending CVC sequence where second C is not W, X or Y
- *
- * @param string $str String to check
- *
- * @return boolean Result
- *
- * @since 2.5
- */
- private static function cvc($str)
- {
- $c = self::$regex_consonant;
- $v = self::$regex_vowel;
-
- return preg_match("#($c$v$c)$#", $str, $matches) && strlen($matches[1]) == 3 && $matches[1]{2} != 'w' && $matches[1]{2} != 'x'
- && $matches[1]{2} != 'y';
- }
-}
diff --git a/administrator/components/com_finder/helpers/indexer/stemmer/snowball.php b/administrator/components/com_finder/helpers/indexer/stemmer/snowball.php
deleted file mode 100644
index b41973434b4..00000000000
--- a/administrator/components/com_finder/helpers/indexer/stemmer/snowball.php
+++ /dev/null
@@ -1,133 +0,0 @@
-sef ?? '*';
- $lang = $defaultLang;
- }
-
- // Stem the token if it is not in the cache.
- if (!isset($this->cache[$lang][$token]))
- {
- // Get the stem function from the language string.
- switch ($lang)
- {
- // Danish stemmer.
- case 'da':
- $function = 'stem_danish';
- break;
-
- // German stemmer.
- case 'de':
- $function = 'stem_german';
- break;
-
- // English stemmer.
- default:
- case 'en':
- $function = 'stem_english';
- break;
-
- // Spanish stemmer.
- case 'es':
- $function = 'stem_spanish';
- break;
-
- // Finnish stemmer.
- case 'fi':
- $function = 'stem_finnish';
- break;
-
- // French stemmer.
- case 'fr':
- $function = 'stem_french';
- break;
-
- // Hungarian stemmer.
- case 'hu':
- $function = 'stem_hungarian';
- break;
-
- // Italian stemmer.
- case 'it':
- $function = 'stem_italian';
- break;
-
- // Norwegian stemmer.
- case 'nb':
- $function = 'stem_norwegian';
- break;
-
- // Dutch stemmer.
- case 'nl':
- $function = 'stem_dutch';
- break;
-
- // Portuguese stemmer.
- case 'pt':
- $function = 'stem_portuguese';
- break;
-
- // Romanian stemmer.
- case 'ro':
- $function = 'stem_romanian';
- break;
-
- // Russian stemmer.
- case 'ru':
- $function = 'stem_russian_unicode';
- break;
-
- // Swedish stemmer.
- case 'sv':
- $function = 'stem_swedish';
- break;
-
- // Turkish stemmer.
- case 'tr':
- $function = 'stem_turkish_unicode';
- break;
- }
-
- // Stem the word if the stemmer method exists.
- $this->cache[$lang][$token] = function_exists($function) ? $function($token) : $token;
- }
-
- return $this->cache[$lang][$token];
- }
-}
diff --git a/administrator/components/com_finder/helpers/indexer/taxonomy.php b/administrator/components/com_finder/helpers/indexer/taxonomy.php
index 92637310965..0b90dde30d3 100644
--- a/administrator/components/com_finder/helpers/indexer/taxonomy.php
+++ b/administrator/components/com_finder/helpers/indexer/taxonomy.php
@@ -10,7 +10,7 @@
defined('_JEXEC') or die;
/**
- * Stemmer base class for the Finder indexer package.
+ * Taxonomy base class for the Finder indexer package.
*
* @since 2.5
*/
diff --git a/administrator/components/com_finder/helpers/indexer/token.php b/administrator/components/com_finder/helpers/indexer/token.php
index ec804098360..40ea5ec9ff4 100644
--- a/administrator/components/com_finder/helpers/indexer/token.php
+++ b/administrator/components/com_finder/helpers/indexer/token.php
@@ -98,7 +98,14 @@ class FinderIndexerToken
*/
public function __construct($term, $lang, $spacer = ' ')
{
- $this->language = $lang;
+ if (!$lang)
+ {
+ $this->language = '*';
+ }
+ else
+ {
+ $this->language = $lang;
+ }
// Tokens can be a single word or an array of words representing a phrase.
if (is_array($term))
diff --git a/administrator/language/en-GB/en-GB.com_finder.ini b/administrator/language/en-GB/en-GB.com_finder.ini
index 3d427037f7b..1109182b3e6 100644
--- a/administrator/language/en-GB/en-GB.com_finder.ini
+++ b/administrator/language/en-GB/en-GB.com_finder.ini
@@ -35,11 +35,6 @@ COM_FINDER_CONFIG_SORT_OPTION_LIST_PRICE="List price"
COM_FINDER_CONFIG_SORT_OPTION_RELEVANCE="Relevance"
COM_FINDER_CONFIG_SORT_OPTION_START_DATE="Date"
COM_FINDER_CONFIG_SORT_ORDER_LABEL="Sort Field"
-COM_FINDER_CONFIG_STEMMER_ENABLE_LABEL="Enable Language Stemmer"
-COM_FINDER_CONFIG_STEMMER_FR="French Only"
-COM_FINDER_CONFIG_STEMMER_LABEL="Select Language Stemmer"
-COM_FINDER_CONFIG_STEMMER_PORTER_EN="English Only"
-COM_FINDER_CONFIG_STEMMER_SNOWBALL="Snowball"
COM_FINDER_CONFIG_TEXT_MULTIPLIER_DESCRIPTION="The multiplier is used to control how much influence matching text has on the overall relevance score of a search result. A multiplier is considered in relationship to the other multipliers. The body text comes from the summary and/or body of the content."
COM_FINDER_CONFIG_TEXT_MULTIPLIER_LABEL="Body Text Weight Multiplier"
COM_FINDER_CONFIG_TITLE_MULTIPLIER_DESCRIPTION="The multiplier is used to control how much influence matching text has on the overall relevance score of a search result. A multiplier is considered in relationship to the other multipliers. The title text comes from the title of the content."
diff --git a/build.xml b/build.xml
index ee810a4e3cc..34f3e2859eb 100644
--- a/build.xml
+++ b/build.xml
@@ -37,6 +37,7 @@
+
diff --git a/components/com_finder/Model/SearchModel.php b/components/com_finder/Model/SearchModel.php
index c2423e89f37..a3fb724f8ae 100644
--- a/components/com_finder/Model/SearchModel.php
+++ b/components/com_finder/Model/SearchModel.php
@@ -18,9 +18,9 @@ use Joomla\Utilities\ArrayHelper;
// Register dependent classes.
define('FINDER_PATH_INDEXER', JPATH_ADMINISTRATOR . '/components/com_finder/helpers/indexer');
\JLoader::register('FinderIndexerHelper', FINDER_PATH_INDEXER . '/helper.php');
+\JLoader::register('FinderIndexerLanguage', FINDER_PATH_INDEXER . '/language.php');
\JLoader::register('FinderIndexerQuery', FINDER_PATH_INDEXER . '/query.php');
\JLoader::register('FinderIndexerResult', FINDER_PATH_INDEXER . '/result.php');
-\JLoader::register('FinderIndexerStemmer', FINDER_PATH_INDEXER . '/stemmer.php');
/**
* Search model class for the Finder package.
@@ -221,6 +221,7 @@ class SearchModel extends ListModel
$query->where($db->quoteName('l.start_date') . ' = ' . $date2);
}
}
+
// Filter by language
if ($this->getState('filter.language'))
{
@@ -266,12 +267,27 @@ class SearchModel extends ListModel
* If there are no optional or required search terms in the query, we
* can get the results in one relatively simple database query.
*/
- if (empty($this->includedTerms))
+ if (empty($this->includedTerms) && $this->searchquery->empty)
{
// Return the results.
return $query;
}
+ /*
+ * If there are no optional or required search terms in the query and
+ * empty searches are not allowed, we return an empty query.
+ */
+ if (empty($this->includedTerms) && !$this->searchquery->empty)
+ {
+ // Since we need to return a query, we simplify this one.
+ $query->clear('join')
+ ->clear('where')
+ ->clear('group')
+ ->where('false');
+
+ return $query;
+ }
+
$included = call_user_func_array('array_merge', $this->includedTerms);
$query->join('INNER', $this->_db->quoteName('#__finder_links_terms') . ' AS m ON m.link_id = l.link_id')
->where('m.term_id IN (' . implode(',', $included) . ')');
@@ -291,10 +307,14 @@ class SearchModel extends ListModel
*/
if (count($this->requiredTerms))
{
- $required = call_user_func_array('array_merge', $this->requiredTerms);
- $query->join('INNER', $this->_db->quoteName('#__finder_links_terms') . ' AS r ON r.link_id = l.link_id')
- ->where('r.term_id IN (' . implode(',', $required) . ')')
- ->having('COUNT(DISTINCT r.term_id) = ' . count($required));
+ $i = 0;
+
+ foreach ($this->requiredTerms as $terms)
+ {
+ $query->join('INNER', $this->_db->quoteName('#__finder_links_terms') . ' AS r' . $i . ' ON r' . $i . '.link_id = l.link_id')
+ ->where('r' . $i . '.term_id IN (' . implode(',', $terms) . ')');
+ $i++;
+ }
}
return $query;
@@ -361,12 +381,6 @@ class SearchModel extends ListModel
$this->setState('filter.language', Multilanguage::isEnabled());
- // Setup the stemmer.
- if ($params->get('stem', 1) && $params->get('stemmer', 'porter_en'))
- {
- \FinderIndexerHelper::$stemmer = \FinderIndexerStemmer::getInstance($params->get('stemmer', 'porter_en'));
- }
-
$request = $input->request;
$options = array();
diff --git a/components/com_finder/Model/SuggestionsModel.php b/components/com_finder/Model/SuggestionsModel.php
index 0993efe1b6d..3f86a89ff82 100644
--- a/components/com_finder/Model/SuggestionsModel.php
+++ b/components/com_finder/Model/SuggestionsModel.php
@@ -65,13 +65,14 @@ class SuggestionsModel extends ListModel
// Create a new query object.
$db = $this->getDbo();
$query = $db->getQuery(true);
+ $lang = \FinderIndexerHelper::getPrimaryLanguage($this->getState('language'));
// Select required fields
$query->select('t.term')
->from($db->quoteName('#__finder_terms') . ' AS t')
->where('t.term LIKE ' . $db->quote($db->escape($this->getState('input'), true) . '%'))
->where('t.common = 0')
- ->where('t.language IN (' . $db->quote($db->escape($this->getState('language'), true)) . ', ' . $db->quote('*') . ')')
+ ->where('t.language IN (' . $db->quote($lang) . ', ' . $db->quote('*') . ')')
->order('t.links DESC')
->order('t.weight DESC');
diff --git a/composer.json b/composer.json
index 36b1b6d11cb..c219c5b7863 100644
--- a/composer.json
+++ b/composer.json
@@ -80,7 +80,8 @@
"symfony/debug": "3.4.*",
"symfony/ldap": "3.4.*",
"symfony/web-link": "3.4.*",
- "symfony/yaml": "3.4.*"
+ "symfony/yaml": "3.4.*",
+ "wamania/php-stemmer": "^1.2"
},
"require-dev": {
"phpunit/phpunit": "~6.0",
diff --git a/composer.lock b/composer.lock
index 4349fe50e9f..01d31e8268a 100644
--- a/composer.lock
+++ b/composer.lock
@@ -1,10 +1,10 @@
{
"_readme": [
"This file locks the dependencies of your project to a known state",
- "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
+ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
- "content-hash": "3bd7695b38b737c3b555d83058b877fd",
+ "content-hash": "81c9ca521a0712b07e07143b469e835a",
"packages": [
{
"name": "composer/ca-bundle",
@@ -2513,6 +2513,50 @@
"homepage": "https://symfony.com",
"time": "2018-05-03T23:18:14+00:00"
},
+ {
+ "name": "wamania/php-stemmer",
+ "version": "1.2",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/wamania/php-stemmer.git",
+ "reference": "6cc76829bddd46f7ae7678e0bf87a0c872c8cf58"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/wamania/php-stemmer/zipball/6cc76829bddd46f7ae7678e0bf87a0c872c8cf58",
+ "reference": "6cc76829bddd46f7ae7678e0bf87a0c872c8cf58",
+ "shasum": ""
+ },
+ "require": {
+ "php": ">=5.3.0"
+ },
+ "require-dev": {
+ "phpunit/phpunit": "^4.8"
+ },
+ "type": "library",
+ "autoload": {
+ "psr-4": {
+ "Wamania\\Snowball\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Wamania",
+ "homepage": "http://wamania.com"
+ }
+ ],
+ "description": "Native PHP5 Stemmer",
+ "keywords": [
+ "php",
+ "porter",
+ "stemmer"
+ ],
+ "time": "2017-01-27T17:16:44+00:00"
+ },
{
"name": "zendframework/zend-diactoros",
"version": "1.7.2",
diff --git a/libraries/vendor/composer/ClassLoader.php b/libraries/vendor/composer/ClassLoader.php
index dc02dfb114f..2c72175e772 100644
--- a/libraries/vendor/composer/ClassLoader.php
+++ b/libraries/vendor/composer/ClassLoader.php
@@ -379,9 +379,9 @@ class ClassLoader
$subPath = substr($subPath, 0, $lastPos);
$search = $subPath.'\\';
if (isset($this->prefixDirsPsr4[$search])) {
- $pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1);
foreach ($this->prefixDirsPsr4[$search] as $dir) {
- if (file_exists($file = $dir . $pathEnd)) {
+ $length = $this->prefixLengthsPsr4[$first][$search];
+ if (file_exists($file = $dir . DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $length))) {
return $file;
}
}
diff --git a/libraries/vendor/composer/autoload_classmap.php b/libraries/vendor/composer/autoload_classmap.php
index b6e387161c9..2c82b493040 100644
--- a/libraries/vendor/composer/autoload_classmap.php
+++ b/libraries/vendor/composer/autoload_classmap.php
@@ -946,6 +946,21 @@ return array(
'Symfony\\Polyfill\\Util\\BinaryOnFuncOverload' => $vendorDir . '/symfony/polyfill-util/BinaryOnFuncOverload.php',
'Symfony\\Polyfill\\Util\\LegacyTestListener' => $vendorDir . '/symfony/polyfill-util/LegacyTestListener.php',
'Symfony\\Polyfill\\Util\\TestListenerTrait' => $vendorDir . '/symfony/polyfill-util/TestListenerTrait.php',
+ 'Wamania\\Snowball\\Danish' => $vendorDir . '/wamania/php-stemmer/src/Danish.php',
+ 'Wamania\\Snowball\\Dutch' => $vendorDir . '/wamania/php-stemmer/src/Dutch.php',
+ 'Wamania\\Snowball\\English' => $vendorDir . '/wamania/php-stemmer/src/English.php',
+ 'Wamania\\Snowball\\French' => $vendorDir . '/wamania/php-stemmer/src/French.php',
+ 'Wamania\\Snowball\\German' => $vendorDir . '/wamania/php-stemmer/src/German.php',
+ 'Wamania\\Snowball\\Italian' => $vendorDir . '/wamania/php-stemmer/src/Italian.php',
+ 'Wamania\\Snowball\\Norwegian' => $vendorDir . '/wamania/php-stemmer/src/Norwegian.php',
+ 'Wamania\\Snowball\\Portuguese' => $vendorDir . '/wamania/php-stemmer/src/Portuguese.php',
+ 'Wamania\\Snowball\\Romanian' => $vendorDir . '/wamania/php-stemmer/src/Romanian.php',
+ 'Wamania\\Snowball\\Russian' => $vendorDir . '/wamania/php-stemmer/src/Russian.php',
+ 'Wamania\\Snowball\\Spanish' => $vendorDir . '/wamania/php-stemmer/src/Spanish.php',
+ 'Wamania\\Snowball\\Stem' => $vendorDir . '/wamania/php-stemmer/src/Stem.php',
+ 'Wamania\\Snowball\\Stemmer' => $vendorDir . '/wamania/php-stemmer/src/Stemmer.php',
+ 'Wamania\\Snowball\\Swedish' => $vendorDir . '/wamania/php-stemmer/src/Swedish.php',
+ 'Wamania\\Snowball\\Utf8' => $vendorDir . '/wamania/php-stemmer/src/Utf8.php',
'Zend\\Diactoros\\AbstractSerializer' => $vendorDir . '/zendframework/zend-diactoros/src/AbstractSerializer.php',
'Zend\\Diactoros\\CallbackStream' => $vendorDir . '/zendframework/zend-diactoros/src/CallbackStream.php',
'Zend\\Diactoros\\Exception\\DeprecatedMethodException' => $vendorDir . '/zendframework/zend-diactoros/src/Exception/DeprecatedMethodException.php',
diff --git a/libraries/vendor/composer/autoload_psr4.php b/libraries/vendor/composer/autoload_psr4.php
index ea5de7a75f6..db5c51e5d91 100644
--- a/libraries/vendor/composer/autoload_psr4.php
+++ b/libraries/vendor/composer/autoload_psr4.php
@@ -7,6 +7,7 @@ $baseDir = dirname(dirname($vendorDir));
return array(
'Zend\\Diactoros\\' => array($vendorDir . '/zendframework/zend-diactoros/src'),
+ 'Wamania\\Snowball\\' => array($vendorDir . '/wamania/php-stemmer/src'),
'Symfony\\Polyfill\\Util\\' => array($vendorDir . '/symfony/polyfill-util'),
'Symfony\\Polyfill\\Php56\\' => array($vendorDir . '/symfony/polyfill-php56'),
'Symfony\\Polyfill\\Mbstring\\' => array($vendorDir . '/symfony/polyfill-mbstring'),
diff --git a/libraries/vendor/composer/autoload_static.php b/libraries/vendor/composer/autoload_static.php
index 73ce2ad8fbc..50352e006e6 100644
--- a/libraries/vendor/composer/autoload_static.php
+++ b/libraries/vendor/composer/autoload_static.php
@@ -34,6 +34,10 @@ class ComposerStaticInita4c4383b02fcf9dfb95cc0397c641cf1
array (
'Zend\\Diactoros\\' => 15,
),
+ 'W' =>
+ array (
+ 'Wamania\\Snowball\\' => 17,
+ ),
'S' =>
array (
'Symfony\\Polyfill\\Util\\' => 22,
@@ -110,6 +114,10 @@ class ComposerStaticInita4c4383b02fcf9dfb95cc0397c641cf1
array (
0 => __DIR__ . '/..' . '/zendframework/zend-diactoros/src',
),
+ 'Wamania\\Snowball\\' =>
+ array (
+ 0 => __DIR__ . '/..' . '/wamania/php-stemmer/src',
+ ),
'Symfony\\Polyfill\\Util\\' =>
array (
0 => __DIR__ . '/..' . '/symfony/polyfill-util',
@@ -1233,6 +1241,21 @@ class ComposerStaticInita4c4383b02fcf9dfb95cc0397c641cf1
'Symfony\\Polyfill\\Util\\BinaryOnFuncOverload' => __DIR__ . '/..' . '/symfony/polyfill-util/BinaryOnFuncOverload.php',
'Symfony\\Polyfill\\Util\\LegacyTestListener' => __DIR__ . '/..' . '/symfony/polyfill-util/LegacyTestListener.php',
'Symfony\\Polyfill\\Util\\TestListenerTrait' => __DIR__ . '/..' . '/symfony/polyfill-util/TestListenerTrait.php',
+ 'Wamania\\Snowball\\Danish' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Danish.php',
+ 'Wamania\\Snowball\\Dutch' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Dutch.php',
+ 'Wamania\\Snowball\\English' => __DIR__ . '/..' . '/wamania/php-stemmer/src/English.php',
+ 'Wamania\\Snowball\\French' => __DIR__ . '/..' . '/wamania/php-stemmer/src/French.php',
+ 'Wamania\\Snowball\\German' => __DIR__ . '/..' . '/wamania/php-stemmer/src/German.php',
+ 'Wamania\\Snowball\\Italian' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Italian.php',
+ 'Wamania\\Snowball\\Norwegian' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Norwegian.php',
+ 'Wamania\\Snowball\\Portuguese' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Portuguese.php',
+ 'Wamania\\Snowball\\Romanian' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Romanian.php',
+ 'Wamania\\Snowball\\Russian' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Russian.php',
+ 'Wamania\\Snowball\\Spanish' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Spanish.php',
+ 'Wamania\\Snowball\\Stem' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Stem.php',
+ 'Wamania\\Snowball\\Stemmer' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Stemmer.php',
+ 'Wamania\\Snowball\\Swedish' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Swedish.php',
+ 'Wamania\\Snowball\\Utf8' => __DIR__ . '/..' . '/wamania/php-stemmer/src/Utf8.php',
'Zend\\Diactoros\\AbstractSerializer' => __DIR__ . '/..' . '/zendframework/zend-diactoros/src/AbstractSerializer.php',
'Zend\\Diactoros\\CallbackStream' => __DIR__ . '/..' . '/zendframework/zend-diactoros/src/CallbackStream.php',
'Zend\\Diactoros\\Exception\\DeprecatedMethodException' => __DIR__ . '/..' . '/zendframework/zend-diactoros/src/Exception/DeprecatedMethodException.php',
diff --git a/libraries/vendor/composer/installed.json b/libraries/vendor/composer/installed.json
index 25ec8b10950..abc49529b01 100644
--- a/libraries/vendor/composer/installed.json
+++ b/libraries/vendor/composer/installed.json
@@ -2652,5 +2652,51 @@
"psr",
"psr-7"
]
+ },
+ {
+ "name": "wamania/php-stemmer",
+ "version": "1.2",
+ "version_normalized": "1.2.0.0",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/wamania/php-stemmer.git",
+ "reference": "6cc76829bddd46f7ae7678e0bf87a0c872c8cf58"
+ },
+ "dist": {
+ "type": "zip",
+ "url": "https://api.github.com/repos/wamania/php-stemmer/zipball/6cc76829bddd46f7ae7678e0bf87a0c872c8cf58",
+ "reference": "6cc76829bddd46f7ae7678e0bf87a0c872c8cf58",
+ "shasum": ""
+ },
+ "require": {
+ "php": ">=5.3.0"
+ },
+ "require-dev": {
+ "phpunit/phpunit": "^4.8"
+ },
+ "time": "2017-01-27T17:16:44+00:00",
+ "type": "library",
+ "installation-source": "dist",
+ "autoload": {
+ "psr-4": {
+ "Wamania\\Snowball\\": "src/"
+ }
+ },
+ "notification-url": "https://packagist.org/downloads/",
+ "license": [
+ "MIT"
+ ],
+ "authors": [
+ {
+ "name": "Wamania",
+ "homepage": "http://wamania.com"
+ }
+ ],
+ "description": "Native PHP5 Stemmer",
+ "keywords": [
+ "php",
+ "porter",
+ "stemmer"
+ ]
}
]
diff --git a/libraries/vendor/wamania/php-stemmer/LICENSE b/libraries/vendor/wamania/php-stemmer/LICENSE
new file mode 100644
index 00000000000..5de1b166a74
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 wamania
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/libraries/vendor/wamania/php-stemmer/src/Danish.php b/libraries/vendor/wamania/php-stemmer/src/Danish.php
new file mode 100644
index 00000000000..357f573cfd3
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Danish.php
@@ -0,0 +1,149 @@
+word = Utf8::strtolower($word);
+
+ // R2 is not used: R1 is defined in the same way as in the German stemmer
+ $this->r1();
+
+ // then R1 is adjusted so that the region before it contains at least 3 letters.
+ if ($this->r1Index < 3) {
+ $this->r1Index = 3;
+ $this->r1 = Utf8::substr($this->word, 3);
+ }
+
+ // Do each of steps 1, 2 3 and 4.
+ $this->step1();
+ $this->step2();
+ $this->step3();
+ $this->step4();
+
+ return $this->word;
+ }
+
+ /**
+ * Tell whether $word ends in a valid s-ending, i.e. whether its last letter is one of
+ * a b c d f g h j k l m n o p r t v y z å
+ *
+ * @param string $word The word to inspect; only its last letter is looked at
+ * @return boolean True when the last letter of $word is in the list above
+ */
+ private function hasValidSEnding($word)
+ {
+ $lastLetter = Utf8::substr($word, -1, 1);
+ return in_array($lastLetter, array('a', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z', 'å'));
+ }
+
+ /**
+ * Step 1: remove a standard suffix. Returns true when a suffix matched; otherwise falls off the end (implicit null).
+ * Search for the longest among the following suffixes in R1, and perform the action indicated.
+ */
+ private function step1()
+ {
+ // hed ethed ered e erede ende erende ene erne ere en heden eren er heder erer
+ // heds es endes erendes enes ernes eres ens hedens erens ers ets erets et eret
+ // delete (the array below is ordered longest first; searchIfInR1() is defined outside this file - presumably the first match wins, TODO confirm)
+ if ( ($position = $this->searchIfInR1(array(
+ 'erendes', 'erende', 'hedens', 'erede', 'ethed', 'heden', 'endes', 'erets', 'heder', 'ernes',
+ 'erens', 'ered', 'ende', 'erne', 'eres', 'eren', 'eret', 'erer', 'enes', 'heds',
+ 'ens', 'ene', 'ere', 'ers', 'ets', 'hed', 'es', 'et', 'er', 'en', 'e'
+ ))) !== false) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ return true;
+ }
+
+ // s
+ // delete if preceded by a valid s-ending; true is returned even when the s is kept
+ if ( ($position = $this->searchIfInR1(array('s'))) !== false) {
+ $word = Utf8::substr($this->word, 0, $position);
+ if ($this->hasValidSEnding($word)) {
+ $this->word = $word;
+ }
+ return true;
+ }
+ }
+
+ /**
+ * Step 2: shorten a consonant pair (also re-run by step3() after it deletes a suffix).
+ * Search for one of the following suffixes in R1, and if found delete the last letter of the word.
+ * gd dt gt kt
+ */
+ private function step2()
+ {
+ if ($this->searchIfInR1(array('gd', 'dt', 'gt', 'kt')) !== false) {
+ $this->word = Utf8::substr($this->word, 0, -1);
+ }
+ }
+
+ /**
+ * Step 3: trim igst to ig, then either delete ig/lig/elig/els (re-running step 2, returning true) or shorten løst to løs.
+ */
+ private function step3()
+ {
+ // If the word ends igst, remove the final st. NOTE(review): uses search(), not searchIfInR1() - presumably not limited to R1, confirm in the parent class.
+ if ($this->search(array('igst')) !== false) {
+ $this->word = Utf8::substr($this->word, 0, -2);
+ }
+
+ // Search for the longest among the following suffixes in R1, and perform the action indicated.
+ // ig lig elig els
+ // delete, and then repeat step 2
+ if ( ($position = $this->searchIfInR1(array('elig', 'lig', 'ig', 'els'))) !== false) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ $this->step2();
+ return true;
+ }
+
+ // løst (only reached when none of ig/lig/elig/els matched above)
+ // replace with løs, i.e. drop the final t
+ if ($this->searchIfInR1(array('løst')) !== false) {
+ $this->word = Utf8::substr($this->word, 0, -1);
+ }
+ }
+
+ /**
+ * Step 4: undouble. Returns false when inR1() rejects the last letter's index or that letter is a vowel, true otherwise.
+ * If the word ends with a double consonant in R1, remove one of the consonants.
+ */
+ private function step4()
+ {
+ $length = Utf8::strlen($this->word);
+ if (!$this->inR1(($length-1))) {
+ return false;
+ }
+
+ $lastLetter = Utf8::substr($this->word, -1, 1);
+ if (in_array($lastLetter, self::$vowels)) {
+ return false;
+ }
+ $beforeLastLetter = Utf8::substr($this->word, -2, 1);
+
+ if ($lastLetter == $beforeLastLetter) {
+ $this->word = Utf8::substr($this->word, 0, -1);
+ }
+ return true;
+ }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Dutch.php b/libraries/vendor/wamania/php-stemmer/src/Dutch.php
new file mode 100644
index 00000000000..8b8bb618553
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Dutch.php
@@ -0,0 +1,303 @@
+word = Utf8::strtolower($word);
+
+ // First, remove all umlaut and acute accents.
+ $this->word = Utf8::str_replace(
+ array('ä', 'ë', 'ï', 'ö', 'ü', 'á', 'é', 'í', 'ó', 'ú'),
+ array('a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u'),
+ $this->word);
+
+ $this->plainVowels = implode('', self::$vowels);
+
+ // Put initial y, y after a vowel, and i between vowels into upper case.
+ $this->word = preg_replace('#^y#u', 'Y', $this->word);
+ $this->word = preg_replace('#(['.$this->plainVowels.'])y#u', '$1Y', $this->word);
+ $this->word = preg_replace('#(['.$this->plainVowels.'])i(['.$this->plainVowels.'])#u', '$1I$2', $this->word);
+
+ // R1 and R2 (see the note on R1 and R2) are then defined as in German.
+ // R1 and R2 are first set up in the standard way
+ $this->r1();
+ $this->r2();
+
+ // but then R1 is adjusted so that the region before it contains at least 3 letters.
+ if ($this->r1Index < 3) {
+ $this->r1Index = 3;
+ $this->r1 = Utf8::substr($this->word, 3);
+ }
+
+ // Do each of steps 1, 2 3 and 4.
+ $this->step1();
+ $removedE = $this->step2();
+ $this->step3a();
+ $this->step3b($removedE);
+ $this->step4();
+ $this->finish();
+
+ return $this->word;
+ }
+
+ /**
+ * Tell whether $word ends in a valid s-ending: its last letter is neither a vowel (self::$vowels) nor j.
+ * @param string $word The word to inspect; only its last letter is looked at
+ * @return boolean True when the last letter is a non-vowel other than j
+ */
+ private function hasValidSEnding($word)
+ {
+ $lastLetter = Utf8::substr($word, -1, 1);
+ return !in_array($lastLetter, array_merge(self::$vowels, array('j')));
+ }
+
+ /**
+ * Tell whether $word ends in a valid en-ending: its last letter is not a vowel and its last three letters are not gem.
+ * @param string $word The word to inspect; only its last one and last three letters are looked at
+ * @return boolean False for a trailing vowel or a trailing gem, true otherwise
+ */
+ private function hasValidEnEnding($word)
+ {
+ $lastLetter = Utf8::substr($word, -1, 1);
+ if (in_array($lastLetter, self::$vowels)) {
+ return false;
+ }
+
+ $threeLastLetters = Utf8::substr($word, -3, 3);
+ if ($threeLastLetters == 'gem') {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Undouble the ending: remove the last letter of the current word ($this->word) if it ends kk, dd or tt.
+ */
+ private function unDoubling()
+ {
+ if ($this->search(array('kk', 'dd', 'tt')) !== false) {
+ $this->word = Utf8::substr($this->word, 0, -1);
+ }
+ }
+
+ /**
+ * Step 1: handle heden, en/ene and s/se. Returns true as soon as one of them is found (even if the word is left unchanged), else false.
+ * Search for the longest among the following suffixes, and perform the action indicated.
+ */
+ private function step1()
+ {
+ // heden
+ // replace with heid if in R1
+ if ( ($position = $this->search(array('heden'))) !== false) {
+ if ($this->inR1($position)) {
+ $this->word = preg_replace('#(heden)$#u', 'heid', $this->word);
+ }
+ return true;
+ }
+
+ // en ene
+ // delete if in R1 and preceded by a valid en-ending, and then undouble the ending
+ if ( ($position = $this->search(array('ene', 'en'))) !== false) {
+ if ($this->inR1($position)) {
+ $word = Utf8::substr($this->word, 0, $position);
+ if ($this->hasValidEnEnding($word)) {
+ $this->word = $word;
+ $this->unDoubling();
+ }
+ }
+ return true;
+ }
+
+ // s se
+ // delete if in R1 and preceded by a valid s-ending
+ if ( ($position = $this->search(array('se', 's'))) !== false) {
+ if ($this->inR1($position)) {
+ $word = Utf8::substr($this->word, 0, $position);
+ if ($this->hasValidSEnding($word)) {
+ $this->word = $word;
+ }
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+ * Step 2: returns true only when the e was actually removed, false in every other case.
+ * Delete suffix e if in R1 and preceded by a non-vowel, and then undouble the ending.
+ */
+ private function step2()
+ {
+ if ( ($position = $this->search(array('e'))) !== false) {
+ if ($this->inR1($position)) {
+ $letter = Utf8::substr($this->word, -2, 1);
+ if (!in_array($letter, self::$vowels)) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ $this->unDoubling();
+
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+ /**
+ * Step 3a: heid
+ * delete heid if in R2 and not preceded by c, then treat a preceding en as step 1 does (delete if in R1 after a valid en-ending, and undouble)
+ */
+ private function step3a()
+ {
+ if ( ($position = $this->search(array('heid'))) !== false) {
+ if ($this->inR2($position)) {
+ $letter = Utf8::substr($this->word, -5, 1);
+ if ($letter !== 'c') {
+ $this->word = Utf8::substr($this->word, 0, $position);
+
+ if ( ($position = $this->search(array('en'))) !== false) {
+ if ($this->inR1($position)) {
+ $word = Utf8::substr($this->word, 0, $position);
+ if ($this->hasValidEnEnding($word)) {
+ $this->word = $word;
+ $this->unDoubling();
+ }
+ }
+ }
+ }
+ }
+ }
+
+ }
+
+ /**
+ * Step 3b: d-suffixes. $removedE is true when step 2 removed an e (it gates the bar rule). Returns true when a suffix was found, else false.
+ * Search for the longest among the following suffixes, and perform the action indicated.
+ */
+ private function step3b($removedE)
+ {
+ // end ing
+ // delete if in R2
+ // if preceded by ig, delete if in R2 and not preceded by e, otherwise undouble the ending
+ if ( ($position = $this->search(array('end', 'ing'))) !== false) {
+ if ($this->inR2($position)) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+
+ if ( ($position2 = $this->searchIfInR2(array('ig'))) !== false) {
+ $letter = Utf8::substr($this->word, -3, 1);
+ if ($letter !== 'e') {
+ $this->word = Utf8::substr($this->word, 0, $position2);
+ }
+ } else {
+ $this->unDoubling();
+ }
+ }
+
+
+ return true;
+ }
+
+ // ig
+ // delete if in R2 and not preceded by e
+ if ( ($position = $this->search(array('ig'))) !== false) {
+ if ($this->inR2($position)) {
+ $letter = Utf8::substr($this->word, -3, 1);
+ if ($letter !== 'e') {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ }
+ }
+ return true;
+ }
+
+ // lijk
+ // delete if in R2, and then repeat step 2
+ if ( ($position = $this->search(array('lijk'))) !== false) {
+ if ($this->inR2($position)) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ $this->step2();
+ }
+ return true;
+ }
+
+ // baar
+ // delete if in R2
+ if ( ($position = $this->search(array('baar'))) !== false) {
+ if ($this->inR2($position)) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ }
+ return true;
+ }
+
+ // bar
+ // delete if in R2 and if step 2 actually removed an e
+ if ( ($position = $this->search(array('bar'))) !== false) {
+ if ($this->inR2($position) && $removedE) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ /**
+ * Step 4: undouble vowel. Returns false when the pattern does not match; nothing is returned after a rewrite.
+ * If the word ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u,
+ * remove one of the vowels from V (for example, maan -> man, brood -> brod).
+ */
+ private function step4()
+ {
+ // D is a non-vowel other than I
+ $d = Utf8::substr($this->word, -1, 1);
+ if (in_array($d, array_merge(self::$vowels, array('I')))) {
+ return false;
+ }
+
+ // V is double a, e, o or u
+ $v = Utf8::substr($this->word, -3, 2);
+ if (!in_array($v, array('aa', 'ee', 'oo', 'uu'))) {
+ return false;
+ }
+ $singleV = Utf8::substr($v, 0, 1);
+
+ // C is a non-vowel
+ $c = Utf8::substr($this->word, -4, 1);
+ if (in_array($c, self::$vowels)) {
+ return false;
+ }
+
+ $this->word = Utf8::substr($this->word, 0, -4);
+ $this->word .= $c . $singleV .$d;
+ }
+
+ /**
+ * Final step: undo the marker letters.
+ * Turn every I and Y in the word back into lower case i and y.
+ */
+ private function finish()
+ {
+ $this->word = Utf8::str_replace(array('I', 'Y'), array('i', 'y'), $this->word);
+ }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/English.php b/libraries/vendor/wamania/php-stemmer/src/English.php
new file mode 100644
index 00000000000..2ee72b5bc6e
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/English.php
@@ -0,0 +1,599 @@
+word = Utf8::strtolower($word);
+
+ // exceptions
+ if (null !== ($word = $this->exception1())) {
+ return $word;
+ }
+
+
+ $this->plainVowels = implode('', self::$vowels);
+
+ // Remove initial ', if present.
+ $first = Utf8::substr($this->word, 0, 1);
+ if ($first == "'") {
+ $this->word = Utf8::substr($this->word, 1);
+ }
+
+ // Set initial y, or y after a vowel, to Y
+ if ($first == 'y') {
+ $this->word = preg_replace('#^y#u', 'Y', $this->word);
+ }
+ $this->word = preg_replace('#(['.$this->plainVowels.'])y#u', '$1Y', $this->word);
+
+ $this->r1();
+ $this->exceptionR1();
+ $this->r2();
+
+ $this->step0();
+ $this->step1a();
+
+ // exceptions 2
+ if (null !== ($word = $this->exception2())) {
+ return $word;
+ }
+
+ $this->step1b();
+ $this->step1c();
+ $this->step2();
+ $this->step3();
+ $this->step4();
+ $this->step5();
+ $this->finish();
+
+ return $this->word;
+ }
+
+    /**
+     * Step 0: strip a trailing apostrophe form.
+     *
+     * Looks for one of the endings 's', 's or ' and, when one is found, cuts the word
+     * at the position where that ending starts.
+     */
+    private function step0()
+    {
+        $endings = array("'s'", "'s", "'");
+        $cut = $this->search($endings);
+
+        if ($cut === false) {
+            return;
+        }
+
+        $this->word = Utf8::substr($this->word, 0, $cut);
+    }
+
+    /**
+     * Step 1a: handle the endings sses, ied, ies, us, ss and s.
+     *
+     * @return boolean  true when one of the endings was recognised (even if the word
+     *                  was left untouched), false when none of them matched
+     */
+    private function step1a()
+    {
+        // sses: replace by ss
+        if ($this->search(array('sses')) !== false) {
+            $this->word = preg_replace('#(sses)$#u', 'ss', $this->word);
+
+            return true;
+        }
+
+        // ied+ ies*: replace by i if preceded by more than one letter, otherwise by ie
+        // (so ties -> tie, cries -> cri)
+        $found = $this->search(array('ied', 'ies'));
+        if ($found !== false) {
+            $replacement = ($found > 1) ? 'i' : 'ie';
+            $this->word = preg_replace('#(ied|ies)$#u', $replacement, $this->word);
+
+            return true;
+        }
+
+        // us+ ss: recognised, but left as it is
+        if ($this->search(array('us', 'ss')) !== false) {
+            return true;
+        }
+
+        // s: delete if the preceding word part contains a vowel not immediately before the s
+        // (so gas and this retain the s, gaps and kiwis lose it)
+        $found = $this->search(array('s'));
+        if ($found === false) {
+            return false;
+        }
+
+        // Look at every letter except the one directly in front of the s
+        $index = 0;
+        $stop = $found - 1;
+        while ($index < $stop) {
+            if (in_array(Utf8::substr($this->word, $index, 1), self::$vowels)) {
+                $this->word = Utf8::substr($this->word, 0, $found);
+                break;
+            }
+            $index++;
+        }
+
+        return true;
+    }
+
+    /**
+     * Step 1b: handle the endings eed, eedly, ed, edly, ing and ingly.
+     *
+     * @return boolean  true when one of the endings was recognised (even if the word
+     *                  was left untouched), false when none of them matched
+     */
+    private function step1b()
+    {
+        // eed eedly+: replace by ee if in R1
+        $found = $this->search(array('eedly', 'eed'));
+        if ($found !== false) {
+            if ($this->inR1($found)) {
+                $this->word = preg_replace('#(eedly|eed)$#u', 'ee', $this->word);
+            }
+
+            return true;
+        }
+
+        // ed edly+ ing ingly+: delete if the preceding word part contains a vowel
+        $found = $this->search(array('edly', 'ingly', 'ed', 'ing'));
+        if ($found === false) {
+            return false;
+        }
+
+        $hasVowel = false;
+        for ($index = 0; $index < $found; $index++) {
+            if (in_array(Utf8::substr($this->word, $index, 1), self::$vowels)) {
+                $hasVowel = true;
+                break;
+            }
+        }
+
+        if (!$hasVowel) {
+            return true;
+        }
+
+        $this->word = Utf8::substr($this->word, 0, $found);
+
+        // After the deletion:
+        //  - if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or
+        //  - if the word ends with a double remove the last letter (so hopp -> hop), or
+        //  - if the word is short, add e (so hop -> hope)
+        if ($this->search(array('at', 'bl', 'iz')) !== false) {
+            $this->word .= 'e';
+        } elseif ( ($doubleAt = $this->search(self::$doubles)) !== false) {
+            $this->word = Utf8::substr($this->word, 0, $doubleAt + 1);
+        } elseif ($this->isShort()) {
+            $this->word .= 'e';
+        }
+
+        return true;
+    }
+
+    /**
+     * Step 1c: replace a final y or Y by i if it is preceded by a non-vowel which is
+     * not the first letter of the word (so cry -> cri, by -> by, say -> say).
+     *
+     * @return boolean  false only when the word has at least three letters and does not end in y or Y
+     */
+    private function step1c()
+    {
+        // Words of fewer than three letters are never changed
+        if (Utf8::strlen($this->word) < 3) {
+            return true;
+        }
+
+        $found = $this->search(array('y', 'Y'));
+        if ($found === false) {
+            return false;
+        }
+
+        $previous = Utf8::substr($this->word, $found - 1, 1);
+        $previousIsVowel = in_array($previous, self::$vowels);
+        if (!$previousIsVowel) {
+            $this->word = preg_replace('#(y|Y)$#u', 'i', $this->word);
+        }
+
+        return true;
+    }
+
+    /**
+     * Step 2
+     * Look for one of the suffixes below and, if it is found and lies in R1, rewrite it.
+     * The groups are tried in the order listed; the first group with a matching ending
+     * ends the step, whether or not the word was changed.
+     *
+     * @return boolean  true when one of the suffixes was recognised, false otherwise
+     */
+    private function step2()
+    {
+        // Each rule: the endings to look for, the anchored pattern that rewrites them,
+        // and the text they are replaced by.
+        $rules = array(
+            array(array('iveness', 'iviti'), '#(iveness|iviti)$#u', 'ive'),
+            array(array('ousli', 'ousness'), '#(ousli|ousness)$#u', 'ous'),
+            array(array('izer', 'ization'), '#(izer|ization)$#u', 'ize'),
+            array(array('ational', 'ation', 'ator'), '#(ational|ation|ator)$#u', 'ate'),
+            array(array('biliti', 'bli'), '#(biliti|bli)$#u', 'ble'),
+            array(array('lessli'), '#(lessli)$#u', 'less'),
+            array(array('fulness', 'fulli'), '#(fulness|fulli)$#u', 'ful'),
+            array(array('tional'), '#(tional)$#u', 'tion'),
+            array(array('alism', 'aliti', 'alli'), '#(alism|aliti|alli)$#u', 'al'),
+            array(array('enci'), '#(enci)$#u', 'ence'),
+            array(array('anci'), '#(anci)$#u', 'ance'),
+            array(array('abli'), '#(abli)$#u', 'able'),
+            array(array('entli'), '#(entli)$#u', 'ent'),
+        );
+
+        foreach ($rules as $rule) {
+            list($endings, $pattern, $replacement) = $rule;
+
+            $found = $this->search($endings);
+            if ($found === false) {
+                continue;
+            }
+
+            // The ending counts as handled either way; it is only rewritten inside R1
+            if ($this->inR1($found)) {
+                $this->word = preg_replace($pattern, $replacement, $this->word);
+            }
+
+            return true;
+        }
+
+        // ogi+: replace by og if preceded by l
+        $found = $this->search(array('ogi'));
+        if ($found !== false) {
+            if ($this->inR1($found) && (Utf8::substr($this->word, $found - 1, 1) == 'l')) {
+                $this->word = preg_replace('#(ogi)$#u', 'og', $this->word);
+            }
+
+            return true;
+        }
+
+        // li+: delete if preceded by a valid li-ending
+        $found = $this->search(array('li'));
+        if ($found === false) {
+            return false;
+        }
+
+        if ($this->inR1($found) && in_array(Utf8::substr($this->word, $found - 1, 1), self::$liEnding)) {
+            $this->word = Utf8::substr($this->word, 0, $found);
+        }
+
+        return true;
+    }
+
+    /**
+     * Step 3:
+     * Look for one of the suffixes below inside R1 and, if found, rewrite or delete it.
+     * The groups are tried in the order listed and the first one that applies ends the step.
+     *
+     * @return boolean  true when the word was changed, false otherwise
+     */
+    public function step3()
+    {
+        // Suffixes that are replaced: endings, anchored pattern, replacement text
+        $rules = array(
+            array(array('ational'), '#(ational)$#u', 'ate'),
+            array(array('tional'), '#(tional)$#u', 'tion'),
+            array(array('alize'), '#(alize)$#u', 'al'),
+            array(array('icate', 'iciti', 'ical'), '#(icate|iciti|ical)$#u', 'ic'),
+        );
+
+        foreach ($rules as $rule) {
+            list($endings, $pattern, $replacement) = $rule;
+
+            if ($this->searchIfInR1($endings) !== false) {
+                $this->word = preg_replace($pattern, $replacement, $this->word);
+
+                return true;
+            }
+        }
+
+        // ful ness: delete
+        $cut = $this->searchIfInR1(array('ful', 'ness'));
+        if ($cut !== false) {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+
+            return true;
+        }
+
+        // ative*: delete if in R2
+        $cut = $this->searchIfInR1(array('ative'));
+        if ( ($cut !== false) && $this->inR2($cut) ) {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+
+            return true;
+        }
+
+        return false;
+    }
+
+    /**
+     * Step 4
+     * Look for one of the suffixes below and, if it is found and lies in R2, delete it.
+     *
+     * @return boolean  true when one of the suffixes was recognised (even if the word
+     *                  was left untouched), false otherwise
+     */
+    public function step4()
+    {
+        // ement ance ence able ible ant ment ent ism ate iti ous ive ize al er ic: delete
+        $endings = array(
+            'ance', 'ence', 'ement', 'able', 'ible', 'ant', 'ment', 'ent', 'ism',
+            'ate', 'iti', 'ous', 'ive', 'ize', 'al', 'er', 'ic'
+        );
+
+        $cut = $this->search($endings);
+        if ($cut !== false) {
+            if ($this->inR2($cut)) {
+                $this->word = Utf8::substr($this->word, 0, $cut);
+            }
+
+            return true;
+        }
+
+        // ion: delete if preceded by s or t
+        $cut = $this->searchIfInR2(array('ion'));
+        if ($cut === false) {
+            return false;
+        }
+
+        $previous = Utf8::substr($this->word, $cut - 1, 1);
+        if ($previous == 's' || $previous == 't') {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+        }
+
+        return true;
+    }
+
+    /**
+     * Step 5: *
+     * Look for a final e or l and, if found, apply the rule described below.
+     *
+     * @return boolean  true when a final e, or a final l in R2, was recognised (even if
+     *                  the word was left untouched), false otherwise
+     */
+    public function step5()
+    {
+        // e: delete if in R2, or in R1 and not preceded by a short syllable
+        $cut = $this->search(array('e'));
+        if ($cut !== false) {
+            $removable = false;
+
+            if ($this->inR2($cut)) {
+                $removable = true;
+            } elseif ($this->inR1($cut)) {
+                $removable = !($this->searchShortSyllabe(-4, 3) || $this->searchShortSyllabe(-3, 2));
+            }
+
+            if ($removable) {
+                $this->word = Utf8::substr($this->word, 0, $cut);
+            }
+
+            return true;
+        }
+
+        // l: delete if in R2 and preceded by l
+        $cut = $this->searchIfInR2(array('l'));
+        if ($cut === false) {
+            return false;
+        }
+
+        if (Utf8::substr($this->word, $cut - 1, 1) == 'l') {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+        }
+
+        return true;
+    }
+
+    /**
+     * Finally: turn every upper-case Y in the word back into lower case.
+     */
+    public function finish()
+    {
+        $restored = Utf8::str_replace('Y', 'y', $this->word);
+
+        $this->word = $restored;
+    }
+
+    /**
+     * Override R1 for words that begin with gener, commun or arsen: R1 then becomes
+     * the part of the word that follows that prefix.
+     */
+    private function exceptionR1()
+    {
+        // Prefix => number of letters in the prefix, which is also the new start of R1
+        $prefixes = array(
+            'gener'  => 5,
+            'commun' => 6,
+            'arsen'  => 5,
+        );
+
+        foreach ($prefixes as $prefix => $offset) {
+            if (Utf8::strpos($this->word, $prefix) !== 0) {
+                continue;
+            }
+
+            $this->r1 = Utf8::substr($this->word, $offset);
+            $this->r1Index = $offset;
+            break;
+        }
+    }
+
+    /**
+     * Exceptions checked against the whole word:
+     * 1/ certain special words are mapped straight to a fixed stem,
+     * 2/ certain words are left invariant.
+     *
+     * @return string|null  the stem for an exceptional word, null when the word is not in the list
+     */
+    private function exception1()
+    {
+        // Words with a hand-picked stem
+        $special = array(
+            'skis'   => 'ski',
+            'skies'  => 'sky',
+            'dying'  => 'die',
+            'lying'  => 'lie',
+            'tying'  => 'tie',
+            'idly'   => 'idl',
+            'gently' => 'gentl',
+            'ugly'   => 'ugli',
+            'early'  => 'earli',
+            'only'   => 'onli',
+            'singly' => 'singl',
+        );
+
+        // Words that are their own stem
+        $invariant = array('sky', 'news', 'howe', 'atlas', 'cosmos', 'bias', 'andes');
+
+        $lookup = $special + array_combine($invariant, $invariant);
+
+        return isset($lookup[$this->word]) ? $lookup[$this->word] : null;
+    }
+
+    /**
+     * Words that are left invariant when they are found following step 1a.
+     *
+     * @return string|null  the word itself when it is in the list, null otherwise
+     */
+    private function exception2()
+    {
+        $invariant = array(
+            'inning', 'outing', 'canning', 'herring',
+            'earring', 'proceed', 'exceed', 'succeed',
+        );
+
+        $lookup = array_combine($invariant, $invariant);
+
+        return isset($lookup[$this->word]) ? $lookup[$this->word] : null;
+    }
+
+    /**
+     * A word is called short if it ends in a short syllable, and if R1 is null.
+     * Note: R1 is not tested for emptiness; instead the current length of the word is
+     * compared with the stored start index of R1.
+     *
+     * @return boolean
+     */
+    private function isShort()
+    {
+        $endsInShortSyllable = $this->searchShortSyllabe(-3, 3) || $this->searchShortSyllabe(-2, 2);
+        if (!$endsInShortSyllable) {
+            return false;
+        }
+
+        return Utf8::strlen($this->word) == $this->r1Index;
+    }
+
+    /**
+     * Test whether the letters starting at $from form a short syllable.
+     *
+     * A short syllable is either
+     * (a) a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel
+     *     (tested with $nbLetters = 3), or
+     * (b) a vowel at the beginning of the word followed by a non-vowel
+     *     (tested with $nbLetters = 2).
+     *
+     * So rap, trap, entrap end with a short syllable, and ow, on, at are classed as short syllables.
+     * But uproot, bestow, disturb do not end with a short syllable.
+     *
+     * @param integer $from       Index of the first letter to test; a negative value counts from the end of the word
+     * @param integer $nbLetters  2 to test form (b); any other value tests form (a) on three letters
+     *
+     * @return boolean  true when the tested letters form a short syllable
+     */
+    private function searchShortSyllabe($from, $nbLetters)
+    {
+        $length = Utf8::strlen($this->word);
+
+        if ($from < 0) {
+            $from = $length + $from;
+        }
+
+        // The tested letters must all exist. A window that starts before the word or runs
+        // past its end is not a syllable; it used to be clamped to index 0, so that a
+        // missing letter was read as an empty value and counted as a non-vowel.
+        $width = ($nbLetters == 2) ? 2 : 3;
+        if ( ($from < 0) || (($from + $width) > $length) ) {
+            return false;
+        }
+
+        $first = Utf8::substr($this->word, $from, 1);
+        $second = Utf8::substr($this->word, ($from + 1), 1);
+
+        if ($nbLetters == 2) {
+            // (b) only exists at the beginning of the word, and its answer is final:
+            // a failed two-letter test must not fall through to the three-letter test
+            if ($from != 0) {
+                return false;
+            }
+
+            return in_array($first, self::$vowels) && !in_array($second, self::$vowels);
+        }
+
+        // (a) non-vowel, vowel, non-vowel other than w, x or Y
+        $third = Utf8::substr($this->word, ($from + 2), 1);
+
+        return !in_array($first, self::$vowels)
+            && in_array($second, self::$vowels)
+            && !in_array($third, array_merge(self::$vowels, array('x', 'Y', 'w')));
+    }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/French.php b/libraries/vendor/wamania/php-stemmer/src/French.php
new file mode 100644
index 00000000000..f839e844850
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/French.php
@@ -0,0 +1,530 @@
+word = Utf8::strtolower($word);
+
+ $this->plainVowels = implode('', self::$vowels);
+
+ $this->step0();
+
+ $this->rv();
+ $this->r1();
+ $this->r2();
+
+ // to know if step1, 2a or 2b have altered the word
+ $this->originalWord = $this->word;
+
+ $nextStep = $this->step1();
+
+ // Do step 2a if either no ending was removed by step 1, or if one of endings amment, emment, ment, ments was found.
+ if ( ($nextStep == 2) || ($this->originalWord == $this->word) ) {
+ $modified = $this->step2a();
+ if (!$modified) {
+ $this->step2b();
+ }
+ }
+
+ if ($this->word != $this->originalWord) {
+ $this->step3();
+
+ } else {
+ $this->step4();
+ }
+
+ $this->step5();
+ $this->step6();
+ $this->finish();
+
+ return $this->word;
+ }
+
+
+
+    /**
+     * Assume the word is in lower case.
+     * Then put into upper case u or i preceded and followed by a vowel, and y preceded or followed by a vowel.
+     * u after q is also put into upper case. For example,
+     * jouer -> joUer
+     * ennuie -> ennuIe
+     * yeux -> Yeux
+     * quand -> qUand
+     *
+     * The substitutions are applied one after the other, each on the result of the previous one.
+     */
+    private function step0()
+    {
+        $vowel = '[' . $this->plainVowels . ']';
+
+        // Pattern => replacement, in the order in which they are applied
+        $substitutions = array(
+            '#([q])u#u'                              => '$1U',
+            '#(' . $vowel . ')y#u'                   => '$1Y',
+            '#y(' . $vowel . ')#u'                   => 'Y$1',
+            '#(' . $vowel . ')u(' . $vowel . ')#u'   => '$1U$2',
+            '#(' . $vowel . ')i(' . $vowel . ')#u'   => '$1I$2',
+        );
+
+        foreach ($substitutions as $pattern => $replacement) {
+            $this->word = preg_replace($pattern, $replacement, $this->word);
+        }
+    }
+
+    /**
+     * Step 1: standard suffix removal.
+     *
+     * The suffix groups below are tried in the order written; the first group with an
+     * ending that matches the end of the word is applied and ends the step, whether or
+     * not the word was actually changed.
+     *
+     * Every lookup compares the found position strictly against false. A match at
+     * position 0 (the word consists of the suffix only) is therefore a match like any
+     * other; the rules that inspect the letter in front of the suffix leave the word
+     * untouched in that case, because there is no such letter.
+     *
+     * @return integer  Next step number: 2 when amment, emment, ment or ments was found
+     *                  or when no suffix matched at all, 3 in every other case
+     */
+    private function step1()
+    {
+        // ance iqUe isme able iste eux ances iqUes ismes ables istes
+        // delete if in R2
+        if ( ($position = $this->search(array('ances', 'iqUes', 'ismes', 'ables', 'istes', 'ance', 'iqUe','isme', 'able', 'iste', 'eux'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+            return 3;
+        }
+
+        // atrice ateur ation atrices ateurs ations
+        // delete if in R2
+        // if preceded by ic, delete if in R2, else replace by iqU
+        if ( ($position = $this->search(array('atrices', 'ateurs', 'ations', 'atrice', 'ateur', 'ation'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+
+                if ( ($position2 = $this->searchIfInR2(array('ic'))) !== false) {
+                    $this->word = Utf8::substr($this->word, 0, $position2);
+                } else {
+                    $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
+                }
+            }
+
+            return 3;
+        }
+
+        // logie logies
+        // replace with log if in R2
+        if ( ($position = $this->search(array('logies', 'logie'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(logies|logie)$#u', 'log', $this->word);
+            }
+            return 3;
+        }
+
+        // usion ution usions utions
+        // replace with u if in R2
+        if ( ($position = $this->search(array('usions', 'utions', 'usion', 'ution'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(usion|ution|usions|utions)$#u', 'u', $this->word);
+            }
+            return 3;
+        }
+
+        // ence ences
+        // replace with ent if in R2
+        if ( ($position = $this->search(array('ences', 'ence'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(ence|ences)$#u', 'ent', $this->word);
+            }
+            return 3;
+        }
+
+        // issement issements
+        // delete if in R1 and preceded by a non-vowel
+        if ( ($position = $this->search(array('issements', 'issement'))) !== false) {
+            // At position 0 nothing precedes the suffix, so there is no non-vowel to test
+            if ( ($position > 0) && $this->inR1($position) ) {
+                $before = $position - 1;
+                $letter = Utf8::substr($this->word, $before, 1);
+                if (! in_array($letter, self::$vowels)) {
+                    $this->word = Utf8::substr($this->word, 0, $position);
+                }
+            }
+            return 3;
+        }
+
+        // ement ements
+        // delete if in RV
+        // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+        // if preceded by eus, delete if in R2, else replace by eux if in R1, otherwise,
+        // if preceded by abl or iqU, delete if in R2, otherwise,
+        // if preceded by ièr or Ièr, replace by i if in RV
+        if ( ($position = $this->search(array('ements', 'ement'))) !== false) {
+
+            // delete if in RV
+            if ($this->inRv($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            // The follow-up rules look at the end of the word as it is now: when the
+            // suffix was not deleted above, the word still ends in ement(s) and none of them match.
+
+            // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+            if ( ($position = $this->searchIfInR2(array('iv'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+                if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
+                    $this->word = Utf8::substr($this->word, 0, $position2);
+                }
+
+            // if preceded by eus, delete if in R2, else replace by eux if in R1, otherwise,
+            } elseif ( ($position = $this->search(array('eus'))) !== false) {
+                if ($this->inR2($position)) {
+                    $this->word = Utf8::substr($this->word, 0, $position);
+
+                } elseif ($this->inR1($position)) {
+                    $this->word = preg_replace('#(eus)$#u', 'eux', $this->word);
+                }
+
+            // if preceded by abl or iqU, delete if in R2, otherwise,
+            } elseif ( ($position = $this->searchIfInR2(array('abl', 'iqU'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+
+            // if preceded by ièr or Ièr, replace by i if in RV
+            } elseif ( ($position = $this->searchIfInRv(array('ièr', 'Ièr'))) !== false) {
+                $this->word = preg_replace('#(ièr|Ièr)$#u', 'i', $this->word);
+            }
+            return 3;
+        }
+
+        // ité ités
+        // delete if in R2
+        // if preceded by abil, delete if in R2, else replace by abl, otherwise,
+        // if preceded by ic, delete if in R2, else replace by iqU, otherwise,
+        // if preceded by iv, delete if in R2
+        if ( ($position = $this->search(array('ités', 'ité'))) !== false) {
+
+            // delete if in R2
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            // if preceded by abil, delete if in R2, else replace by abl, otherwise,
+            if ( ($position = $this->search(array('abil'))) !== false) {
+                if ($this->inR2($position)) {
+                    $this->word = Utf8::substr($this->word, 0, $position);
+                } else {
+                    $this->word = preg_replace('#(abil)$#u', 'abl', $this->word);
+                }
+
+            // if preceded by ic, delete if in R2, else replace by iqU, otherwise,
+            } elseif ( ($position = $this->search(array('ic'))) !== false) {
+                if ($this->inR2($position)) {
+                    $this->word = Utf8::substr($this->word, 0, $position);
+                } else {
+                    $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
+                }
+
+            // if preceded by iv, delete if in R2
+            } elseif ( ($position = $this->searchIfInR2(array('iv'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            return 3;
+        }
+
+        // if ive ifs ives
+        // delete if in R2
+        // if preceded by at, delete if in R2 (and if further preceded by ic, delete if in R2, else replace by iqU)
+        if ( ($position = $this->search(array('ifs', 'ives', 'if', 'ive'))) !== false) {
+
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            if ( ($position = $this->searchIfInR2(array('at'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+
+                if ( ($position2 = $this->search(array('ic'))) !== false) {
+                    if ($this->inR2($position2)) {
+                        $this->word = Utf8::substr($this->word, 0, $position2);
+                    } else {
+                        $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
+                    }
+                }
+            }
+
+            return 3;
+        }
+
+        // eaux
+        // replace with eau
+        if ($this->search(array('eaux')) !== false) {
+            $this->word = preg_replace('#(eaux)$#u', 'eau', $this->word);
+            return 3;
+        }
+
+        // aux
+        // replace with al if in R1
+        if ( ($position = $this->search(array('aux'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(aux)$#u', 'al', $this->word);
+            }
+            return 3;
+        }
+
+        // euse euses
+        // delete if in R2, else replace by eux if in R1
+        if ( ($position = $this->search(array('euses', 'euse'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+
+            } elseif ($this->inR1($position)) {
+                $this->word = preg_replace('#(euses|euse)$#u', 'eux', $this->word);
+            }
+            return 3;
+        }
+
+        // amment
+        // replace with ant if in RV
+        if ( ($position = $this->search(array('amment'))) !== false) {
+            if ($this->inRv($position)) {
+                $this->word = preg_replace('#(amment)$#u', 'ant', $this->word);
+            }
+            return 2;
+        }
+
+        // emment
+        // replace with ent if in RV
+        if ( ($position = $this->search(array('emment'))) !== false) {
+            if ($this->inRv($position)) {
+                $this->word = preg_replace('#(emment)$#u', 'ent', $this->word);
+            }
+            return 2;
+        }
+
+        // ment ments
+        // delete if preceded by a vowel in RV
+        if ( ($position = $this->search(array('ments', 'ment'))) !== false) {
+            // At position 0 nothing precedes the suffix, so there is no vowel to test
+            if ($position > 0) {
+                $before = $position - 1;
+                $letter = Utf8::substr($this->word, $before, 1);
+                if ( $this->inRv($before) && (in_array($letter, self::$vowels)) ) {
+                    $this->word = Utf8::substr($this->word, 0, $position);
+                }
+            }
+
+            return 2;
+        }
+
+        // No suffix of this step matched
+        return 2;
+    }
+
+    /**
+     * Step 2a: Verb suffixes beginning i
+     * In steps 2a and 2b all tests are confined to the RV region.
+     * Look for one of the following suffixes and, if found, delete it if it is preceded by a non-vowel.
+     *      îmes   ît   îtes   i   ie   ies   ir   ira   irai   iraIent   irais   irait   iras   irent   irez   iriez
+     *      irions   irons   iront   is   issaIent   issais   issait   issant   issante   issantes   issants   isse
+     *      issent   isses   issez   issiez   issions   issons   it
+     * (Note that the non-vowel itself must also be in RV.)
+     *
+     * @return boolean  true when a suffix was deleted, false otherwise
+     */
+    private function step2a()
+    {
+        $suffixes = array(
+            'îmes', 'îtes', 'ît', 'ies', 'ie', 'iraIent', 'irais', 'irait', 'irai', 'iras', 'ira', 'irent', 'irez', 'iriez',
+            'irions', 'irons', 'iront', 'ir', 'issaIent', 'issais', 'issait', 'issant', 'issantes', 'issante', 'issants',
+            'issent', 'isses', 'issez', 'isse', 'issiez', 'issions', 'issons', 'is', 'it', 'i'
+        );
+
+        $cut = $this->searchIfInRv($suffixes);
+        if ($cut === false) {
+            return false;
+        }
+
+        // The letter in front of the suffix has to be a non-vowel that lies in RV itself
+        $previousIndex = $cut - 1;
+        $previous = Utf8::substr($this->word, $previousIndex, 1);
+        if ( !$this->inRv($previousIndex) || in_array($previous, self::$vowels) ) {
+            return false;
+        }
+
+        $this->word = Utf8::substr($this->word, 0, $cut);
+
+        return true;
+    }
+
+    /**
+     * Step 2b: Other verb suffixes
+     * Do step 2b if step 2a was done, but failed to remove a suffix.
+     * All tests are confined to the RV region.
+     *
+     * @return boolean  true when one of the suffixes was recognised in RV, false otherwise
+     */
+    private function step2b()
+    {
+        // é ée ées és èrent er era erai eraIent erais erait eras erez eriez erions erons eront ez iez
+        // delete
+        $plainDeletion = array(
+            'ées', 'èrent', 'erais', 'erait', 'erai', 'eraIent', 'eras', 'erez', 'eriez',
+            'erions', 'erons', 'eront', 'era', 'er', 'iez', 'ez','és', 'ée', 'é'
+        );
+
+        $cut = $this->searchIfInRv($plainDeletion);
+        if ($cut !== false) {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+
+            return true;
+        }
+
+        // âmes ât âtes a ai aIent ais ait ant ante antes ants as asse assent asses assiez assions
+        // delete
+        // if preceded by e, delete
+        $deletionWithE = array(
+            'âmes', 'âtes', 'ât', 'aIent', 'ais', 'ait', 'antes', 'ante', 'ants', 'ant',
+            'assent', 'asses', 'assiez', 'assions', 'asse', 'as', 'ai', 'a'
+        );
+
+        $cut = $this->searchIfInRv($deletionWithE);
+        if ($cut !== false) {
+            $previousIndex = $cut - 1;
+            $previous = Utf8::substr($this->word, $previousIndex, 1);
+
+            // An e in RV directly in front of the suffix is removed together with it
+            $dropE = $this->inRv($previousIndex) && ($previous == 'e');
+            $this->word = Utf8::substr($this->word, 0, $dropE ? $previousIndex : $cut);
+
+            return true;
+        }
+
+        // ions
+        // delete if in R2
+        $cut = $this->searchIfInRv(array('ions'));
+        if ($cut === false) {
+            return false;
+        }
+
+        if ($this->inR2($cut)) {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+        }
+
+        return true;
+    }
+
+    /**
+     * Step 3: Replace final Y with i or final ç with c
+     */
+    private function step3()
+    {
+        // Pattern => replacement, applied one after the other
+        $finals = array(
+            '#(Y)$#u' => 'i',
+            '#(ç)$#u' => 'c',
+        );
+
+        foreach ($finals as $pattern => $replacement) {
+            $this->word = preg_replace($pattern, $replacement, $this->word);
+        }
+    }
+
+    /**
+     * Step 4: Residual suffix
+     *
+     * First a final s is removed unless it is preceded by a, i, o, u, è or s. Then, with
+     * all tests confined to the RV region, the first matching rule among ion, ier/ière/Ier/Ière,
+     * e and guë is applied.
+     *
+     * @return boolean  true when one of the RV rules was recognised, false otherwise
+     *                  (the removal of a final s alone does not make the result true)
+     */
+    private function step4()
+    {
+        // If the word ends s, not preceded by a, i, o, u, è or s, delete it.
+        // The u modifier is required: without it the pattern is matched byte by byte and the
+        // two bytes of è are treated as two separate members of the character class.
+        if (preg_match('#[^aiouès]s$#u', $this->word)) {
+            $this->word = Utf8::substr($this->word, 0, -1);
+        }
+
+        // In the rest of step 4, all tests are confined to the RV region.
+        // ion
+        // delete if in R2 and preceded by s or t
+        if ( (($position = $this->searchIfInRv(array('ion'))) !== false) && ($this->inR2($position)) ) {
+            $before = $position - 1;
+            $letter = Utf8::substr($this->word, $before, 1);
+            // The s or t itself has to lie in RV as well
+            if ( $this->inRv($before) && (($letter == 's') || ($letter == 't')) ) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+            return true;
+        }
+
+        // ier ière Ier Ière
+        // replace with i
+        if ( ($this->searchIfInRv(array('ier', 'ière', 'Ier', 'Ière'))) !== false) {
+            $this->word = preg_replace('#(ier|ière|Ier|Ière)$#u', 'i', $this->word);
+            return true;
+        }
+
+        // e
+        // delete
+        if ( ($this->searchIfInRv(array('e'))) !== false) {
+            $this->word = Utf8::substr($this->word, 0, -1);
+            return true;
+        }
+
+        // ë
+        // if preceded by gu, delete
+        if ( ($position = $this->searchIfInRv(array('guë'))) !== false) {
+            // $position + 2 is the index of the ë itself
+            if ($this->inRv($position+2)) {
+                $this->word = Utf8::substr($this->word, 0, -1);
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Step 5: Undouble
+     * If the word ends enn, onn, ett, ell or eill, delete the last letter
+     */
+    private function step5()
+    {
+        $doubledEndings = array('enn', 'onn', 'ett', 'ell', 'eill');
+
+        if ($this->search($doubledEndings) === false) {
+            return;
+        }
+
+        $this->word = Utf8::substr($this->word, 0, -1);
+    }
+
+    /**
+     * Step 6: Un-accent
+     * If the word ends é or è followed by at least one non-vowel, remove the accent from the e.
+     */
+    private function step6()
+    {
+        $nonVowel = '[^' . $this->plainVowels . ']';
+        $pattern = '#(é|è)(' . $nonVowel . '+)$#u';
+
+        $this->word = preg_replace($pattern, 'e$2', $this->word);
+    }
+
+    /**
+     * And finally:
+     * Turn any remaining I, U and Y letters in the word back into lower case.
+     */
+    private function finish()
+    {
+        $upper = array('I','U','Y');
+        $lower = array('i', 'u', 'y');
+
+        $this->word = Utf8::str_replace($upper, $lower, $this->word);
+    }
+
+    /**
+     * Compute the RV region of the word and store it in $this->rv / $this->rvIndex.
+     *
+     * If the word begins with two vowels, RV is the region after the third letter,
+     * otherwise the region after the first vowel not at the beginning of the word,
+     * or the end of the word if these positions cannot be found.
+     * (Exceptionally, par, col or tap, at the beginning of a word is also taken to define RV as the region to their right.)
+     *
+     * @return boolean  false when the word has at least three letters and no RV start was found, true otherwise
+     */
+    protected function rv()
+    {
+        $length = Utf8::strlen($this->word);
+
+        // Default: RV is empty and starts at the end of the word
+        $this->rv = '';
+        $this->rvIndex = $length;
+
+        if ($length < 3) {
+            return true;
+        }
+
+        // Two leading vowels, or one of the prefixes par, col, tap: RV starts after the third letter
+        $startsWithTwoVowels = in_array(Utf8::substr($this->word, 0, 1), self::$vowels)
+            && in_array(Utf8::substr($this->word, 1, 1), self::$vowels);
+
+        if ($startsWithTwoVowels || in_array(Utf8::substr($this->word, 0, 3), array('par', 'col', 'tap'))) {
+            $this->rv = Utf8::substr($this->word, 3);
+            $this->rvIndex = 3;
+
+            return true;
+        }
+
+        // Otherwise RV starts after the first vowel that is not the first letter
+        $index = 1;
+        while ($index < $length) {
+            if (in_array(Utf8::substr($this->word, $index, 1), self::$vowels)) {
+                $start = $index + 1;
+                $this->rv = Utf8::substr($this->word, $start);
+                $this->rvIndex = $start;
+
+                return true;
+            }
+            $index++;
+        }
+
+        return false;
+    }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/German.php b/libraries/vendor/wamania/php-stemmer/src/German.php
new file mode 100644
index 00000000000..949344241c4
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/German.php
@@ -0,0 +1,213 @@
+plainVowels = implode('', self::$vowels);
+
+ $this->word = Utf8::strtolower($word);
+
+ // First, replace ß by ss
+ $this->word = Utf8::str_replace('ß', 'ss', $this->word);
+
+ // put u and y between vowels into upper case
+ $this->word = preg_replace('#(['.$this->plainVowels.'])y(['.$this->plainVowels.'])#u', '$1Y$2', $this->word);
+ $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word);
+
+ // R1 and R2 are first set up in the standard way
+ $this->r1();
+ $this->r2();
+
+ // but then R1 is adjusted so that the region before it contains at least 3 letters.
+ if ($this->r1Index < 3) {
+ $this->r1Index = 3;
+ $this->r1 = Utf8::substr($this->word, 3);
+ }
+
+ $this->step1();
+ $this->step2();
+ $this->step3();
+ $this->finish();
+
+ return $this->word;
+ }
+
+ /**
+ * Step 1
+ */
+    public function step1()
+    {
+        // Group (a): em, ern, er -- deleted when the suffix is in R1.
+        $cut = $this->search(array('em', 'ern', 'er'));
+        if ($cut !== false) {
+            if ($this->inR1($cut)) {
+                $this->word = Utf8::substr($this->word, 0, $cut);
+            }
+            return true;
+        }
+
+        // Group (b): es, en, e -- deleted when the suffix is in R1.
+        $cut = $this->search(array('es', 'en', 'e'));
+        if ($cut !== false) {
+            if (!$this->inR1($cut)) {
+                return true;
+            }
+
+            $this->word = Utf8::substr($this->word, 0, $cut);
+
+            // When the deleted ending was preceded by niss, the final s goes as well.
+            if ($this->search(array('niss')) !== false) {
+                $this->word = Utf8::substr($this->word, 0, -1);
+            }
+            return true;
+        }
+
+        // Group (c): s -- deleted when in R1 and the letter before it is a valid s-ending.
+        $cut = $this->search(array('s'));
+        if ($cut === false) {
+            return false;
+        }
+
+        if ($this->inR1($cut) && in_array(Utf8::substr($this->word, $cut - 1, 1), self::$sEndings)) {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+        }
+        return true;
+    }
+
+ /**
+ * Step 2
+ */
+    public function step2()
+    {
+        // en, er, est: deleted when the suffix is in R1.
+        $cut = $this->search(array('en', 'er', 'est'));
+        if ($cut !== false) {
+            if ($this->inR1($cut)) {
+                $this->word = Utf8::substr($this->word, 0, $cut);
+            }
+            return true;
+        }
+
+        // st: deleted when in R1, preceded by a valid st-ending,
+        // and that letter itself has at least three letters in front of it.
+        $cut = $this->search(array('st'));
+        if ($cut === false) {
+            return false;
+        }
+
+        $letterIndex = $cut - 1;
+        if ($this->inR1($cut)
+            && $letterIndex >= 3
+            && in_array(Utf8::substr($this->word, $letterIndex, 1), self::$stEndings)
+        ) {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+        }
+        return true;
+    }
+
+ /**
+ * Step 3: d-suffixes
+ */
+    public function step3()
+    {
+        // Tries four suffix groups in turn. Returns true as soon as one group matches the
+        // end of the word (whether or not anything was actually deleted), false otherwise.
+        // Every search() result is compared with !== false so that a numeric position is
+        // never confused with the "not found" value.
+
+        // end ung
+        //      delete if in R2
+        //      if preceded by ig, delete if in R2 and not preceded by e
+        if ( ($position = $this->search(array('end', 'ung'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            if ( ($position2 = $this->search(array('ig'))) !== false) {
+                $before = $position2 - 1;
+                $letter = Utf8::substr($this->word, $before, 1);
+
+                if ( ($this->inR2($position2)) && ($letter != 'e') ) {
+                    $this->word = Utf8::substr($this->word, 0, $position2);
+                }
+            }
+            return true;
+        }
+
+        // ig ik isch
+        //      delete if in R2 and not preceded by e
+        if ( ($position = $this->search(array('ig', 'ik', 'isch'))) !== false) {
+            $before = $position - 1;
+            $letter = Utf8::substr($this->word, $before, 1);
+
+            if ( ($this->inR2($position)) && ($letter != 'e') ) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+            return true;
+        }
+
+        // lich heit
+        //      delete if in R2
+        //      if preceded by er or en, delete if in R1
+        if ( ($position = $this->search(array('lich', 'heit'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            if ( ($position2 = $this->search(array('er', 'en'))) !== false) {
+                if ($this->inR1($position2)) {
+                    $this->word = Utf8::substr($this->word, 0, $position2);
+                }
+            }
+            return true;
+        }
+
+        // keit
+        //      delete if in R2
+        //      if preceded by lich or ig, delete if in R2
+        if ( ($position = $this->search(array('keit'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            if ( ($position2 = $this->search(array('lich', 'ig'))) !== false) {
+                if ($this->inR2($position2)) {
+                    $this->word = Utf8::substr($this->word, 0, $position2);
+                }
+            }
+            return true;
+        }
+
+        return false;
+    }
+
+ /**
+ * Finally
+ */
+    public function finish()
+    {
+        // U and Y go back to lower case; ä, ü and ö lose their umlaut.
+        $from = array('U', 'Y', 'ä', 'ü', 'ö');
+        $to   = array('u', 'y', 'a', 'u', 'o');
+
+        $this->word = Utf8::str_replace($from, $to, $this->word);
+    }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Italian.php b/libraries/vendor/wamania/php-stemmer/src/Italian.php
new file mode 100644
index 00000000000..62107407b1a
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Italian.php
@@ -0,0 +1,286 @@
+plainVowels = implode('', self::$vowels);
+
+ $this->word = Utf8::strtolower($word);
+
+ // First, replace all acute accents by grave accents.
+ $this->word = Utf8::str_replace(array('á', 'é', 'í', 'ó', 'ú'), array('à', 'è', 'ì', 'ò', 'ù'), $this->word);
+
+ //And, as in French, put u after q, and u, i between vowels into upper case. (See note on vowel marking.) The vowels are then
+ $this->word = preg_replace('#([q])u#u', '$1U', $this->word);
+ $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word);
+ $this->word = preg_replace('#(['.$this->plainVowels.'])i(['.$this->plainVowels.'])#u', '$1I$2', $this->word);
+
+ $this->rv();
+ $this->r1();
+ $this->r2();
+
+ $this->step0();
+
+ $word = $this->word;
+ $this->step1();
+
+ //Do step 2 if no ending was removed by step 1.
+ if ($word == $this->word) {
+ $this->step2();
+ }
+
+ $this->step3a();
+ $this->step3b();
+ $this->finish();
+
+ return $this->word;
+ }
+
+ /**
+ * Step 0: Attached pronoun
+ */
+    private function step0()
+    {
+        // Attached pronouns, listed so that longer forms are tried before their shorter tails.
+        $pronouns = array(
+            'gliela', 'gliele', 'glieli', 'glielo', 'gliene',
+            'sene', 'mela', 'mele', 'meli', 'melo', 'mene', 'tela', 'tele', 'teli', 'telo', 'tene', 'cela',
+            'cele', 'celi', 'celo', 'cene', 'vela', 'vele', 'veli', 'velo', 'vene',
+            'gli', 'la', 'le', 'li', 'lo', 'mi', 'ne', 'si', 'ti', 'vi', 'ci'
+        );
+
+        $position = $this->search($pronouns);
+        if ($position === false) {
+            return false;
+        }
+
+        // The pronoun actually found at the end of the word.
+        $pronoun = Utf8::substr($this->word, $position);
+
+        // (a) ando/endo + pronoun found in RV: the pronoun is deleted.
+        $gerundForms = array();
+        foreach (array('ando', 'endo') as $ending) {
+            $gerundForms[] = $ending . $pronoun;
+        }
+        if ($this->searchIfInRv($gerundForms) !== false) {
+            $this->word = Utf8::substr($this->word, 0, $position);
+        }
+
+        // (b) ar/er/ir + pronoun found in RV: the pronoun is replaced by e.
+        $infinitiveForms = array();
+        foreach (array('ar', 'er', 'ir') as $ending) {
+            $infinitiveForms[] = $ending . $pronoun;
+        }
+        if ($this->searchIfInRv($infinitiveForms) !== false) {
+            $this->word = preg_replace('#('.$pronoun.')$#u', 'e', $this->word);
+        }
+
+        return true;
+    }
+
+ /**
+ * Step 1: Standard suffix removal
+ */
+    private function step1()
+    {
+        // Tries each suffix group in turn. Returns true as soon as one group matches the end
+        // of the word (whether or not anything was removed), false when none matches.
+        // All search results are compared with !== false so that a numeric position is never
+        // confused with the "not found" value.
+
+        // amente
+        //      delete if in R1
+        //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+        //      if preceded by os, ic or abil, delete if in R2
+        if ( ($position = $this->search(array('amente'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+            if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position2);
+                if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
+                    $this->word = Utf8::substr($this->word, 0, $position3);
+                }
+
+            // if preceded by os, ic or abil, delete if in R2
+            } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'abil'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position4);
+            }
+            return true;
+        }
+
+        // delete if in R2
+        if ( ($position = $this->search(array(
+            'ibili', 'atrice', 'abili', 'abile', 'ibile', 'atrici', 'mente',
+            'anza', 'anze', 'iche', 'ichi', 'ismo', 'ismi', 'ista', 'iste', 'isti', 'istà', 'istè', 'istì', 'ante', 'anti',
+            'ico', 'ici', 'ica', 'ice', 'oso', 'osi', 'osa', 'ose'
+        ))) !== false) {
+
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+            return true;
+        }
+
+        // azione azioni atore atori
+        //      delete if in R2
+        //      if preceded by ic, delete if in R2
+        if ( ($position = $this->search(array('azione', 'azioni', 'atore', 'atori'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+
+                if ( ($position2 = $this->search(array('ic'))) !== false) {
+                    if ($this->inR2($position2)) {
+                        $this->word = Utf8::substr($this->word, 0, $position2);
+                    }
+                }
+            }
+            return true;
+        }
+
+        // logia logie
+        //      replace with log if in R2
+        if ( ($position = $this->search(array('logia', 'logie'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(logia|logie)$#u', 'log', $this->word);
+            }
+            return true;
+        }
+
+        // uzione uzioni usione usioni
+        //      replace with u if in R2
+        if ( ($position = $this->search(array('uzione', 'uzioni', 'usione', 'usioni'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(uzione|uzioni|usione|usioni)$#u', 'u', $this->word);
+            }
+            return true;
+        }
+
+        // enza enze
+        //      replace with ente if in R2
+        if ( ($position = $this->search(array('enza', 'enze'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(enza|enze)$#u', 'ente', $this->word);
+            }
+            return true;
+        }
+
+        // amento amenti imento imenti
+        //      delete if in RV
+        if ( ($position = $this->search(array('amento', 'amenti', 'imento', 'imenti'))) !== false) {
+            if ($this->inRv($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+            return true;
+        }
+
+        // ità
+        //      delete if in R2
+        //      if preceded by abil, ic or iv, delete if in R2
+        if ( ($position = $this->search(array('ità'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position2);
+            }
+            return true;
+        }
+
+        // ivo ivi iva ive
+        //      delete if in R2
+        //      if preceded by at, delete if in R2 (and if further preceded by ic, delete if in R2)
+        if ( ($position = $this->search(array('ivo', 'ivi', 'iva', 'ive'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position2);
+                if ( ($position3 = $this->searchIfInR2(array('ic'))) !== false) {
+                    $this->word = Utf8::substr($this->word, 0, $position3);
+                }
+            }
+            return true;
+        }
+
+        return false;
+    }
+
+ /**
+ * Step 2: Verb suffixes
+ * Search for the longest among the following suffixes in RV, and if found, delete.
+ */
+    private function step2()
+    {
+        // Verb endings, in the order in which they are tried.
+        $verbSuffixes = array(
+            'assimo', 'assero', 'eranno', 'erebbero', 'erebbe', 'eremmo', 'ereste', 'eresti', 'essero', 'iranno', 'irebbero', 'irebbe', 'iremmo',
+            'iscano', 'ireste', 'iresti', 'iscono', 'issero',
+            'avamo', 'arono', 'avano', 'avate', 'eremo', 'erete', 'erono', 'evamo', 'evano', 'evate', 'ivamo', 'ivano', 'ivate', 'iremo', 'irete', 'irono',
+            'ammo', 'ando', 'asse', 'assi', 'emmo', 'enda', 'ende', 'endi', 'endo', 'erai', 'erei', 'Yamo', 'iamo', 'immo', 'irà', 'irai', 'irei',
+            'isca', 'isce', 'isci', 'isco',
+            'ano', 'are', 'ata', 'ate', 'ati', 'ato', 'ava', 'avi', 'avo', 'erà', 'ere', 'erò', 'ete', 'eva',
+            'evi', 'evo', 'ire', 'ita', 'ite', 'iti', 'ito', 'iva', 'ivi', 'ivo', 'ono', 'uta', 'ute', 'uti', 'uto', 'irò', 'ar', 'ir'
+        );
+
+        $cut = $this->searchIfInRv($verbSuffixes);
+        if ($cut === false) {
+            return;
+        }
+
+        // A verb ending was found in RV: strip it.
+        $this->word = Utf8::substr($this->word, 0, $cut);
+    }
+
+ /**
+ * Step 3a
+ * Delete a final a, e, i, o, à, è, ì or ò if it is in RV, and a preceding i if it is in RV
+ */
+    private function step3a()
+    {
+        // Nothing to do unless the word ends with one of these vowels in RV.
+        $finalVowels = array('a', 'e', 'i', 'o', 'à', 'è', 'ì', 'ò');
+        if ($this->searchIfInRv($finalVowels) === false) {
+            return false;
+        }
+
+        // Drop the final vowel...
+        $this->word = Utf8::substr($this->word, 0, -1);
+
+        // ...and a preceding i as well, when that i is in RV.
+        if ($this->searchIfInRv(array('i')) !== false) {
+            $this->word = Utf8::substr($this->word, 0, -1);
+        }
+
+        return true;
+    }
+
+ /**
+ * Step 3b
+ * Replace final ch (or gh) with c (or g) if in RV (crocch -> crocc)
+ */
+    private function step3b()
+    {
+        // Final ch in RV becomes c.
+        if ($this->searchIfInRv(array('ch')) !== false) {
+            $this->word = preg_replace('#(ch)$#u', 'c', $this->word);
+            return;
+        }
+
+        // Otherwise a final gh in RV becomes g.
+        if ($this->searchIfInRv(array('gh')) !== false) {
+            $this->word = preg_replace('#(gh)$#u', 'g', $this->word);
+        }
+    }
+
+ /**
+ * Finally
+ * turn I and U back into lower case
+ */
+    private function finish()
+    {
+        // Lower-case every I and U that is still present in the word.
+        $upper = array('I', 'U');
+        $lower = array('i', 'u');
+
+        $this->word = Utf8::str_replace($upper, $lower, $this->word);
+    }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Norwegian.php b/libraries/vendor/wamania/php-stemmer/src/Norwegian.php
new file mode 100644
index 00000000000..bf9d2322f9d
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Norwegian.php
@@ -0,0 +1,127 @@
+word = Utf8::strtolower($word);
+
+ // R2 is not used: R1 is defined in the same way as in the German stemmer
+ $this->r1();
+
+ // then R1 is adjusted so that the region before it contains at least 3 letters.
+ if ($this->r1Index < 3) {
+ $this->r1Index = 3;
+ $this->r1 = Utf8::substr($this->word, 3);
+ }
+
+ // Do each of steps 1, 2 and 3.
+ $this->step1();
+ $this->step2();
+ $this->step3();
+
+ return $this->word;
+ }
+
+ /**
+ * Define a valid s-ending as one of
+ * b c d f g h j l m n o p r t v y z,
+ * or k not preceded by a vowel
+ *
+ * @param string $word Text whose last letter (and, for k, the letter before it) is checked
+ * @return boolean
+ */
+    private function hasValidSEnding($word)
+    {
+        $validLetters = array('b', 'c', 'd', 'f', 'g', 'h', 'j', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z');
+        $last         = Utf8::substr($word, -1, 1);
+
+        // Any letter from the list is a valid s-ending on its own.
+        if (in_array($last, $validLetters)) {
+            return true;
+        }
+
+        // Apart from those, only k can qualify.
+        if ($last != 'k') {
+            return false;
+        }
+
+        // k is valid only when the letter before it is not a vowel.
+        return !in_array(Utf8::substr($word, -2, 1), self::$vowels);
+    }
+
+ /**
+ * Step 1
+ * Search for the longest among the following suffixes in R1, and perform the action indicated.
+ */
+    private function step1()
+    {
+        // erte, ert in R1: replace with er.
+        if ($this->searchIfInR1(array('erte', 'ert')) !== false) {
+            $this->word = preg_replace('#(erte|ert)$#u', 'er', $this->word);
+            return true;
+        }
+
+        // Plain deletions: any of these suffixes found in R1 is removed.
+        $deletable = array(
+            'hetenes', 'hetene', 'hetens', 'heten', 'endes', 'heter', 'ande', 'ende', 'enes', 'edes', 'ede', 'ane',
+            'ene', 'het', 'ers', 'ets', 'ast', 'ens', 'en', 'ar', 'er', 'as', 'es', 'et', 'a', 'e'
+        );
+        $cut = $this->searchIfInR1($deletable);
+        if ($cut !== false) {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+            return true;
+        }
+
+        // s in R1: removed only when what precedes it has a valid s-ending.
+        $cut = $this->searchIfInR1(array('s'));
+        if ($cut !== false) {
+            $candidate = Utf8::substr($this->word, 0, $cut);
+            if ($this->hasValidSEnding($candidate)) {
+                $this->word = $candidate;
+            }
+            return true;
+        }
+    }
+
+ /**
+ * Step 2
+ * If the word ends dt or vt in R1, delete the t.
+ */
+    private function step2()
+    {
+        // Only words ending in dt or vt (inside R1) are affected.
+        if ($this->searchIfInR1(array('dt', 'vt')) === false) {
+            return;
+        }
+
+        // Remove the trailing t.
+        $this->word = Utf8::substr($this->word, 0, -1);
+    }
+
+ /**
+ * Step 3:
+ * Search for the longest among the following suffixes in R1, and if found, delete.
+ */
+    private function step3()
+    {
+        // leg eleg ig eig lig elig els lov elov slov hetslov, in the order they are tried.
+        $suffixes = array(
+            'hetslov', 'eleg', 'elov', 'slov', 'elig', 'eig', 'lig', 'els', 'lov', 'leg', 'ig'
+        );
+
+        $cut = $this->searchIfInR1($suffixes);
+        if ($cut === false) {
+            return;
+        }
+
+        // A suffix was found in R1: delete it.
+        $this->word = Utf8::substr($this->word, 0, $cut);
+    }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Portuguese.php b/libraries/vendor/wamania/php-stemmer/src/Portuguese.php
new file mode 100644
index 00000000000..7621b83a34a
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Portuguese.php
@@ -0,0 +1,280 @@
+word = Utf8::strtolower($word);
+
+ $this->word = Utf8::str_replace(array('ã', 'õ'), array('a~', 'o~'), $this->word);
+
+ $this->rv();
+ $this->r1();
+ $this->r2();
+
+ $word = $this->word;
+ $this->step1();
+
+ if ($word == $this->word) {
+ $this->step2();
+ }
+
+ if ($word != $this->word) {
+ $this->step3();
+ } else {
+ $this->step4();
+ }
+
+ $this->step5();
+ $this->finish();
+
+ return $this->word;
+ }
+
+ /**
+ * Step 1: Standard suffix removal
+ */
+    public function step1()
+    {
+        // Tries each suffix group in turn. Returns true as soon as one group matches the end
+        // of the word (whether or not anything was changed), false when none matches.
+        // Nasal vowels appear here in their internal "a~" / "o~" spelling, as in the first
+        // suffix list below ('aça~o', 'aço~es').
+
+        // delete if in R2
+        if ( ($position = $this->search(array(
+            'amentos', 'imentos', 'adoras', 'adores', 'amento', 'imento', 'adora', 'istas', 'ismos', 'antes', 'ância',
+            'ezas', 'eza', 'icos', 'icas', 'ismo', 'ável', 'ível', 'ista', 'oso',
+            'osos', 'osas', 'osa', 'ico', 'ica', 'ador', 'aça~o', 'aço~es' , 'ante'))) !== false) {
+
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+            return true;
+        }
+
+        // logia logias
+        //      replace with log if in R2
+        // (Portuguese spelling has no accent on the i; the accented Spanish forms never match.)
+        if ( ($position = $this->search(array('logias', 'logia'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(logias|logia)$#u', 'log', $this->word);
+            }
+            return true;
+        }
+
+        // ução uções (internally uça~o uço~es)
+        //      replace with u if in R2
+        if ( ($position = $this->search(array('uço~es', 'uça~o'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(uço~es|uça~o)$#u', 'u', $this->word);
+            }
+            return true;
+        }
+
+        // ência ências
+        //      replace with ente if in R2
+        if ( ($position = $this->search(array('ências', 'ência'))) !== false) {
+            if ($this->inR2($position)) {
+                $this->word = preg_replace('#(ências|ência)$#u', 'ente', $this->word);
+            }
+            return true;
+        }
+
+        // amente
+        //      delete if in R1
+        //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+        //      if preceded by os, ic or ad, delete if in R2
+        if ( ($position = $this->search(array('amente'))) !== false) {
+
+            // delete if in R1
+            if ($this->inR1($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+            if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position2);
+                if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
+                    $this->word = Utf8::substr($this->word, 0, $position3);
+                }
+
+            // if preceded by os, ic or ad, delete if in R2
+            } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position4);
+            }
+            return true;
+        }
+
+        // mente
+        //      delete if in R2
+        //      if preceded by ante, avel or ível, delete if in R2
+        if ( ($position = $this->search(array('mente'))) !== false) {
+
+            // delete if in R2
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            // if preceded by ante, avel or ível, delete if in R2
+            // (strict comparison, like every other search check in this method)
+            if ( ($position2 = $this->searchIfInR2(array('ante', 'avel', 'ível'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position2);
+            }
+            return true;
+        }
+
+        // idade idades
+        //      delete if in R2
+        //      if preceded by abil, ic or iv, delete if in R2
+        if ( ($position = $this->search(array('idades', 'idade'))) !== false) {
+
+            // delete if in R2
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            // if preceded by abil, ic or iv, delete if in R2
+            if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position2);
+            }
+            return true;
+        }
+
+        // iva ivo ivas ivos
+        //      delete if in R2
+        //      if preceded by at, delete if in R2
+        if ( ($position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'))) !== false) {
+
+            // delete if in R2
+            if ($this->inR2($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+
+            // if preceded by at, delete if in R2
+            if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
+                $this->word = Utf8::substr($this->word, 0, $position2);
+            }
+            return true;
+        }
+
+        // ira iras
+        //      replace with ir if in RV and preceded by e
+        if ( ($position = $this->search(array('iras', 'ira'))) !== false) {
+
+            if ($this->inRv($position)) {
+                $before = $position -1;
+                $letter = Utf8::substr($this->word, $before, 1);
+
+                if ($letter == 'e') {
+                    $this->word = preg_replace('#(iras|ira)$#u', 'ir', $this->word);
+                }
+            }
+            return true;
+        }
+
+        return false;
+    }
+
+ /**
+ * Step 2: Verb suffixes
+ * Search for the longest among the following suffixes in RV, and if found, delete.
+ */
+    public function step2()
+    {
+        // Verb endings, in the order in which they are tried.
+        $verbSuffixes = array(
+            'aríamos', 'eríamos', 'iríamos', 'ássemos', 'êssemos', 'íssemos',
+            'aríeis', 'eríeis', 'iríeis', 'ásseis', 'ésseis', 'ísseis', 'áramos', 'éramos', 'íramos', 'ávamos',
+            'aremos', 'eremos', 'iremos',
+            'ariam', 'eriam', 'iriam', 'assem', 'essem', 'issem', 'arias', 'erias', 'irias', 'ardes', 'erdes', 'irdes',
+            'asses', 'esses', 'isses', 'astes', 'estes', 'istes', 'áreis', 'areis', 'éreis', 'ereis', 'íreis', 'ireis',
+            'áveis', 'íamos', 'armos', 'ermos', 'irmos',
+            'aria', 'eria', 'iria', 'asse', 'esse', 'isse', 'aste', 'este', 'iste', 'arei', 'erei', 'irei', 'adas', 'idas',
+            'aram', 'eram', 'iram', 'avam', 'arem', 'erem', 'irem', 'ando', 'endo', 'indo', 'ara~o', 'era~o', 'ira~o',
+            'arás', 'aras', 'erás', 'eras', 'irás', 'avas', 'ares', 'eres', 'ires', 'íeis', 'ados', 'idos', 'ámos', 'amos',
+            'emos', 'imos', 'iras',
+            'ada', 'ida', 'ará', 'ara', 'erá', 'era', 'irá', 'ava', 'iam', 'ado', 'ido', 'ias', 'ais', 'eis', 'ira',
+            'ia', 'ei', 'am', 'em', 'ar', 'er', 'ir', 'as', 'es', 'is', 'eu', 'iu', 'ou',
+        );
+
+        $cut = $this->searchIfInRv($verbSuffixes);
+        if ($cut === false) {
+            return false;
+        }
+
+        // A verb ending was found in RV: strip it.
+        $this->word = Utf8::substr($this->word, 0, $cut);
+        return true;
+    }
+
+ /**
+ * Step 3: Delete suffix i if in RV and preceded by c
+ *
+ */
+    public function step3()
+    {
+        // Nothing to do unless the word ends with an i that lies in RV.
+        if ($this->searchIfInRv(array('i')) === false) {
+            return false;
+        }
+
+        // The i is dropped only when the letter before it is c.
+        if (Utf8::substr($this->word, -2, 1) == 'c') {
+            $this->word = Utf8::substr($this->word, 0, -1);
+        }
+
+        return true;
+    }
+
+ /**
+ * Step 4
+ */
+    public function step4()
+    {
+        // Residual suffixes: os a i o á í ó, deleted when found in RV.
+        $residual = array('os', 'a', 'i', 'o','á', 'í', 'ó');
+
+        $cut = $this->searchIfInRv($residual);
+        if ($cut === false) {
+            return false;
+        }
+
+        $this->word = Utf8::substr($this->word, 0, $cut);
+        return true;
+    }
+
+ /**
+ * Step 5
+ */
+    public function step5()
+    {
+        // Word ends with e, é or ê in RV: delete that letter.
+        if ($this->searchIfInRv(array('e', 'é', 'ê')) !== false) {
+            $this->word = Utf8::substr($this->word, 0, -1);
+
+            // If the word now ends in gu (or ci) and the u (or i) is in RV, delete it too.
+            $pairStart = $this->search(array('gu', 'ci'));
+            if ($pairStart !== false && $this->inRv(($pairStart+1))) {
+                $this->word = Utf8::substr($this->word, 0, -1);
+            }
+            return true;
+        }
+
+        // Otherwise a final ç loses its cedilla.
+        if ($this->search(array('ç')) === false) {
+            return false;
+        }
+
+        $this->word = preg_replace('#(ç)$#u', 'c', $this->word);
+        return true;
+    }
+
+ /**
+ * Finally
+ */
+    public function finish()
+    {
+        // Turn the internal a~ and o~ spellings back into ã and õ.
+        $internal = array('a~', 'o~');
+        $nasal    = array('ã', 'õ');
+
+        $this->word = Utf8::str_replace($internal, $nasal, $this->word);
+    }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Romanian.php b/libraries/vendor/wamania/php-stemmer/src/Romanian.php
new file mode 100644
index 00000000000..b831ba437dd
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Romanian.php
@@ -0,0 +1,331 @@
+word = Utf8::strtolower($word);
+
+ $this->plainVowels = implode('', self::$vowels);
+
+ // First, i and u between vowels are put into upper case (so that they are treated as consonants).
+ $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word);
+ $this->word = preg_replace('#(['.$this->plainVowels.'])i(['.$this->plainVowels.'])#u', '$1I$2', $this->word);
+
+ $this->rv();
+ $this->r1();
+ $this->r2();
+
+ $this->step0();
+
+ $word1 = $this->word;
+ $word2 = $this->word;
+
+ do {
+ $word1 = $this->word;
+ $this->step1();
+ } while ($this->word != $word1);
+
+ $this->step2();
+
+ // Do step 3 if no suffix was removed either by step 1 or step 2.
+ if ($word2 == $this->word) {
+ $this->step3();
+ }
+
+ $this->step4();
+ $this->finish();
+
+ return $this->word;
+ }
+
+ /**
+ * Step 0: Removal of plurals (and other simplifications)
+ * Search for the longest among the following suffixes, and, if it is in R1, perform the action indicated.
+ * @return boolean
+ */
+    public function step0()
+    {
+        // Tries each suffix group in turn. Returns true as soon as one group matches the end
+        // of the word (whether or not the word was changed), false when none matches.
+        // All search() results are compared with !== false so that a numeric position is
+        // never confused with the "not found" value.
+
+        // ul ului
+        //      delete
+        if ( ($position = $this->search(array('ul', 'ului'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+            return true;
+        }
+
+        // aua
+        //      replace with a
+        if ( ($position = $this->search(array('aua'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(aua)$#u', 'a', $this->word);
+            }
+            return true;
+        }
+
+        // ea ele elor
+        //      replace with e
+        if ( ($position = $this->search(array('ea', 'ele', 'elor'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(ea|ele|elor)$#u', 'e', $this->word);
+            }
+            return true;
+        }
+
+        // ii iua iei iile iilor ilor
+        //      replace with i
+        if ( ($position = $this->search(array('ii', 'iua', 'iei', 'iile', 'iilor', 'ilor'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(ii|iua|iei|iile|iilor|ilor)$#u', 'i', $this->word);
+            }
+            return true;
+        }
+
+        // ile
+        //      replace with i if not preceded by ab
+        if ( ($position = $this->search(array('ile'))) !== false) {
+            if ($this->inR1($position)) {
+                // The two letters directly in front of the suffix.
+                $before = Utf8::substr($this->word, ($position-2), 2);
+
+                if ($before != 'ab') {
+                    $this->word = preg_replace('#(ile)$#u', 'i', $this->word);
+                }
+            }
+            return true;
+        }
+
+        // atei
+        //      replace with at
+        if ( ($position = $this->search(array('atei'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(atei)$#u', 'at', $this->word);
+            }
+            return true;
+        }
+
+        // aţie aţia
+        //      replace with aţi
+        if ( ($position = $this->search(array('aţie', 'aţia'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(aţie|aţia)$#u', 'aţi', $this->word);
+            }
+            return true;
+        }
+
+        return false;
+    }
+
+ /**
+ * Step 1: Reduction of combining suffixes
+ * Search for the longest among the following suffixes, and, if it is in R1, perform the replacement action indicated.
+ * Then repeat this step until no replacement occurs.
+ * @return boolean
+ */
+    public function step1()
+    {
+        // Tries each suffix group in turn. Returns true as soon as one group matches the end
+        // of the word (whether or not a replacement was made), false when none matches.
+
+        // abilitate abilitati abilităi abilităţi
+        //      replace with abil
+        if ( ($position = $this->search(array('abilitate', 'abilitati', 'abilităi', 'abilităţi'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(abilitate|abilitati|abilităi|abilităţi)$#u', 'abil', $this->word);
+            }
+            return true;
+        }
+
+        // ibilitate
+        //      replace with ibil
+        if ( ($position = $this->search(array('ibilitate'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(ibilitate)$#u', 'ibil', $this->word);
+            }
+            return true;
+        }
+
+        // ivitate ivitati ivităi ivităţi
+        //      replace with iv
+        if ( ($position = $this->search(array('ivitate', 'ivitati', 'ivităi', 'ivităţi'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(ivitate|ivitati|ivităi|ivităţi)$#u', 'iv', $this->word);
+            }
+            return true;
+        }
+
+        // icitate icitati icităi icităţi icator icatori iciv iciva icive icivi icivă ical icala icale icali icală
+        //      replace with ic
+        if ( ($position = $this->search(array(
+            'icitate', 'icitati', 'icităi', 'icităţi', 'icatori', 'icator', 'iciva',
+            'icive', 'icivi', 'icivă', 'icala', 'icale', 'icali', 'icală', 'iciv', 'ical'))) !== false) {
+            if ($this->inR1($position)) {
+                // The alternation must list the full suffix "icator": with only "cator" the
+                // leading i of the suffix would survive and the result would end in "iic".
+                $this->word = preg_replace('#(icitate|icitati|icităi|icităţi|icatori|icator|iciva|icive|icivi|icivă|icala|icale|icali|icală|ical|iciv)$#u', 'ic', $this->word);
+            }
+            return true;
+        }
+
+        // ativ ativa ative ativi ativă aţiune atoare ator atori ătoare ător ători
+        //      replace with at
+        if ( ($position = $this->search(array('ativa', 'ative', 'ativi', 'ativă', 'ativ', 'aţiune', 'atoare', 'atori', 'ătoare', 'ători', 'ător', 'ator'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(ativa|ative|ativi|ativă|ativ|aţiune|atoare|atori|ătoare|ători|ător|ator)$#u', 'at', $this->word);
+            }
+            return true;
+        }
+
+        // itiv itiva itive itivi itivă iţiune itoare itor itori
+        //      replace with it
+        if ( ($position = $this->search(array('itiva', 'itive', 'itivi', 'itivă', 'itiv', 'iţiune', 'itoare', 'itori', 'itor'))) !== false) {
+            if ($this->inR1($position)) {
+                $this->word = preg_replace('#(itiva|itive|itivi|itivă|itiv|iţiune|itoare|itori|itor)$#u', 'it', $this->word);
+            }
+            return true;
+        }
+
+        return false;
+    }
+
+ /**
+ * Step 2: Removal of 'standard' suffixes
+ * Search for the longest among the following suffixes, and, if it is in R2, perform the action indicated.
+ * @return boolean
+ */
+    public function step2()
+    {
+        // Suffixes that are simply deleted when they lie in R2.
+        $deletable = array(
+            'atori', 'itate', 'itati', 'ităţi', 'abila', 'abile', 'abili', 'abilă', 'ibila', 'ibile', 'ibili', 'ibilă',
+            'anta', 'ante', 'anti', 'antă', 'ator', 'ibil', 'oasa', 'oasă', 'oase', 'ităi', 'abil',
+            'osi', 'oşi', 'ant', 'ici', 'ică', 'iva', 'ive', 'ivi', 'ivă', 'ata', 'ată', 'ati', 'ate', 'ata', 'ată',
+            'ati', 'ate', 'uta', 'ută', 'uti', 'ute', 'ita', 'ită', 'iti', 'ite', 'ica', 'ice',
+            'at', 'os', 'iv', 'ut', 'it', 'ic'
+        );
+        $cut = $this->search($deletable);
+        if ($cut !== false) {
+            if ($this->inR2($cut)) {
+                $this->word = Utf8::substr($this->word, 0, $cut);
+            }
+            return true;
+        }
+
+        // iune, iuni: in R2 and preceded by ţ -> delete the suffix and turn the ţ into t.
+        $cut = $this->search(array('iune', 'iuni'));
+        if ($cut !== false) {
+            if ($this->inR2($cut) && Utf8::substr($this->word, $cut - 1, 1) == 'ţ') {
+                $this->word = Utf8::substr($this->word, 0, $cut);
+                $this->word = preg_replace('#(ţ)$#u', 't', $this->word);
+            }
+            return true;
+        }
+
+        // ism isme ist ista iste isti istă işti: in R2 -> replace with ist.
+        $cut = $this->search(array('isme', 'ism', 'ista', 'iste', 'isti', 'istă', 'işti', 'ist'));
+        if ($cut === false) {
+            return false;
+        }
+        if ($this->inR2($cut)) {
+            $this->word = preg_replace('#(isme|ism|ista|iste|isti|istă|işti|ist)$#u', 'ist', $this->word);
+        }
+        return true;
+    }
+
+ /**
+ * Step 3: Removal of verb suffixes
+ * Do step 3 if no suffix was removed either by step 1 or step 2.
+ * @return boolean
+ */
+    public function step3()
+    {
+        // Returns true when one of the two suffix groups matched in RV (whether or not the
+        // suffix was deleted) and false when neither matched.
+
+        // are ere ire âre ind ând indu ându eze ească ez ezi ează esc eşti
+        // eşte ăsc ăşti ăşte am ai au eam eai ea eaţi eau iam iai ia iaţi
+        // iau ui aşi arăm arăţi ară uşi urăm urăţi ură işi irăm irăţi iră âi
+        // âşi ârăm ârăţi âră asem aseşi ase aserăm aserăţi aseră isem iseşi ise
+        // iserăm iserăţi iseră âsem âseşi âse âserăm âserăţi âseră usem useşi use userăm userăţi useră
+        //      delete if preceded in RV by a consonant or u
+        if ( ($position = $this->searchIfInRv(array(
+            'userăţi', 'iserăţi', 'âserăţi', 'aserăţi',
+            'userăm', 'iserăm', 'âserăm', 'aserăm',
+            'iseră', 'âseşi', 'useră', 'âseră', 'useşi', 'iseşi', 'aseră', 'aseşi', 'ârăţi', 'irăţi', 'urăţi', 'arăţi', 'ească',
+            'usem', 'âsem', 'isem', 'asem', 'ârăm', 'urăm', 'irăm', 'arăm', 'iaţi', 'eaţi', 'ăşte', 'ăşti', 'eşte', 'eşti', 'ează', 'ându', 'indu',
+            'âse', 'use', 'ise', 'ase', 'âră', 'iră', 'işi', 'ură', 'uşi', 'ară', 'aşi', 'âşi', 'iau', 'iai', 'iam', 'eau', 'eai', 'eam', 'ăsc',
+            'are', 'ere', 'ire', 'âre', 'ind', 'ând', 'eze', 'ezi', 'esc',
+            'âi', 'ui', 'ia', 'ea', 'au', 'ai', 'am', 'ez'
+        ))) !== false) {
+            if ($this->inRv($position)) {
+                // The letter in front of the suffix must itself be in RV...
+                $before = $position - 1;
+                if ($this->inRv($before)) {
+                    $letter = Utf8::substr($this->word, $before, 1);
+
+                    // ...and be a consonant or u.
+                    if ( (!in_array($letter, self::$vowels)) || ($letter == 'u') ) {
+                        $this->word = Utf8::substr($this->word, 0, $position);
+                    }
+                }
+            }
+            return true;
+        }
+
+        // ăm aţi em eţi im iţi âm âţi seşi serăm serăţi seră sei se sesem seseşi sese seserăm seserăţi seseră
+        //      delete
+        if ( ($position = $this->searchIfInRv(array(
+            'seserăm', 'seserăţi', 'seseră', 'seseşi', 'sesem', 'serăţi', 'serăm', 'seşi', 'sese', 'seră',
+            'aţi', 'eţi', 'iţi', 'âţi', 'sei', 'se', 'ăm', 'âm', 'em', 'im'
+        ))) !== false) {
+            if ($this->inRv($position)) {
+                $this->word = Utf8::substr($this->word, 0, $position);
+            }
+            return true;
+        }
+
+        // No verb suffix matched: return an explicit boolean, as the docblock promises.
+        return false;
+    }
+
+ /**
+ * Step 4: Removal of final vowel
+ */
+    public function step4()
+    {
+        // Final vowel candidates: a, ie, e, i, ă.
+        $cut = $this->search(array('a', 'ie', 'e', 'i', 'ă'));
+
+        // Delete the one found, provided it lies in RV.
+        if ($cut !== false && $this->inRv($cut)) {
+            $this->word = Utf8::substr($this->word, 0, $cut);
+        }
+
+        // This step reports true whether or not anything was removed.
+        return true;
+    }
+
+ /**
+ * Finally
+ * Turn I, U back into i, u
+ */
+    public function finish()
+    {
+        // Lower-case every I and U that is still present in the word.
+        $upper = array('I', 'U');
+        $lower = array('i', 'u');
+
+        $this->word = Utf8::str_replace($upper, $lower, $this->word);
+    }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Russian.php b/libraries/vendor/wamania/php-stemmer/src/Russian.php
new file mode 100644
index 00000000000..61d10afb354
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Russian.php
@@ -0,0 +1,249 @@
+word = Utf8::strtolower($word);
+
+ // Compute the R1, R2 and RV regions: steps 1, 2 and 4 search inside RV, step 3 additionally requires R2.
+ $this->r1();
+ $this->r2();
+ $this->rv();
+
+ // Do each of steps 1, 2 3 and 4.
+ $this->step1();
+ $this->step2();
+ $this->step3();
+ $this->step4();
+
+ return $this->word;
+ }
+
+ /**
+ * Step 1: Search for a PERFECTIVE GERUND ending. If one is found remove it, and that is then the end of step 1.
+ * Otherwise try and remove a REFLEXIVE ending, and then search in turn for (1) an ADJECTIVAL, (2) a VERB or (3) a NOUN ending.
+ * As soon as one of the endings (1) to (3) is found remove it, and terminate step 1.
+ *
+ * All endings are looked up with searchIfInRv(), i.e. only from the start of RV onwards.
+ * "Group 1" endings (index 0 of the ending tables) are only accepted when checkGroup1() passes,
+ * "group 2" endings (index 1) are accepted unconditionally.
+ *
+ * @return boolean True when a gerund, adjectival, verb or noun ending was removed, false otherwise.
+ * A reflexive ending removed on its own does not make the method return true.
+ */
+ public function step1()
+ {
+ // Search for a PERFECTIVE GERUND ending.
+ // group 1
+ if ( ($position = $this->searchIfInRv(self::$perfectiveGerund[0])) !== false) {
+ // group 1 endings additionally need checkGroup1() to pass
+ if ( ($this->inRv($position)) && ($this->checkGroup1($position)) ) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ return true;
+ }
+ }
+
+ // group 2
+ if ( ($position = $this->searchIfInRv(self::$perfectiveGerund[1])) !== false) {
+ if ($this->inRv($position)) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ return true;
+ }
+ }
+
+ // Otherwise try and remove a REFLEXIVE ending
+ // (no return here: the remaining searches run on the shortened word)
+ if ( ($position = $this->searchIfInRv(self::$reflexive)) !== false) {
+ if ($this->inRv($position)) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ }
+ }
+
+ // then search in turn for (1) an ADJECTIVAL, (2) a VERB or (3) a NOUN ending.
+ // As soon as one of the endings (1) to (3) is found remove it, and terminate step 1.
+ if ( ($position = $this->searchIfInRv(self::$adjective)) !== false) {
+ if ($this->inRv($position)) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+
+ // Once the adjective ending is gone, a participle ending in front of it is removed as well.
+ // search() is called without a region offset here; the inRv() checks below enforce RV.
+ if ( ($position2 = $this->search(self::$participle[0])) !== false) {
+ if ( ($this->inRv($position2)) && ($this->checkGroup1($position2)) ) {
+ $this->word = Utf8::substr($this->word, 0, $position2);
+ return true;
+ }
+ }
+
+ if ( ($position2 = $this->search(self::$participle[1])) !== false) {
+ if ($this->inRv($position2)) {
+ $this->word = Utf8::substr($this->word, 0, $position2);
+ return true;
+ }
+ }
+
+ // adjective ending removed, no participle ending in front of it
+ return true;
+ }
+ }
+
+ // (2) VERB ending, group 1
+ if ( ($position = $this->searchIfInRv(self::$verb[0])) !== false) {
+ if ( ($this->inRv($position)) && ($this->checkGroup1($position)) ) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ return true;
+ }
+ }
+
+ // (2) VERB ending, group 2
+ if ( ($position = $this->searchIfInRv(self::$verb[1])) !== false) {
+ if ($this->inRv($position)) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ return true;
+ }
+ }
+
+ // (3) NOUN ending
+ if ( ($position = $this->searchIfInRv(self::$noun)) !== false) {
+ if ($this->inRv($position)) {
+ $this->word = Utf8::substr($this->word, 0, $position);
+ return true;
+ }
+ }
+
+ // none of the endings (1) to (3) was removed
+ return false;
+ }
+
+ /**
+  * Step 2: drop a final и (i) when it lies in RV.
+  *
+  * @return boolean True when the letter was removed, false otherwise.
+  */
+ public function step2()
+ {
+     $cut = $this->searchIfInRv(array('и'));
+
+     if ($cut === false || !$this->inRv($cut)) {
+         return false;
+     }
+
+     $this->word = Utf8::substr($this->word, 0, $cut);
+
+     return true;
+ }
+
+ /**
+  * Step 3: Search for a DERIVATIONAL ending in R2 (i.e. the entire ending must lie in R2),
+  * and if one is found, remove it.
+  *
+  * @return boolean True when an ending was removed, false otherwise.
+  */
+ public function step3()
+ {
+     if ( ($position = $this->searchIfInRv(self::$derivational)) !== false) {
+         // The ending was located from RV onwards; it is only removed when it starts inside R2.
+         if ($this->inR2($position)) {
+             $this->word = Utf8::substr($this->word, 0, $position);
+             return true;
+         }
+     }
+
+     // Report "nothing removed" explicitly instead of falling off the end with null,
+     // matching the boolean result of step2().
+     return false;
+ }
+
+ /**
+  * Step 4: (1) Undouble н (n), or, (2) if the word ends with a SUPERLATIVE ending, remove it
+  * and undouble н (n), or (3) if the word ends ь (') (soft sign) remove it.
+  *
+  * The three cases are alternatives: exactly one of them is applied. All searches
+  * are restricted to RV.
+  *
+  * @return boolean True when the word was changed, false otherwise.
+  */
+ public function step4()
+ {
+     // (2) if the word ends with a SUPERLATIVE ending, remove it and undouble н (n)
+     if ( ($position = $this->searchIfInRv(self::$superlative)) !== false) {
+         $this->word = Utf8::substr($this->word, 0, $position);
+
+         if ( ($position = $this->searchIfInRv(array('нн'))) !== false) {
+             $this->word = Utf8::substr($this->word, 0, ($position+1));
+         }
+
+         // Stop here: the soft-sign rule (3) is an alternative to this case, not a follow-up.
+         return true;
+     }
+
+     // (1) Undouble н (n): keep the first н, drop the second
+     if ( ($position = $this->searchIfInRv(array('нн'))) !== false) {
+         $this->word = Utf8::substr($this->word, 0, ($position+1));
+         return true;
+     }
+
+     // (3) if the word ends ь (') (soft sign) remove it
+     if ( ($position = $this->searchIfInRv(array('ь'))) !== false) {
+         $this->word = Utf8::substr($this->word, 0, $position);
+         return true;
+     }
+
+     return false;
+ }
+
+ /**
+  * Computes RV for the current word.
+  *
+  * RV is everything after the first vowel; when the word has no vowel, RV is
+  * empty and starts at the end of the word.
+  *
+  * @return boolean True when a vowel was found, false otherwise.
+  */
+ protected function rv()
+ {
+     $length = Utf8::strlen($this->word);
+
+     // Defaults for a word without any vowel.
+     $this->rvIndex = $length;
+     $this->rv = '';
+
+     $i = 0;
+     while ($i < $length) {
+         if (in_array(Utf8::substr($this->word, $i, 1), self::$vowels)) {
+             $this->rvIndex = $i + 1;
+             $this->rv = Utf8::substr($this->word, $this->rvIndex);
+
+             return true;
+         }
+
+         $i++;
+     }
+
+     return false;
+ }
+
+ /**
+  * Tells whether a "group 1" ending is valid at the given position.
+  *
+  * The letter right before the ending must lie in RV and be а (a) or я (ia).
+  *
+  * @param integer $position Index at which the ending starts
+  * @return boolean
+  */
+ private function checkGroup1($position)
+ {
+     $previous = $position - 1;
+
+     if (!$this->inRv($previous)) {
+         return false;
+     }
+
+     $letter = Utf8::substr($this->word, $previous, 1);
+
+     return ($letter == 'а' || $letter == 'я');
+ }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Spanish.php b/libraries/vendor/wamania/php-stemmer/src/Spanish.php
new file mode 100644
index 00000000000..c3563f85335
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Spanish.php
@@ -0,0 +1,345 @@
+word = Utf8::strtolower($word);
+
+ $this->rv();
+ $this->r1();
+ $this->r2();
+
+ $this->step0();
+
+ $word = $this->word;
+ $this->step1();
+
+ // Do step 2a if no ending was removed by step 1.
+ if ($this->word == $word) {
+ $this->step2a();
+
+ // Do Step 2b if step 2a was done, but failed to remove a suffix.
+ if ($this->word == $word) {
+ $this->step2b();
+ }
+ }
+
+ $this->step3();
+ $this->finish();
+
+ return $this->word;
+ }
+
+ /**
+  * Step 0: Attached pronoun
+  *
+  * Search for the longest among the following suffixes
+  *      me se sela selo selas selos la le lo las les los nos
+  *
+  * and delete it, if comes after one of
+  *      (a) iéndo ándo ár ér ír
+  *      (b) ando iendo ar er ir
+  *      (c) yendo following u
+  *
+  * in RV. In the case of (c), yendo must lie in RV, but the preceding u can be outside it.
+  * In the case of (a), deletion is followed by removing the acute accent (for example, haciéndola -> haciendo).
+  *
+  * @return boolean True when a pronoun was removed, false otherwise.
+  */
+ private function step0()
+ {
+     $pronouns = array('selas', 'selos', 'las', 'los', 'les', 'nos', 'selo', 'sela', 'me', 'se', 'la', 'le', 'lo');
+
+     // Positions are compared strictly with false so that a match is never confused with "not found".
+     if ( ($position = $this->searchIfInRv($pronouns)) === false) {
+         return false;
+     }
+
+     $suffixe = Utf8::substr($this->word, $position);
+
+     // a: accented verb ending + pronoun
+     $accented = array();
+     foreach (array('iéndo', 'ándo', 'ár', 'ér', 'ír') as $ending) {
+         $accented[] = $ending . $suffixe;
+     }
+
+     if ( ($position2 = $this->searchIfInRv($accented)) !== false) {
+         // Replace the accented tail by its unaccented form, then cut the pronoun off.
+         $suffixe2 = Utf8::deaccent(Utf8::substr($this->word, $position2), -1);
+         $this->word = Utf8::substr($this->word, 0, $position2) . $suffixe2;
+         $this->word = Utf8::substr($this->word, 0, $position);
+         return true;
+     }
+
+     // b: plain verb ending + pronoun
+     $plain = array();
+     foreach (array('iendo', 'ando', 'ar', 'er', 'ir') as $ending) {
+         $plain[] = $ending . $suffixe;
+     }
+
+     if ($this->searchIfInRv($plain) !== false) {
+         $this->word = Utf8::substr($this->word, 0, $position);
+         return true;
+     }
+
+     // c: "yendo" + pronoun in RV, preceded by a "u" that may lie outside RV
+     $position2 = $this->searchIfInRv(array('yendo' . $suffixe));
+     if ( ($position2 !== false) && (Utf8::substr($this->word, ($position2-1), 1) == 'u') ) {
+         $this->word = Utf8::substr($this->word, 0, $position);
+         return true;
+     }
+
+     return false;
+ }
+
+ /**
+  * Step 1: standard suffix removal.
+  *
+  * The suffix groups below are tried in order; the first group with a suffix at
+  * the end of the word decides the outcome, even when the region test then
+  * prevents any deletion.
+  *
+  * Search results are compared strictly with false: search() is unanchored
+  * here, so a word that consists only of a suffix is found at position 0,
+  * which a loose comparison would mistake for "not found".
+  *
+  * @return boolean True when one of the suffix groups matched, false otherwise.
+  */
+ private function step1()
+ {
+     // anza anzas ico ica icos icas ismo ismos able ables ible ibles ista
+     // istas oso osa osos osas amiento amientos imiento imientos
+     //      delete if in R2
+     if ( ($position = $this->search(array(
+         'imientos', 'imiento', 'amientos', 'amiento', 'osas', 'osos', 'osa', 'oso', 'istas', 'ista', 'ibles',
+         'ible', 'ables', 'able', 'ismos', 'ismo', 'icas', 'icos', 'ica', 'ico', 'anzas', 'anza'))) !== false) {
+
+         if ($this->inR2($position)) {
+             $this->word = Utf8::substr($this->word, 0, $position);
+         }
+         return true;
+     }
+
+     // adora ador ación adoras adores aciones ante antes ancia ancias
+     //      delete if in R2
+     //      if preceded by ic, delete if in R2
+     if ( ($position = $this->search(array(
+         'adoras', 'adora', 'aciones', 'ación', 'adores', 'ador', 'antes', 'ante', 'ancias', 'ancia'))) !== false) {
+
+         if ($this->inR2($position)) {
+             $this->word = Utf8::substr($this->word, 0, $position);
+         }
+
+         if ( ($position2 = $this->searchIfInR2(array('ic'))) !== false) {
+             $this->word = Utf8::substr($this->word, 0, $position2);
+         }
+         return true;
+     }
+
+     // logía logías
+     //      replace with log if in R2
+     if ( ($position = $this->search(array('logías', 'logía'))) !== false) {
+         if ($this->inR2($position)) {
+             $this->word = preg_replace('#(logías|logía)$#u', 'log', $this->word);
+         }
+         return true;
+     }
+
+     // ución uciones
+     //      replace with u if in R2
+     if ( ($position = $this->search(array('uciones', 'ución'))) !== false) {
+         if ($this->inR2($position)) {
+             $this->word = preg_replace('#(uciones|ución)$#u', 'u', $this->word);
+         }
+         return true;
+     }
+
+     // encia encias
+     //      replace with ente if in R2
+     if ( ($position = $this->search(array('encias', 'encia'))) !== false) {
+         if ($this->inR2($position)) {
+             $this->word = preg_replace('#(encias|encia)$#u', 'ente', $this->word);
+         }
+         return true;
+     }
+
+     // amente
+     //      delete if in R1
+     //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+     //      if preceded by os, ic or ad, delete if in R2
+     if ( ($position = $this->search(array('amente'))) !== false) {
+
+         // delete if in R1
+         if ($this->inR1($position)) {
+             $this->word = Utf8::substr($this->word, 0, $position);
+         }
+
+         // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
+         if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
+             $this->word = Utf8::substr($this->word, 0, $position2);
+             if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
+                 $this->word = Utf8::substr($this->word, 0, $position3);
+             }
+
+         // if preceded by os, ic or ad, delete if in R2
+         } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) !== false) {
+             $this->word = Utf8::substr($this->word, 0, $position4);
+         }
+         return true;
+     }
+
+     // mente
+     //      delete if in R2
+     //      if preceded by ante, able or ible, delete if in R2
+     if ( ($position = $this->search(array('mente'))) !== false) {
+
+         // delete if in R2
+         if ($this->inR2($position)) {
+             $this->word = Utf8::substr($this->word, 0, $position);
+         }
+
+         // if preceded by ante, able or ible, delete if in R2
+         if ( ($position2 = $this->searchIfInR2(array('ante', 'able', 'ible'))) !== false) {
+             $this->word = Utf8::substr($this->word, 0, $position2);
+         }
+         return true;
+     }
+
+     // idad idades
+     //      delete if in R2
+     //      if preceded by abil, ic or iv, delete if in R2
+     if ( ($position = $this->search(array('idades', 'idad'))) !== false) {
+
+         // delete if in R2
+         if ($this->inR2($position)) {
+             $this->word = Utf8::substr($this->word, 0, $position);
+         }
+
+         // if preceded by abil, ic or iv, delete if in R2
+         if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
+             $this->word = Utf8::substr($this->word, 0, $position2);
+         }
+         return true;
+     }
+
+     // iva ivo ivas ivos
+     //      delete if in R2
+     //      if preceded by at, delete if in R2
+     if ( ($position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'))) !== false) {
+
+         // delete if in R2
+         if ($this->inR2($position)) {
+             $this->word = Utf8::substr($this->word, 0, $position);
+         }
+
+         // if preceded by at, delete if in R2
+         if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
+             $this->word = Utf8::substr($this->word, 0, $position2);
+         }
+         return true;
+     }
+
+     return false;
+ }
+
+ /**
+  * Step 2a: Verb suffixes beginning y
+  *
+  * Looks for one of the y-suffixes in RV and deletes it when the letter right
+  * before it is "u". That "u" does not have to be in RV.
+  *
+  * @return boolean True when a suffix was removed, false otherwise.
+  */
+ private function step2a()
+ {
+     $position = $this->searchIfInRv(array(
+         'yamos', 'yendo', 'yeron', 'yan', 'yen', 'yais', 'yas', 'yes', 'yo', 'yó', 'ya', 'ye'));
+
+     // Strict comparison: only a real "false" means that no suffix was found.
+     if ($position === false) {
+         return false;
+     }
+
+     if (Utf8::substr($this->word, ($position-1), 1) == 'u') {
+         $this->word = Utf8::substr($this->word, 0, $position);
+         return true;
+     }
+
+     return false;
+ }
+
+ /**
+  * Step 2b: Other verb suffixes
+  * Search for the longest among the following suffixes in RV, and perform the action indicated.
+  *
+  * @return boolean True when a suffix was removed, false otherwise.
+  */
+ private function step2b()
+ {
+     // delete
+     // NOTE(review): the Snowball reference list appears to contain a few more endings
+     // (e.g. 'abais', 'arais', 'aseis', 'ieses', 'íamos') - verify against the specification before extending.
+     if ( ($position = $this->searchIfInRv(array(
+         'iésemos', 'iéramos', 'ábamos', 'iríamos', 'eríamos', 'aríamos', 'áramos', 'ásemos', 'eríais',
+         'aremos', 'eremos', 'iremos', 'asteis', 'ieseis', 'ierais', 'isteis', 'aríais',
+         'irían', 'aréis', 'erían', 'erías', 'eréis', 'iréis', 'irías', 'ieran', 'iesen', 'ieron', 'iendo', 'ieras',
+         'iríais', 'arían', 'arías',
+         'amos', 'imos', 'ados', 'idos', 'irán', 'irás', 'erán', 'erás', 'ería', 'iría', 'íais', 'arán', 'arás', 'aría',
+         'iera', 'iese', 'aste', 'iste', 'aban', 'aran', 'asen', 'aron', 'ando', 'abas', 'adas', 'idas', 'ases', 'aras',
+         'aré', 'erá', 'eré', 'áis', 'ías', 'irá', 'iré', 'aba', 'ían', 'ada', 'ara', 'ase', 'ida', 'ado', 'ido', 'ará',
+         'ad', 'ed', 'id', 'ís', 'ió', 'ar', 'er', 'ir', 'as', 'ía', 'an'
+     ))) !== false) {
+         $this->word = Utf8::substr($this->word, 0, $position);
+         return true;
+     }
+
+     // en es éis emos
+     //      delete, and if preceded by gu delete the u (the gu need not be in RV)
+     if ( ($position = $this->searchIfInRv(array('éis', 'emos', 'en', 'es'))) !== false) {
+         $this->word = Utf8::substr($this->word, 0, $position);
+
+         // Unanchored search: the "gu" may start before RV. Keep the "g", drop the "u".
+         if ( ($position2 = $this->search(array('gu'))) !== false) {
+             $this->word = Utf8::substr($this->word, 0, ($position2+1));
+         }
+
+         return true;
+     }
+
+     // Nothing matched: return false explicitly instead of falling off the end with null.
+     return false;
+ }
+
+ /**
+  * Step 3: residual suffix
+  * Search for the longest among the following suffixes in RV, and perform the action indicated.
+  *
+  * @return boolean True when a suffix was removed, false otherwise.
+  */
+ private function step3()
+ {
+     // os a o á í ó
+     //      delete if in RV
+     if ( ($position = $this->searchIfInRv(array('os', 'a', 'o', 'á', 'í', 'ó'))) !== false) {
+         $this->word = Utf8::substr($this->word, 0, $position);
+         return true;
+     }
+
+     // e é
+     //      delete if in RV, and if preceded by gu with the u in RV delete the u
+     if ( ($position = $this->searchIfInRv(array('e', 'é'))) !== false) {
+         $this->word = Utf8::substr($this->word, 0, $position);
+
+         // The "u" must be in RV; the "g" in front of it may lie outside.
+         $position2 = $this->searchIfInRv(array('u'));
+         if ( ($position2 !== false) && (Utf8::substr($this->word, ($position2-1), 1) == 'g') ) {
+             $this->word = Utf8::substr($this->word, 0, $position2);
+         }
+
+         // The e/é was removed in any case, so report a change.
+         return true;
+     }
+
+     return false;
+ }
+
+ /**
+  * Last pass: replace the acute-accented vowels á, í, ó, é and ú by their
+  * plain counterparts.
+  */
+ private function finish()
+ {
+     $accented = array('á', 'í', 'ó', 'é', 'ú');
+     $plain = array('a', 'i', 'o', 'e', 'u');
+
+     $this->word = Utf8::str_replace($accented, $plain, $this->word);
+ }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Stem.php b/libraries/vendor/wamania/php-stemmer/src/Stem.php
new file mode 100644
index 00000000000..642ece8d0c0
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Stem.php
@@ -0,0 +1,218 @@
+= $this->rvIndex);
+ }
+
+ /**
+  * Tells whether a position lies at or after the start of R1.
+  *
+  * @param integer $position Character index in the word
+  * @return boolean
+  */
+ protected function inR1($position)
+ {
+     $regionStart = $this->r1Index;
+
+     return $position >= $regionStart;
+ }
+
+ /**
+  * Tells whether a position lies at or after the start of R2.
+  *
+  * @param integer $position Character index in the word
+  * @return boolean
+  */
+ protected function inR2($position)
+ {
+     $regionStart = $this->r2Index;
+
+     return $position >= $regionStart;
+ }
+
+ /**
+  * Runs search() with the start of RV as offset.
+  *
+  * @param array $suffixes Candidate suffixes, tried in the given order
+  * @return integer|false Whatever search() returns
+  */
+ protected function searchIfInRv($suffixes)
+ {
+     $regionStart = $this->rvIndex;
+
+     return $this->search($suffixes, $regionStart);
+ }
+
+ /**
+  * Runs search() with the start of R1 as offset.
+  *
+  * @param array $suffixes Candidate suffixes, tried in the given order
+  * @return integer|false Whatever search() returns
+  */
+ protected function searchIfInR1($suffixes)
+ {
+     $regionStart = $this->r1Index;
+
+     return $this->search($suffixes, $regionStart);
+ }
+
+ /**
+  * Runs search() with the start of R2 as offset.
+  *
+  * @param array $suffixes Candidate suffixes, tried in the given order
+  * @return integer|false Whatever search() returns
+  */
+ protected function searchIfInR2($suffixes)
+ {
+     $regionStart = $this->r2Index;
+
+     return $this->search($suffixes, $regionStart);
+ }
+
+ /**
+  * Finds the first suffix of the list that ends the current word.
+  *
+  * The suffixes are tried in the given order, so callers list longer suffixes
+  * before the shorter ones they contain. A suffix only counts when
+  * Utf8::strrpos(), called with $offset, finds it and it reaches exactly to the
+  * end of the word.
+  *
+  * @param array   $suffixes Candidate suffixes
+  * @param integer $offset   Offset handed to Utf8::strrpos(); an offset beyond the word length yields false
+  * @return integer|false Start position of the matching suffix, or false when none matches
+  */
+ protected function search($suffixes, $offset = 0)
+ {
+     $wordLength = Utf8::strlen($this->word);
+
+     if ($offset > $wordLength) {
+         return false;
+     }
+
+     foreach ($suffixes as $candidate) {
+         $found = Utf8::strrpos($this->word, $candidate, $offset);
+
+         if ($found === false) {
+             continue;
+         }
+
+         // Only accept an occurrence that runs up to the end of the word.
+         if ($found + Utf8::strlen($candidate) == $wordLength) {
+             return $found;
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Computes R1 for the current word and stores its start index and text.
+  *
+  * R1 is the region after the first non-vowel following a vowel, or the end
+  * of the word if there is no such non-vowel.
+  */
+ protected function r1()
+ {
+     $region = $this->rx($this->word);
+
+     $this->r1Index = $region[0];
+     $this->r1 = $region[1];
+ }
+
+ /**
+  * Computes R2 from the previously computed R1.
+  *
+  * R2 is the region after the first non-vowel following a vowel in R1, or the
+  * end of the word if there is no such non-vowel. The stored index is relative
+  * to the whole word, hence the R1 start is added.
+  */
+ protected function r2()
+ {
+     $region = $this->rx($this->r1);
+
+     $this->r2Index = $this->r1Index + $region[0];
+     $this->r2 = $region[1];
+ }
+
+ /**
+  * Common function for R1 and R2
+  * Search the region after the first non-vowel following a vowel in $in, or the end of the string
+  * if there is no such non-vowel.
+  * R1 : $in = $this->word
+  * R2 : $in = R1
+  *
+  * The scan never looks past the last letter, so for a string that ends in a
+  * vowel run the returned index is the string length, as stated above (and not
+  * one position beyond it).
+  *
+  * @param string $in String to analyse
+  * @return array array($index, $value): start index of the region within $in and the region text
+  */
+ protected function rx($in)
+ {
+     $length = Utf8::strlen($in);
+
+     // defaults: empty region located at the end of the string
+     $value = '';
+     $index = $length;
+
+     // single pass: stop at the first non-vowel that directly follows a vowel
+     $previousIsVowel = false;
+     for ($i=0; $i<$length; $i++) {
+         $isVowel = in_array(Utf8::substr($in, $i, 1), static::$vowels);
+
+         if ($previousIsVowel && !$isVowel) {
+             // the region starts right after that non-vowel
+             $index = $i + 1;
+             $value = Utf8::substr($in, $index);
+
+             break;
+         }
+
+         $previousIsVowel = $isVowel;
+     }
+
+     return array($index, $value);
+ }
+
+ /**
+  * Used by spanish, italian, portuguese, etc (but not by french)
+  *
+  * Computes RV for the current word:
+  *  - second letter is a consonant: RV starts after the next vowel;
+  *  - first two letters are vowels: RV starts after the next consonant;
+  *  - consonant followed by vowel: RV starts after the third letter.
+  * When no such position exists, or the word has fewer than three letters,
+  * RV is empty and starts at the end of the word.
+  *
+  * @return boolean|null True when the word is shorter than three letters or an RV start was found;
+  *                      null when the scan for the next vowel/consonant found nothing
+  */
+ protected function rv()
+ {
+     $length = Utf8::strlen($this->word);
+
+     // Defaults: empty RV at the end of the word.
+     $this->rvIndex = $length;
+     $this->rv = '';
+
+     if ($length < 3) {
+         return true;
+     }
+
+     $firstIsVowel = in_array(Utf8::substr($this->word, 0, 1), static::$vowels);
+     $secondIsVowel = in_array(Utf8::substr($this->word, 1, 1), static::$vowels);
+
+     // Consonant-vowel start: RV is the region after the third letter.
+     if (!$firstIsVowel && $secondIsVowel) {
+         $this->rvIndex = 3;
+         $this->rv = Utf8::substr($this->word, 3);
+
+         return true;
+     }
+
+     // Remaining cases scan from the third letter on: for the next vowel when the second
+     // letter is a consonant, for the next consonant when the word starts with two vowels.
+     $wantVowel = !$secondIsVowel;
+
+     for ($i=2; $i<$length; $i++) {
+         $isVowel = in_array(Utf8::substr($this->word, $i, 1), static::$vowels);
+
+         if ($isVowel === $wantVowel) {
+             $this->rvIndex = $i + 1;
+             $this->rv = Utf8::substr($this->word, $this->rvIndex);
+
+             return true;
+         }
+     }
+
+     // Nothing found: the defaults stay in place and, as before, no value is returned.
+     return;
+ }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Stemmer.php b/libraries/vendor/wamania/php-stemmer/src/Stemmer.php
new file mode 100644
index 00000000000..c013fa858e9
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Stemmer.php
@@ -0,0 +1,19 @@
+
+ */
+interface Stemmer
+{
+ /**
+ * Main function to get the STEM of a word
+ *
+ * @param string $word A valid UTF-8 word
+ *
+ * @return string The stem computed for $word
+ *
+ * @throws \Exception
+ */
+ public function stem($word);
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Swedish.php b/libraries/vendor/wamania/php-stemmer/src/Swedish.php
new file mode 100644
index 00000000000..41496976b1b
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Swedish.php
@@ -0,0 +1,127 @@
+word = Utf8::strtolower($word);
+
+ // R2 is not used: R1 is defined in the same way as in the German stemmer
+ $this->r1();
+
+ // then R1 is adjusted so that the region before it contains at least 3 letters.
+ if ($this->r1Index < 3) {
+ $this->r1Index = 3;
+ $this->r1 = Utf8::substr($this->word, 3);
+ }
+
+ // Do each of steps 1, 2 and 3.
+ $this->step1();
+ $this->step2();
+ $this->step3();
+
+ return $this->word;
+ }
+
+ /**
+  * Tells whether a word ends with a valid s-ending, i.e. with one of
+  * b c d f g h j k l m n o p r t v y
+  *
+  * @param string $word Word whose last letter is inspected
+  * @return boolean
+  */
+ private function hasValidSEnding($word)
+ {
+     $validEndings = array('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y');
+
+     return in_array(Utf8::substr($word, -1, 1), $validEndings);
+ }
+
+ /**
+  * Step 1
+  * Looks in R1 for one of the listed endings and deletes it. When none is
+  * present, a final "s" in R1 is deleted provided the remaining word ends with
+  * a valid s-ending.
+  *
+  * @return boolean|null True when one of the listed endings was deleted; nothing is returned otherwise
+  */
+ private function step1()
+ {
+     // a arna erna heterna orna ad e ade ande arne are aste en anden aren heten
+     // ern ar er heter or as arnas ernas ornas es ades andes ens arens hetens
+     // erns at andet het ast
+     $deletable = array(
+         'heterna', 'hetens', 'ornas', 'andes', 'arnas', 'heter', 'ernas', 'anden', 'heten', 'andet', 'arens',
+         'orna', 'arna', 'erna', 'aren', 'ande', 'ades', 'arne', 'erns', 'aste', 'ade', 'ern', 'het',
+         'ast', 'are', 'ens', 'or', 'es', 'ad', 'en', 'at', 'ar', 'as', 'er', 'a', 'e'
+     );
+
+     $cut = $this->searchIfInR1($deletable);
+     if ($cut !== false) {
+         $this->word = Utf8::substr($this->word, 0, $cut);
+         return true;
+     }
+
+     // s: only dropped when what is left ends with a valid s-ending
+     $sPosition = $this->searchIfInR1(array('s'));
+     if ($sPosition !== false) {
+         $withoutS = Utf8::substr($this->word, 0, $sPosition);
+
+         if ($this->hasValidSEnding($withoutS)) {
+             $this->word = $withoutS;
+         }
+     }
+ }
+
+ /**
+  * Step 2
+  * When the word ends, within R1, with one of dd gd nn dt gt kt tt, its last
+  * letter is removed.
+  */
+ private function step2()
+ {
+     $doubles = array('dd', 'gd', 'nn', 'dt', 'gt', 'kt', 'tt');
+
+     if ($this->searchIfInR1($doubles) === false) {
+         return;
+     }
+
+     $this->word = Utf8::substr($this->word, 0, -1);
+ }
+
+ /**
+  * Step 3:
+  * Looks in R1 for lig, ig or els and deletes it; otherwise turns a final
+  * löst into lös, or a final fullt into full.
+  *
+  * @return boolean|null True when the word was changed; nothing is returned otherwise
+  */
+ private function step3()
+ {
+     // lig ig els: delete ("lig" is tried before the "ig" it contains)
+     $cut = $this->searchIfInR1(array('lig', 'ig', 'els'));
+     if ($cut !== false) {
+         $this->word = Utf8::substr($this->word, 0, $cut);
+         return true;
+     }
+
+     // löst -> lös, fullt -> full: both simply lose their final letter
+     foreach (array('löst', 'fullt') as $ending) {
+         if ($this->searchIfInR1(array($ending)) !== false) {
+             $this->word = Utf8::substr($this->word, 0, -1);
+             return true;
+         }
+     }
+ }
+}
diff --git a/libraries/vendor/wamania/php-stemmer/src/Utf8.php b/libraries/vendor/wamania/php-stemmer/src/Utf8.php
new file mode 100644
index 00000000000..e316c0d970d
--- /dev/null
+++ b/libraries/vendor/wamania/php-stemmer/src/Utf8.php
@@ -0,0 +1,708 @@
+
+ * @package Stato
+ * @subpackage view
+ */
+
+class Utf8
+{
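+ /**
+  * UTF-8 lookup table for lower case accented letters
+  *
+  * Maps lower case accented characters to their unaccented ASCII-7
+  * replacements. These are lower case letters only.
+  *
+  * NOTE(review): several keys in this copy appear as '?' or as plain ASCII
+  * letters mapped to themselves, which suggests the accented originals were
+  * lost in an encoding conversion - verify the table against upstream.
+  *
+  * @author Andreas Gohr
+  * @see deaccent()
+  */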
+ private static $utf8_lower_accents = array(
+ 'à' => 'a', 'ô' => 'o', 'd' => 'd', '?' => 'f', 'ë' => 'e', 'š' => 's', 'o' => 'o',
+ 'ß' => 'ss', 'a' => 'a', 'r' => 'r', '?' => 't', 'n' => 'n', 'a' => 'a', 'k' => 'k',
+ 's' => 's', '?' => 'y', 'n' => 'n', 'l' => 'l', 'h' => 'h', '?' => 'p', 'ó' => 'o',
+ 'ú' => 'u', 'e' => 'e', 'é' => 'e', 'ç' => 'c', '?' => 'w', 'c' => 'c', 'õ' => 'o',
+ '?' => 's', 'ø' => 'o', 'g' => 'g', 't' => 't', '?' => 's', 'e' => 'e', 'c' => 'c',
+ 's' => 's', 'î' => 'i', 'u' => 'u', 'c' => 'c', 'e' => 'e', 'w' => 'w', '?' => 't',
+ 'u' => 'u', 'c' => 'c', 'ö' => 'oe', 'è' => 'e', 'y' => 'y', 'a' => 'a', 'l' => 'l',
+ 'u' => 'u', 'u' => 'u', 's' => 's', 'g' => 'g', 'l' => 'l', 'ƒ' => 'f', 'ž' => 'z',
+ '?' => 'w', '?' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', '?' => 'd', 't' => 't',
+ 'r' => 'r', 'ä' => 'ae', 'í' => 'i', 'r' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
+ 'e' => 'e', 'ñ' => 'n', 'n' => 'n', 'h' => 'h', 'g' => 'g', 'd' => 'd', 'j' => 'j',
+ 'ÿ' => 'y', 'u' => 'u', 'u' => 'u', 'u' => 'u', 't' => 't', 'ý' => 'y', 'o' => 'o',
+ 'â' => 'a', 'l' => 'l', '?' => 'w', 'z' => 'z', 'i' => 'i', 'ã' => 'a', 'g' => 'g',
+ '?' => 'm', 'o' => 'o', 'i' => 'i', 'ù' => 'u', 'i' => 'i', 'z' => 'z', 'á' => 'a',
+ 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
+ );
+
+ /**
+  * UTF-8 case lookup table
+  *
+  * Maps the Unicode code point of each lower case letter to the code point
+  * of its corresponding upper case letter.
+  *
+  * @author Andreas Gohr
+  */
+ private static $utf8_lower_to_upper = array(
+ 0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
+ 0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
+ 0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
+ 0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
+ 0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
+ 0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
+ 0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
+ 0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
+ 0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
+ 0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
+ 0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
+ 0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
+ 0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
+ 0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
+ 0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
+ 0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
+ 0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
+ 0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
+ 0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
+ 0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
+ 0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
+ 0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
+ 0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
+ 0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
+ 0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
+ 0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
+ 0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
+ 0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
+ 0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
+ 0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
+ 0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
+ 0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
+ 0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
+ 0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
+ 0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
+ 0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
+ 0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
+ 0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
+ 0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
+ 0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
+ 0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
+ 0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
+ 0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
+ );
+
+ /**
+  * UTF-8 case lookup table
+  *
+  * Would map upper case code points to their corresponding lower case code
+  * points by flipping $utf8_lower_to_upper. The declaration below is
+  * commented out; strtolower() flips the table at run time instead.
+  *
+  * @author Andreas Gohr
+  */
+ //private static $utf8_upper_to_lower = array_flip(self::$utf8_lower_to_upper);
+
+
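+ /**
+  * UTF-8 lookup table for upper case accented letters
+  *
+  * Maps upper case accented characters to their unaccented ASCII-7
+  * replacements. These are upper case letters only.
+  *
+  * NOTE(review): several keys in this copy appear as '?' or as plain ASCII
+  * letters mapped to themselves, and 'Ð' is listed twice; this suggests the
+  * accented originals were lost in an encoding conversion - verify the
+  * table against upstream.
+  *
+  * @author Andreas Gohr
+  * @see deaccent()
+  */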
+ private static $utf8_upper_accents = array(
+ 'À' => 'A', 'Ô' => 'O', 'D' => 'D', '?' => 'F', 'Ë' => 'E', 'Š' => 'S', 'O' => 'O',
+ 'A' => 'A', 'R' => 'R', '?' => 'T', 'N' => 'N', 'A' => 'A', 'K' => 'K',
+ 'S' => 'S', '?' => 'Y', 'N' => 'N', 'L' => 'L', 'H' => 'H', '?' => 'P', 'Ó' => 'O',
+ 'Ú' => 'U', 'E' => 'E', 'É' => 'E', 'Ç' => 'C', '?' => 'W', 'C' => 'C', 'Õ' => 'O',
+ '?' => 'S', 'Ø' => 'O', 'G' => 'G', 'T' => 'T', '?' => 'S', 'E' => 'E', 'C' => 'C',
+ 'S' => 'S', 'Î' => 'I', 'U' => 'U', 'C' => 'C', 'E' => 'E', 'W' => 'W', '?' => 'T',
+ 'U' => 'U', 'C' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Y' => 'Y', 'A' => 'A', 'L' => 'L',
+ 'U' => 'U', 'U' => 'U', 'S' => 'S', 'G' => 'G', 'L' => 'L', 'ƒ' => 'F', 'Ž' => 'Z',
+ '?' => 'W', '?' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', '?' => 'D', 'T' => 'T',
+ 'R' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'R' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
+ 'E' => 'E', 'Ñ' => 'N', 'N' => 'N', 'H' => 'H', 'G' => 'G', 'Ð' => 'D', 'J' => 'J',
+ 'Ÿ' => 'Y', 'U' => 'U', 'U' => 'U', 'U' => 'U', 'T' => 'T', 'Ý' => 'Y', 'O' => 'O',
+ 'Â' => 'A', 'L' => 'L', '?' => 'W', 'Z' => 'Z', 'I' => 'I', 'Ã' => 'A', 'G' => 'G',
+ '?' => 'M', 'O' => 'O', 'I' => 'I', 'Ù' => 'U', 'I' => 'I', 'Z' => 'Z', 'Á' => 'A',
+ 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
+ );
+
+ /**
+ * UTF-8 array of common special characters
+ *
+ * This array should contain all special characters (not a letter or digit)
+ * defined in the various local charsets - it's not a complete list of non-alphanum
+ * characters in UTF-8. It's not perfect but should match most cases of special
+ * chars.
+ *
+ * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
+ * These chars are _not_ in the array either: _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
+ *
+ * @author Andreas Gohr
+ * @see utf8_stripspecials()
+ */
+ private static $utf8_special_chars = array(
+ 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
+ 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c,
+ 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
+ 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e,
+ 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
+ 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
+ 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
+ 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
+ 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
+ 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
+ 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
+ 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
+ 0x0385, 0x0387, 0x03b2, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
+ 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
+ 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
+ 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
+ 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
+ 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
+ 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
+ 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
+ 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
+ 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
+ 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
+ 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
+ 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
+ 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
+ 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
+ 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
+ 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
+ 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
+ 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
+ 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
+ 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
+ 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
+ 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
+ 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
+ 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
+ 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
+ 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
+ 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
+ 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
+ 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
+ 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
+ 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
+ 0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
+ 0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
+ 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
+ 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
+ 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
+ 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
+ );
+
+ /**
+  * URL-encode a filename so it may contain Unicode characters
+  *
+  * Slashes are kept as they are. With $safe enabled (the default) a name
+  * made up only of ASCII letters, digits and the characters / _ - . % is
+  * returned untouched, so the function can be applied repeatedly to the
+  * same string without encoding it twice.
+  *
+  * @author Andreas Gohr
+  * @see urlencode
+  * @param string $file filename to encode
+  * @param bool   $safe skip encoding when the name is already URL safe
+  * @return string
+  */
+ public static function encode_fn($file,$safe=true)
+ {
+     $alreadySafe = $safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#', $file);
+
+     if (!$alreadySafe) {
+         // encode everything, then turn the encoded slashes back into real ones
+         $file = str_replace('%2F', '/', urlencode($file));
+     }
+
+     return $file;
+ }
+
+ /**
+  * URL-decode a filename
+  *
+  * Thin wrapper that hands the name straight to urldecode().
+  *
+  * @author Andreas Gohr
+  * @see urldecode
+  * @param string $file encoded filename
+  * @return string
+  */
+ public static function decode_fn($file)
+ {
+     return urldecode($file);
+ }
+
+ /**
+  * Checks if a string contains 7bit ASCII only
+  *
+  * @author Andreas Gohr
+  * @param string $str string to inspect
+  * @return bool true when no byte is greater than 127 (also for an empty string)
+  */
+ public static function is_ascii($str)
+ {
+     $length = strlen($str);
+     for ($i = 0; $i < $length; $i++) {
+         // square-bracket offsets: the curly-brace form no longer exists in PHP 8
+         if (ord($str[$i]) > 127) return false;
+     }
+     return true;
+ }
+
+ /**
+ * Strips all highbyte chars
+ *
+ * Returns a pure ASCII7 string
+ *
+ * @author Andreas Gohr
+ */
+ public static function strip($str)
+ {
+ $ascii = '';
+ for($i=0; $i
+ * @link http://www.php.net/manual/en/function.utf8-encode.php
+ */
+ public static function check($str)
+ {
+ for ($i=0; $i
+ * @see strlen()
+ * @see utf8_decode()
+ */
+ /**
+  * Unicode aware replacement for strlen(): counts characters, not bytes
+  *
+  * Uses the mbstring extension if available (and UTF8_NOMBSTRING is not
+  * defined). Otherwise falls back to measuring the utf8_decode()d string;
+  * utf8_decode() is deprecated as of PHP 8.2, so that path is a last resort.
+  *
+  * @param string $string UTF-8 encoded string
+  * @return int
+  */
+ public static function strlen($string)
+ {
+     if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strlen'))
+         return mb_strlen($string,'utf-8');
+
+     return strlen(utf8_decode($string));
+ }
+
+ /**
+  * Unicode aware replacement for substr()
+  *
+  * The string is split into characters and the requested slice is joined
+  * back together, so $start and $length count characters, not bytes, and
+  * negative values behave as they do for array_slice().
+  *
+  * @author lmak at NOSPAM dot iti dot gr
+  * @link http://www.php.net/manual/en/function.substr.php
+  * @see substr()
+  * @param string   $str    UTF-8 encoded string
+  * @param int      $start  index of the first character to keep
+  * @param int|null $length number of characters to keep, null for "up to the end"
+  * @return string the slice; '' when $str is empty or cannot be split as UTF-8
+  */
+ public static function substr($str,$start,$length=null)
+ {
+     $ar = array();
+
+     // The "s" modifier lets "." match newlines too, so they are not dropped.
+     // A failed match (e.g. invalid UTF-8) or an empty string has nothing to slice.
+     if (!preg_match_all("/./us", $str, $ar) || empty($ar[0])) {
+         return '';
+     }
+
+     // Strict comparison: a length of 0 is a real length and must give ''.
+     if ($length !== null) {
+         return join("", array_slice($ar[0], $start, $length));
+     }
+
+     return join("", array_slice($ar[0], $start));
+ }
+
+ /**
+  * Unicode aware replacement for substr_replace()
+  *
+  * Keeps the first $start characters, appends $replacement and, when a
+  * length is given, appends whatever follows the replaced span. With a
+  * null length everything from $start to the end of the string is replaced.
+  * A length of 0 inserts $replacement without removing anything.
+  *
+  * @author Andreas Gohr
+  * @see substr_replace()
+  * @param string   $string      UTF-8 encoded string
+  * @param string   $replacement text to put in
+  * @param int      $start       character index where the replacement begins
+  * @param int|null $length      number of characters to replace, null for "to the end"
+  * @return string
+  */
+ public static function substr_replace($string, $replacement, $start , $length=null )
+ {
+     $ret = '';
+     if($start>0) $ret .= self::substr($string, 0, $start);
+     $ret .= $replacement;
+
+     // Strict comparison: with "!= null" a length of 0 was treated like
+     // "no length" and the rest of the string was silently dropped.
+     if($length!==null) $ret .= self::substr($string, $start+$length);
+
+     return $ret;
+ }
+
+ /**
+  * Unicode aware replacement for explode()
+  *
+  * Splits $str at every occurrence of $sep using a UTF-8 aware regular
+  * expression. An empty separator raises an E_USER_WARNING and yields FALSE.
+  *
+  * @TODO support third limit arg
+  * @author Harry Fuecks
+  * @see explode();
+  * @param string $sep separator
+  * @param string $str string to split
+  * @return array|false
+  */
+ public static function explode($sep, $str)
+ {
+     if ( $sep != '' ) {
+         $pattern = '!'.preg_quote($sep,'!').'!u';
+         return preg_split($pattern,$str);
+     }
+
+     trigger_error('Empty delimiter',E_USER_WARNING);
+     return FALSE;
+ }
+
+ /**
+  * Unicode aware replacement for str_replace()
+  *
+  * Search terms are turned into UTF-8 aware regular expressions and handed
+  * to preg_replace(). Both the search terms and the replacements are taken
+  * literally: pattern meta characters are quoted, and "$" and "\" in the
+  * replacement are escaped so they are not read as back-references.
+  *
+  * @todo support PHP5 count (fourth arg)
+  * @author Harry Fuecks
+  * @see str_replace();
+  * @param string|array $s   search term(s)
+  * @param string|array $r   replacement(s)
+  * @param string|array $str subject(s)
+  * @return string|array|null result of preg_replace()
+  */
+ public static function str_replace($s,$r,$str)
+ {
+     if(!is_array($s)){
+         $s = '!'.preg_quote($s,'!').'!u';
+     }else{
+         foreach ($s as $k => $v) {
+             // quote the "!" delimiter here as well, otherwise a search
+             // term containing "!" ends the pattern early
+             $s[$k] = '!'.preg_quote($v,'!').'!u';
+         }
+     }
+
+     // make the replacement literal for preg_replace()
+     $literal = array('\\' => '\\\\', '$' => '\\$');
+     if(is_array($r)){
+         foreach ($r as $k => $v) {
+             $r[$k] = strtr($v, $literal);
+         }
+     }else{
+         $r = strtr($r, $literal);
+     }
+
+     return preg_replace($s,$r,$str);
+ }
+
+ /**
+  * Unicode aware replacement for ltrim()
+  *
+  * Without a charlist the native ltrim() is used. With a charlist, every
+  * leading character that appears in it is removed; the charlist is taken
+  * literally (no ".." ranges).
+  *
+  * @author Andreas Gohr
+  * @see ltrim()
+  * @param string $str      UTF-8 encoded string
+  * @param string $charlist characters to strip
+  * @return string
+  */
+ public static function ltrim($str,$charlist='')
+ {
+     if($charlist == '') return ltrim($str);
+
+     // preg_quote() escapes everything that is special inside a character
+     // class, including "^", which would otherwise negate or break the class
+     $charlist = preg_quote($charlist, '/');
+
+     return preg_replace('/^['.$charlist.']+/u','',$str);
+ }
+
+ /**
+  * Unicode aware replacement for rtrim()
+  *
+  * Without a charlist the native rtrim() is used. With a charlist, every
+  * trailing character that appears in it is removed; the charlist is taken
+  * literally (no ".." ranges).
+  *
+  * @author Andreas Gohr
+  * @see rtrim()
+  * @param string $str      UTF-8 encoded string
+  * @param string $charlist characters to strip
+  * @return string
+  */
+ public static function rtrim($str,$charlist='')
+ {
+     if($charlist == '') return rtrim($str);
+
+     // preg_quote() escapes everything that is special inside a character
+     // class, including "^", which would otherwise negate or break the class
+     $charlist = preg_quote($charlist, '/');
+
+     // "D" anchors "$" at the very end of the string; without it the
+     // pattern would also match just before a trailing newline
+     return preg_replace('/['.$charlist.']+$/uD','',$str);
+ }
+
+ /**
+  * Unicode aware replacement for trim()
+  *
+  * Without a charlist the native trim() is used. With a charlist the
+  * characters in it are stripped from both ends of the string.
+  *
+  * @author Andreas Gohr
+  * @see trim()
+  * @param string $str      UTF-8 encoded string
+  * @param string $charlist characters to strip
+  * @return string
+  */
+ public static function trim($str,$charlist='')
+ {
+     if($charlist == '') return trim($str);
+
+     // the charlist has to be handed on, otherwise both helpers fall back
+     // to plain whitespace trimming and the argument has no effect
+     return self::ltrim(self::rtrim($str, $charlist), $charlist);
+ }
+
+
+ /**
+  * This is a unicode aware replacement for strtolower()
+  *
+  * Uses mb_string extension if available (and UTF8_NOMBSTRING is not
+  * defined). Otherwise every code point that has an entry in the flipped
+  * $utf8_lower_to_upper table is replaced by its lower case counterpart.
+  *
+  * @author Andreas Gohr
+  * @see strtolower()
+  * @see strtoupper()
+  * @param string $string UTF-8 encoded string
+  * @return string
+  */
+ public static function strtolower($string)
+ {
+     if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtolower'))
+         return mb_strtolower($string,'utf-8');
+
+     // flip the table once and keep it for later calls
+     static $utf8_upper_to_lower = null;
+     if($utf8_upper_to_lower === null){
+         $utf8_upper_to_lower = array_flip(self::$utf8_lower_to_upper);
+     }
+
+     $uni = self::utf8_to_unicode($string);
+     foreach ($uni as $i => $codepoint){
+         // isset() avoids an undefined index notice for code points
+         // that have no case mapping
+         if(isset($utf8_upper_to_lower[$codepoint])){
+             $uni[$i] = $utf8_upper_to_lower[$codepoint];
+         }
+     }
+     return self::unicode_to_utf8($uni);
+ }
+
+ /**
+  * This is a unicode aware replacement for strtoupper()
+  *
+  * Uses mb_string extension if available (and UTF8_NOMBSTRING is not
+  * defined). Otherwise every code point that has an entry in
+  * $utf8_lower_to_upper is replaced by its upper case counterpart.
+  *
+  * @author Andreas Gohr
+  * @see strtoupper()
+  * @see strtolower()
+  * @param string $string UTF-8 encoded string
+  * @return string
+  */
+ public static function strtoupper($string)
+ {
+     // test for the function that is actually called below
+     if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtoupper'))
+         return mb_strtoupper($string,'utf-8');
+
+     $uni = self::utf8_to_unicode($string);
+     foreach ($uni as $i => $codepoint){
+         // isset() avoids an undefined index notice for code points
+         // that have no case mapping
+         if(isset(self::$utf8_lower_to_upper[$codepoint])){
+             $uni[$i] = self::$utf8_lower_to_upper[$codepoint];
+         }
+     }
+     return self::unicode_to_utf8($uni);
+ }
+
+ /**
+  * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
+  *
+  * $case selects the tables that are applied: a negative value only the
+  * lower case table, a positive value only the upper case table, and 0
+  * (the default) the lower case table followed by the upper case table.
+  *
+  * @author Andreas Gohr
+  * @param string $string text to deaccent
+  * @param int    $case   -1 lower case only, 1 upper case only, 0 both
+  * @return string
+  */
+ public static function deaccent($string,$case=0)
+ {
+     $tables = array();
+     if($case <= 0){
+         $tables[] = self::$utf8_lower_accents;
+     }
+     if($case >= 0){
+         $tables[] = self::$utf8_upper_accents;
+     }
+
+     foreach ($tables as $accents) {
+         $string = str_replace(array_keys($accents),array_values($accents),$string);
+     }
+
+     return $string;
+ }
+
+ /**
+  * Removes special characters (nonalphanumeric) from a UTF-8 string
+  *
+  * The character class is built from $additional, the control characters
+  * 0x00 to 0x19 (which are not part of $utf8_special_chars) and the quoted
+  * contents of $utf8_special_chars. The quoted table is computed on first
+  * use and kept in a static variable.
+  *
+  * @author Andreas Gohr
+  * @param string $string     The UTF8 string to strip of special chars
+  * @param string $repl       Replace special with this string
+  * @param string $additional Additional chars to strip (used in regexp char class)
+  * @return string
+  */
+ public static function stripspecials($string,$repl='',$additional='')
+ {
+     static $specials = null;
+
+     if($specials === null){
+         $specials = preg_quote(self::unicode_to_utf8(self::$utf8_special_chars), '/');
+     }
+
+     $pattern = '/['.$additional.'\x00-\x19'.$specials.']/u';
+
+     return preg_replace($pattern,$repl,$string);
+ }
+
+ /**
+  * This is an Unicode aware replacement for strpos
+  *
+  * Uses mb_string extension if available (and UTF8_NOMBSTRING is not
+  * defined). The fallback splits the haystack at the needle and measures
+  * the first piece; with an offset it searches the remainder of the
+  * haystack and adds the offset back onto the position.
+  *
+  * @author Harry Fuecks
+  * @see strpos()
+  * @param string $haystack string to search in
+  * @param string $needle   string to look for
+  * @param int    $offset   character index to start searching from
+  * @return int|false character position of the first match, false if none
+  */
+ public static function strpos($haystack, $needle, $offset=0)
+ {
+     if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos'))
+         return mb_strpos($haystack,$needle,$offset,'utf-8');
+
+     if($offset){
+         if ( !is_int($offset) ) {
+             trigger_error('Offset must be an integer',E_USER_WARNING);
+             return false;
+         }
+
+         // search the part behind the offset, then shift the result back
+         $rest = self::substr($haystack, $offset);
+         $pos = self::strpos($rest, $needle);
+
+         return ($pos === false) ? false : $pos + $offset;
+     }
+
+     // more than one piece means the needle occurred at least once
+     $pieces = self::explode($needle, $haystack);
+     if ( count($pieces) > 1 ) {
+         return self::strlen($pieces[0]);
+     }
+
+     return false;
+ }
+
+ /**
+  * This is an Unicode aware replacement for strrpos
+  *
+  * Uses mb_string extension if available (and UTF8_NOMBSTRING is not
+  * defined). The fallback splits the haystack at the needle and derives
+  * the position from the length of the last piece; with an offset it
+  * searches the remainder of the haystack and adds the offset back on.
+  *
+  * @author Harry Fuecks
+  * @see strrpos()
+  * @param string $haystack string to search in
+  * @param string $needle   string to look for
+  * @param int    $offset   character index to start searching from
+  * @return int|false character position of the last match, false if none
+  */
+ public static function strrpos($haystack, $needle, $offset=0)
+ {
+     if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strrpos'))
+         return mb_strrpos($haystack, $needle, $offset, 'utf-8');
+
+     if ($offset) {
+         if ( !is_int($offset) ) {
+             trigger_error('Offset must be an integer', E_USER_WARNING);
+             return false;
+         }
+
+         // search the part behind the offset, then shift the result back
+         $rest = self::substr($haystack, $offset);
+         $pos = self::strrpos($rest, $needle);
+
+         return ($pos === false) ? false : $pos + $offset;
+     }
+
+     $pieces = self::explode($needle, $haystack);
+     $count = count($pieces);
+     if ( $count <= 1 ) {
+         return false;
+     }
+
+     // total length minus the last piece minus the needle in front of it
+     $tail = $pieces[$count - 1];
+     return self::strlen($haystack) - self::strlen($tail) - self::strlen($needle);
+ }
+
+ /**
+  * Encodes UTF-8 characters to HTML entities
+  *
+  * Every two byte UTF-8 sequence (lead byte 110x xxxx) is replaced by a
+  * decimal numeric character reference such as &#233;. All other bytes,
+  * including longer sequences and a lead byte that ends the string, are
+  * copied through unchanged.
+  *
+  * @link http://www.php.net/manual/en/function.utf8-decode.php
+  * @param string $str UTF-8 encoded string
+  * @return string
+  */
+ public static function tohtml ($str)
+ {
+     $ret = '';
+     $max = strlen($str);
+     $last = 0; // index of the first byte not yet copied to $ret
+
+     for ($i = 0; $i < $max; $i++) {
+         $lead = ord($str[$i]);
+
+         // only 110x xxxx lead bytes that still have a byte behind them
+         if (($lead >> 5) !== 6 || $i + 1 >= $max) {
+             continue;
+         }
+
+         // append all the regular characters passed so far
+         $ret .= substr($str, $last, $i - $last);
+
+         // 5 payload bits from the lead byte, 6 from the trailing byte
+         $trail = ord($str[++$i]);
+         $codepoint = (($lead & 31) << 6) | ($trail & 63);
+
+         $ret .= '&#' . $codepoint . ';';
+         $last = $i + 1;
+     }
+
+     // append the last batch of regular characters
+     return $ret . substr($str, $last);
+ }
+
+ /**
+  * This function returns any UTF-8 encoded text as a list of
+  * Unicode values:
+  *
+  * Bytes below 128 are emitted as they are. A byte of 128 or more starts
+  * (or continues) a multi byte sequence whose length is taken from the
+  * first byte: 2 bytes below 224, 3 bytes below 240, 4 bytes otherwise.
+  * The input is not validated.
+  *
+  * @author Scott Michael Reynen
+  * @link http://www.randomchaos.com/document.php?source=php_and_unicode
+  * @see unicode_to_utf8()
+  * @param string $str UTF-8 encoded string (taken by reference, not modified)
+  * @return array list of integer code points
+  */
+ public static function utf8_to_unicode( &$str )
+ {
+     $unicode = array();
+     $values = array();
+     $looking_for = 1;
+     $length = strlen( $str );
+
+     for ($i = 0; $i < $length; $i++ ) {
+         $this_value = ord( $str[ $i ] );
+         if ( $this_value < 128 ) {
+             $unicode[] = $this_value;
+             continue;
+         }
+
+         if ( count( $values ) == 0 ) {
+             // the first byte of a sequence tells how many bytes belong to it
+             if ( $this_value < 224 ) {
+                 $looking_for = 2;
+             } elseif ( $this_value < 240 ) {
+                 $looking_for = 3;
+             } else {
+                 $looking_for = 4;
+             }
+         }
+         $values[] = $this_value;
+
+         if ( count( $values ) != $looking_for ) {
+             continue;
+         }
+
+         if ( $looking_for == 4 ) {
+             // 3 + 6 + 6 + 6 payload bits: code points above U+FFFF
+             $number = ( ( $values[0] % 8 ) * 262144 ) + ( ( $values[1] % 64 ) * 4096 )
+                 + ( ( $values[2] % 64 ) * 64 ) + ( $values[3] % 64 );
+         } elseif ( $looking_for == 3 ) {
+             $number = ( ( $values[0] % 16 ) * 4096 ) + ( ( $values[1] % 64 ) * 64 ) + ( $values[2] % 64 );
+         } else {
+             $number = ( ( $values[0] % 32 ) * 64 ) + ( $values[1] % 64 );
+         }
+
+         $unicode[] = $number;
+         $values = array();
+         $looking_for = 1;
+     }
+     return $unicode;
+ }
+
+ /**
+  * This function converts a Unicode array back to its UTF-8 representation
+  *
+  * Code points below 128 become one byte, below 2048 two bytes, below
+  * 65536 three bytes and everything above four bytes.
+  *
+  * @author Scott Michael Reynen
+  * @link http://www.randomchaos.com/document.php?source=php_and_unicode
+  * @see utf8_to_unicode()
+  * @param array $str list of integer code points (taken by reference, not modified)
+  * @return string UTF-8 encoded string, '' when $str is not an array
+  */
+ public static function unicode_to_utf8( &$str )
+ {
+     if (!is_array($str)) return '';
+
+     $utf8 = '';
+     foreach( $str as $unicode ) {
+         if ( $unicode < 128 ) {
+             $utf8.= chr( $unicode );
+         } elseif ( $unicode < 2048 ) {
+             $utf8.= chr( 192 + ( $unicode >> 6 ) );
+             $utf8.= chr( 128 + ( $unicode & 63 ) );
+         } elseif ( $unicode < 65536 ) {
+             $utf8.= chr( 224 + ( $unicode >> 12 ) );
+             $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
+             $utf8.= chr( 128 + ( $unicode & 63 ) );
+         } else {
+             // code points above U+FFFF need a four byte sequence
+             $utf8.= chr( 240 + ( ( $unicode >> 18 ) & 7 ) );
+             $utf8.= chr( 128 + ( ( $unicode >> 12 ) & 63 ) );
+             $utf8.= chr( 128 + ( ( $unicode >> 6 ) & 63 ) );
+             $utf8.= chr( 128 + ( $unicode & 63 ) );
+         }
+     }
+     return $utf8;
+ }
+
+ /**
+  * UTF-8 to UTF-16BE conversion.
+  *
+  * Uses mb_convert_encoding() if available (and UTF8_NOMBSTRING is not
+  * defined). The fallback packs every code point returned by
+  * utf8_to_unicode() as one big endian 16 bit value, so it may really be
+  * UCS-2 due to utf8_to_unicode limits.
+  *
+  * @param string $str UTF-8 encoded string (taken by reference)
+  * @param bool   $bom prepend the byte order mark FE FF
+  * @return string
+  */
+ public static function utf8_to_utf16be(&$str, $bom = false)
+ {
+     $out = '';
+     if ($bom) {
+         $out = "\xFE\xFF";
+     }
+
+     if(!defined('UTF8_NOMBSTRING') && function_exists('mb_convert_encoding'))
+         return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');
+
+     foreach(self::utf8_to_unicode($str) as $codepoint){
+         $out .= pack('n',$codepoint);
+     }
+
+     return $out;
+ }
+
+ /**
+  * UTF-16BE to UTF-8 conversion.
+  *
+  * Unpacks the input as a series of big endian 16 bit values and encodes
+  * each of them with unicode_to_utf8().
+  *
+  * @param string $str UTF-16BE encoded string (taken by reference)
+  * @return string
+  */
+ public static function utf16be_to_utf8(&$str)
+ {
+     $codepoints = unpack('n*',$str);
+
+     return self::unicode_to_utf8($codepoints);
+ }
+}