2016-06-14 09:54:04 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-06-14 09:54:04 +00:00
|
|
|
|
|
|
|
namespace Phpml\FeatureExtraction;
|
|
|
|
|
|
|
|
use Phpml\Exception\InvalidArgumentException;
|
|
|
|
|
|
|
|
/**
 * Set of stop words with key-based membership checks.
 *
 * The words are stored as array keys so that a lookup is a single key probe
 * instead of a scan over the whole list.
 */
class StopWords
{
    /**
     * Stop words stored as array keys, each mapped to true.
     *
     * @var array<int|string, bool>
     */
    protected $stopWords = [];

    /**
     * @param array $stopWords list of tokens to treat as stop words
     */
    public function __construct(array $stopWords)
    {
        // Turn the list into a word => true map; duplicate words collapse into one key.
        $this->stopWords = array_fill_keys($stopWords, true);
    }

    /**
     * Tells whether the given token is one of the configured stop words.
     *
     * The check is an exact array-key lookup, so it is case-sensitive.
     */
    public function isStopWord(string $token): bool
    {
        return isset($this->stopWords[$token]);
    }

    /**
     * Creates the stop-word set for a language.
     *
     * The language name is used as a class name inside the "StopWords"
     * sub-namespace of this namespace, and that class is instantiated
     * without constructor arguments.
     *
     * @param string $language class name of the language set, e.g. "English"
     *
     * @throws InvalidArgumentException when no such class exists or the class is not a StopWords subclass
     */
    public static function factory(string $language = 'English'): self
    {
        // Plain concatenation: the "${var}" interpolation form is deprecated since PHP 8.2.
        $className = __NAMESPACE__.'\\StopWords\\'.$language;

        // Reject unrelated classes before instantiating them; otherwise their constructor
        // would run and the failure would only surface as a TypeError on the return type.
        if (!class_exists($className) || !is_subclass_of($className, self::class)) {
            throw new InvalidArgumentException(sprintf('Can\'t find "%s" language for StopWords', $language));
        }

        return new $className();
    }
}
|