<?php

declare(strict_types=1);

namespace Phpml\Tests\FeatureExtraction;

use Phpml\Exception\InvalidArgumentException;
use Phpml\FeatureExtraction\StopWords;
use PHPUnit\Framework\TestCase;

/**
 * Tests for StopWords: instances built from a custom word list and the
 * language-specific instances obtained through StopWords::factory().
 */
class StopWordsTest extends TestCase
{
    /**
     * An instance built from an explicit list reports the listed words as
     * stop words and rejects words that were not listed.
     */
    public function testCustomStopWords(): void
    {
        $stopWords = new StopWords(['lorem', 'ipsum', 'dolor']);

        self::assertTrue($stopWords->isStopWord('lorem'));
        self::assertTrue($stopWords->isStopWord('ipsum'));
        self::assertTrue($stopWords->isStopWord('dolor'));

        self::assertFalse($stopWords->isStopWord('consectetur'));
        self::assertFalse($stopWords->isStopWord('adipiscing'));
        self::assertFalse($stopWords->isStopWord('amet'));
    }

    /**
     * Requesting the language 'Lorem' from the factory must raise an
     * InvalidArgumentException.
     */
    public function testThrowExceptionOnInvalidLanguage(): void
    {
        // The expectation has to be registered before the call that throws.
        $this->expectException(InvalidArgumentException::class);

        StopWords::factory('Lorem');
    }

    /**
     * The 'English' factory instance treats 'again' as a stop word and
     * 'strategy' as a regular word.
     */
    public function testEnglishStopWords(): void
    {
        $stopWords = StopWords::factory('English');

        self::assertTrue($stopWords->isStopWord('again'));
        self::assertFalse($stopWords->isStopWord('strategy'));
    }

    /**
     * The 'Polish' factory instance treats 'wam' as a stop word and
     * 'transhumanizm' as a regular word.
     */
    public function testPolishStopWords(): void
    {
        $stopWords = StopWords::factory('Polish');

        self::assertTrue($stopWords->isStopWord('wam'));
        self::assertFalse($stopWords->isStopWord('transhumanizm'));
    }

    /**
     * The 'French' factory instance treats 'alors' as a stop word and
     * 'carte' as a regular word.
     */
    public function testFrenchStopWords(): void
    {
        $stopWords = StopWords::factory('French');

        self::assertTrue($stopWords->isStopWord('alors'));
        self::assertFalse($stopWords->isStopWord('carte'));
    }
}