2016-06-16 07:58:17 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-06-16 07:58:17 +00:00
|
|
|
|
2018-01-06 12:09:33 +00:00
|
|
|
namespace Phpml\Tests;
|
2016-06-16 07:58:17 +00:00
|
|
|
|
|
|
|
use Phpml\Classification\SVC;
|
|
|
|
use Phpml\FeatureExtraction\TfIdfTransformer;
|
2017-05-24 07:06:54 +00:00
|
|
|
use Phpml\FeatureExtraction\TokenCountVectorizer;
|
2018-02-14 18:05:48 +00:00
|
|
|
use Phpml\FeatureSelection\SelectKBest;
|
2018-01-12 09:54:20 +00:00
|
|
|
use Phpml\ModelManager;
|
2016-06-16 07:58:17 +00:00
|
|
|
use Phpml\Pipeline;
|
2016-06-16 08:26:29 +00:00
|
|
|
use Phpml\Preprocessing\Imputer;
|
|
|
|
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
|
2017-11-06 07:56:37 +00:00
|
|
|
use Phpml\Preprocessing\Normalizer;
|
2016-07-24 11:52:52 +00:00
|
|
|
use Phpml\Regression\SVR;
|
2017-05-24 07:06:54 +00:00
|
|
|
use Phpml\Tokenization\WordTokenizer;
|
2017-02-03 11:58:25 +00:00
|
|
|
use PHPUnit\Framework\TestCase;
|
2016-06-16 07:58:17 +00:00
|
|
|
|
2017-02-03 11:58:25 +00:00
|
|
|
/**
 * Exercises Pipeline: construction and accessors, estimator replacement,
 * train/predict with chained transformers, and persistence through ModelManager.
 */
class PipelineTest extends TestCase
{
    /**
     * The transformers and estimator passed to the constructor are exposed
     * unchanged through the corresponding getters.
     */
    public function testPipelineConstruction(): void
    {
        $transformers = [
            new TfIdfTransformer(),
        ];
        $estimator = new SVC();

        $pipeline = new Pipeline($transformers, $estimator);

        self::assertEquals($transformers, $pipeline->getTransformers());
        self::assertEquals($estimator, $pipeline->getEstimator());
    }

    /**
     * setEstimator() replaces the estimator that was given at construction time.
     */
    public function testPipelineEstimatorSetter(): void
    {
        $pipeline = new Pipeline([new TfIdfTransformer()], new SVC());

        $estimator = new SVR();
        $pipeline->setEstimator($estimator);

        self::assertEquals($estimator, $pipeline->getEstimator());
    }

    /**
     * Trains on numeric samples that contain null entries, using an Imputer
     * followed by a Normalizer, and checks the class predicted for one sample.
     */
    public function testPipelineWorkflow(): void
    {
        $transformers = [
            new Imputer(null, new MostFrequentStrategy()),
            new Normalizer(),
        ];
        $estimator = new SVC();

        $samples = [
            [1, -1, 2],
            [2, 0, null],
            [null, 1, -1],
        ];

        $targets = [
            4,
            1,
            4,
        ];

        $pipeline = new Pipeline($transformers, $estimator);
        $pipeline->train($samples, $targets);

        $predicted = $pipeline->predict([[0, 0, 0]]);

        self::assertEquals(4, $predicted[0]);
    }

    /**
     * Trains on raw text samples through TokenCountVectorizer and
     * TfIdfTransformer, then checks the labels predicted for unseen phrases.
     */
    public function testPipelineTransformers(): void
    {
        $transformers = [
            new TokenCountVectorizer(new WordTokenizer()),
            new TfIdfTransformer(),
        ];

        $estimator = new SVC();

        $samples = [
            'Hello Paul',
            'Hello Martin',
            'Goodbye Tom',
            'Hello John',
            'Goodbye Alex',
            'Bye Tony',
        ];

        $targets = [
            'greetings',
            'greetings',
            'farewell',
            'greetings',
            'farewell',
            'farewell',
        ];

        $pipeline = new Pipeline($transformers, $estimator);
        $pipeline->train($samples, $targets);

        $expected = ['greetings', 'farewell'];

        $predicted = $pipeline->predict(['Hello Max', 'Goodbye Mark']);

        self::assertEquals($expected, $predicted);
    }

    /**
     * Uses SelectKBest as the only transformer and checks both the scores it
     * reports after training and the pipeline's prediction for one sample.
     */
    public function testPipelineTransformersWithTargets(): void
    {
        $samples = [[1, 2, 1], [1, 3, 4], [5, 2, 1], [1, 3, 3], [1, 3, 4], [0, 3, 5]];
        $targets = ['a', 'a', 'a', 'b', 'b', 'b'];

        // Keep a handle on the selector so its scores can be inspected after training.
        $selector = new SelectKBest(2);
        $pipeline = new Pipeline([$selector], new SVC());
        $pipeline->train($samples, $targets);

        self::assertEqualsWithDelta([1.47058823, 4.0, 3.0], $selector->scores(), 0.00000001);
        self::assertEquals(['b'], $pipeline->predict([[1, 3, 5]]));
    }

    /**
     * Saves a trained pipeline with ModelManager, restores it from the file, and
     * checks that the restored object equals the original and predicts the same
     * labels.
     */
    public function testSaveAndRestore(): void
    {
        $pipeline = new Pipeline([
            new TokenCountVectorizer(new WordTokenizer()),
            new TfIdfTransformer(),
        ], new SVC());

        $pipeline->train([
            'Hello Paul',
            'Hello Martin',
            'Goodbye Tom',
            'Hello John',
            'Goodbye Alex',
            'Bye Tony',
        ], [
            'greetings',
            'greetings',
            'farewell',
            'greetings',
            'farewell',
            'farewell',
        ]);

        $testSamples = ['Hello Max', 'Goodbye Mark'];
        $predicted = $pipeline->predict($testSamples);

        $filepath = (string) tempnam(sys_get_temp_dir(), uniqid('pipeline-test', true));

        // Clean up in finally so a failing assertion or a save/restore exception
        // does not leave the temporary model file behind.
        try {
            $modelManager = new ModelManager();
            $modelManager->saveToFile($pipeline, $filepath);

            $restoredClassifier = $modelManager->restoreFromFile($filepath);
            self::assertEquals($pipeline, $restoredClassifier);
            self::assertEquals($predicted, $restoredClassifier->predict($testSamples));
        } finally {
            // Guarded: if tempnam() failed the path is empty and there is nothing
            // to remove; an unguarded unlink would raise a warning that could mask
            // the original failure.
            if (is_file($filepath)) {
                unlink($filepath);
            }
        }
    }
}
|