php-ml/tests/PipelineTest.php

158 lines
4.2 KiB
PHP
Raw Normal View History

2016-06-16 09:58:17 +02:00
<?php
2016-11-20 22:53:17 +01:00
declare(strict_types=1);
2016-06-16 09:58:17 +02:00
namespace Phpml\Tests;
2016-06-16 09:58:17 +02:00
use Phpml\Classification\SVC;
use Phpml\FeatureExtraction\TfIdfTransformer;
use Phpml\FeatureExtraction\TokenCountVectorizer;
2018-02-14 19:05:48 +01:00
use Phpml\FeatureSelection\SelectKBest;
use Phpml\ModelManager;
2016-06-16 09:58:17 +02:00
use Phpml\Pipeline;
2016-06-16 10:26:29 +02:00
use Phpml\Preprocessing\Imputer;
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
use Phpml\Preprocessing\Normalizer;
2016-07-24 13:52:52 +02:00
use Phpml\Regression\SVR;
use Phpml\Tokenization\WordTokenizer;
2017-02-03 12:58:25 +01:00
use PHPUnit\Framework\TestCase;
2016-06-16 09:58:17 +02:00
2017-02-03 12:58:25 +01:00
/**
 * Tests for Phpml\Pipeline: construction, estimator replacement, training and
 * prediction through transformer chains, and persistence via ModelManager.
 */
class PipelineTest extends TestCase
{
    /**
     * A pipeline must expose the transformers and estimator it was built with.
     */
    public function testPipelineConstruction(): void
    {
        $transformers = [
            new TfIdfTransformer(),
        ];
        $estimator = new SVC();

        $pipeline = new Pipeline($transformers, $estimator);

        self::assertEquals($transformers, $pipeline->getTransformers());
        self::assertEquals($estimator, $pipeline->getEstimator());
    }

    /**
     * setEstimator() must replace the estimator supplied to the constructor.
     */
    public function testPipelineEstimatorSetter(): void
    {
        $pipeline = new Pipeline([new TfIdfTransformer()], new SVC());

        $estimator = new SVR();
        $pipeline->setEstimator($estimator);

        self::assertEquals($estimator, $pipeline->getEstimator());
    }

    /**
     * Trains an Imputer + Normalizer + SVC pipeline on numeric samples that
     * contain null entries and checks the label predicted for [0, 0, 0].
     */
    public function testPipelineWorkflow(): void
    {
        $transformers = [
            new Imputer(null, new MostFrequentStrategy()),
            new Normalizer(),
        ];
        $estimator = new SVC();

        $samples = [
            [1, -1, 2],
            [2, 0, null],
            [null, 1, -1],
        ];
        $targets = [
            4,
            1,
            4,
        ];

        $pipeline = new Pipeline($transformers, $estimator);
        $pipeline->train($samples, $targets);

        $predicted = $pipeline->predict([[0, 0, 0]]);

        self::assertEquals(4, $predicted[0]);
    }

    /**
     * Trains a TokenCountVectorizer + TfIdfTransformer + SVC pipeline on raw
     * text samples and checks the labels predicted for two unseen sentences.
     */
    public function testPipelineTransformers(): void
    {
        $transformers = [
            new TokenCountVectorizer(new WordTokenizer()),
            new TfIdfTransformer(),
        ];
        $estimator = new SVC();

        $samples = [
            'Hello Paul',
            'Hello Martin',
            'Goodbye Tom',
            'Hello John',
            'Goodbye Alex',
            'Bye Tony',
        ];
        $targets = [
            'greetings',
            'greetings',
            'farewell',
            'greetings',
            'farewell',
            'farewell',
        ];

        $pipeline = new Pipeline($transformers, $estimator);
        $pipeline->train($samples, $targets);

        $expected = ['greetings', 'farewell'];
        $predicted = $pipeline->predict(['Hello Max', 'Goodbye Mark']);

        self::assertEquals($expected, $predicted);
    }

    /**
     * Trains a pipeline whose only transformer is a SelectKBest(2) selector,
     * then checks the scores the selector reports and the predicted label.
     */
    public function testPipelineTransformersWithTargets(): void
    {
        $samples = [[1, 2, 1], [1, 3, 4], [5, 2, 1], [1, 3, 3], [1, 3, 4], [0, 3, 5]];
        $targets = ['a', 'a', 'a', 'b', 'b', 'b'];

        // Keep a handle on the selector so its scores can be inspected after training.
        $selector = new SelectKBest(2);
        $pipeline = new Pipeline([$selector], new SVC());
        $pipeline->train($samples, $targets);

        self::assertEqualsWithDelta([1.47058823, 4.0, 3.0], $selector->scores(), 0.00000001);
        self::assertEquals(['b'], $pipeline->predict([[1, 3, 5]]));
    }

    /**
     * A trained pipeline saved with ModelManager and restored from the same
     * file must equal the original and yield the same predictions.
     */
    public function testSaveAndRestore(): void
    {
        $pipeline = new Pipeline([
            new TokenCountVectorizer(new WordTokenizer()),
            new TfIdfTransformer(),
        ], new SVC());

        $pipeline->train([
            'Hello Paul',
            'Hello Martin',
            'Goodbye Tom',
            'Hello John',
            'Goodbye Alex',
            'Bye Tony',
        ], [
            'greetings',
            'greetings',
            'farewell',
            'greetings',
            'farewell',
            'farewell',
        ]);

        $testSamples = ['Hello Max', 'Goodbye Mark'];
        $predicted = $pipeline->predict($testSamples);

        $filepath = (string) tempnam(sys_get_temp_dir(), uniqid('pipeline-test', true));

        try {
            $modelManager = new ModelManager();
            $modelManager->saveToFile($pipeline, $filepath);
            $restoredClassifier = $modelManager->restoreFromFile($filepath);

            self::assertEquals($pipeline, $restoredClassifier);
            self::assertEquals($predicted, $restoredClassifier->predict($testSamples));
        } finally {
            // Always remove the temp file, even when an assertion or the
            // save/restore step fails; the is_file() guard avoids a second
            // warning if tempnam() failed and $filepath is empty.
            if (is_file($filepath)) {
                unlink($filepath);
            }
        }
    }
}