php-ml/tests/PipelineTest.php

171 lines
4.5 KiB
PHP
Raw Permalink Normal View History

2016-06-16 07:58:17 +00:00
<?php
2016-11-20 21:53:17 +00:00
declare(strict_types=1);
2016-06-16 07:58:17 +00:00
namespace Phpml\Tests;
2016-06-16 07:58:17 +00:00
use Phpml\Classification\SVC;
use Phpml\FeatureExtraction\TfIdfTransformer;
use Phpml\FeatureExtraction\TokenCountVectorizer;
2018-02-14 18:05:48 +00:00
use Phpml\FeatureSelection\SelectKBest;
use Phpml\ModelManager;
2016-06-16 07:58:17 +00:00
use Phpml\Pipeline;
2016-06-16 08:26:29 +00:00
use Phpml\Preprocessing\Imputer;
2019-05-14 19:26:25 +00:00
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
2016-06-16 08:26:29 +00:00
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
use Phpml\Preprocessing\Normalizer;
use Phpml\Tokenization\WordTokenizer;
2017-02-03 11:58:25 +00:00
use PHPUnit\Framework\TestCase;
2016-06-16 07:58:17 +00:00
2017-02-03 11:58:25 +00:00
class PipelineTest extends TestCase
2016-06-16 07:58:17 +00:00
{
/**
 * The pipeline must report transformers and an estimator equal to the ones
 * passed to its constructor.
 */
public function testPipelineConstruction(): void
{
    $steps = [new TfIdfTransformer()];
    $classifier = new SVC();

    $pipeline = new Pipeline($steps, $classifier);

    self::assertEquals($steps, $pipeline->getTransformers());
    self::assertEquals($classifier, $pipeline->getEstimator());
}
2016-06-16 08:26:29 +00:00
/**
 * Trains an Imputer -> Normalizer -> SVC pipeline on samples that contain
 * null values and checks the label predicted for an all-zero sample.
 */
public function testPipelineWorkflow(): void
{
    $steps = [
        new Imputer(null, new MostFrequentStrategy()),
        new Normalizer(),
    ];
    $classifier = new SVC();

    // Two of the three rows carry a null that the Imputer step has to fill.
    $trainingSamples = [[1, -1, 2], [2, 0, null], [null, 1, -1]];
    $trainingTargets = [4, 1, 4];

    $pipeline = new Pipeline($steps, $classifier);
    $pipeline->train($trainingSamples, $trainingTargets);

    $result = $pipeline->predict([[0, 0, 0]]);

    self::assertEquals(4, $result[0]);
}
/**
 * Runs raw text through TokenCountVectorizer -> TfIdfTransformer -> SVC and
 * checks that unseen greeting/farewell phrases get the matching labels.
 */
public function testPipelineTransformers(): void
{
    $pipeline = new Pipeline(
        [
            new TokenCountVectorizer(new WordTokenizer()),
            new TfIdfTransformer(),
        ],
        new SVC()
    );

    $phrases = ['Hello Paul', 'Hello Martin', 'Goodbye Tom', 'Hello John', 'Goodbye Alex', 'Bye Tony'];
    $labels = ['greetings', 'greetings', 'farewell', 'greetings', 'farewell', 'farewell'];
    $pipeline->train($phrases, $labels);

    $actual = $pipeline->predict(['Hello Max', 'Goodbye Mark']);

    self::assertEquals(['greetings', 'farewell'], $actual);
}
2018-02-14 18:51:07 +00:00
/**
 * Trains a pipeline whose only transformer is SelectKBest(2), then checks
 * both the scores exposed by the selector and the resulting prediction.
 */
public function testPipelineTransformersWithTargets(): void
{
    // Keep a handle on the selector so its scores can be inspected after training.
    $selector = new SelectKBest(2);
    $pipeline = new Pipeline([$selector], new SVC());

    $samples = [
        [1, 2, 1],
        [1, 3, 4],
        [5, 2, 1],
        [1, 3, 3],
        [1, 3, 4],
        [0, 3, 5],
    ];
    $targets = ['a', 'a', 'a', 'b', 'b', 'b'];
    $pipeline->train($samples, $targets);

    $expectedScores = [1.47058823, 4.0, 3.0];
    self::assertEqualsWithDelta($expectedScores, $selector->scores(), 0.00000001);
    self::assertEquals(['b'], $pipeline->predict([[1, 3, 5]]));
}
2019-05-14 19:26:25 +00:00
/**
 * A pipeline built without an estimator is used purely as a transformer:
 * after fitting a mean-strategy Imputer, an all-null row is expected to be
 * replaced by the per-column means of the fitted samples.
 */
public function testPipelineAsTransformer(): void
{
    $imputingPipeline = new Pipeline([
        new Imputer(null, new MeanStrategy()),
    ]);

    // Column means of these rows are 20, 30 and 40.
    $fitSamples = [[10, 20, 30], [20, 30, 40], [30, 40, 50]];
    $imputingPipeline->fit($fitSamples);

    $row = [[null, null, null]];
    // The result is read back from the same variable, not from a return value.
    $imputingPipeline->transform($row);

    self::assertEquals([[20.0, 30.0, 40.0]], $row);
}
/**
 * A trained pipeline saved with ModelManager and restored from disk must
 * equal the original and yield the same predictions.
 *
 * The temporary model file is removed in a finally block so it is not left
 * behind in the system temp directory when saving, restoring or one of the
 * assertions fails.
 */
public function testSaveAndRestore(): void
{
    $pipeline = new Pipeline([
        new TokenCountVectorizer(new WordTokenizer()),
        new TfIdfTransformer(),
    ], new SVC());

    $pipeline->train([
        'Hello Paul',
        'Hello Martin',
        'Goodbye Tom',
        'Hello John',
        'Goodbye Alex',
        'Bye Tony',
    ], [
        'greetings',
        'greetings',
        'farewell',
        'greetings',
        'farewell',
        'farewell',
    ]);

    $testSamples = ['Hello Max', 'Goodbye Mark'];
    // Predictions of the in-memory pipeline are the reference for the restored one.
    $predicted = $pipeline->predict($testSamples);

    $filepath = (string) tempnam(sys_get_temp_dir(), uniqid('pipeline-test', true));

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($pipeline, $filepath);
        $restoredClassifier = $modelManager->restoreFromFile($filepath);

        self::assertEquals($pipeline, $restoredClassifier);
        self::assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() can fail (empty path after the cast); only remove a file that exists
        // so a cleanup warning never masks the real failure.
        if (is_file($filepath)) {
            unlink($filepath);
        }
    }
}
2016-06-16 07:58:17 +00:00
}