mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-24 22:07:33 +00:00
Fix samples transformation in Pipeline training (#94)
This commit is contained in:
parent
de50490154
commit
2d3b44f1a0
@ -67,8 +67,11 @@ class Pipeline implements Estimator
|
|||||||
*/
|
*/
|
||||||
public function train(array $samples, array $targets)
|
public function train(array $samples, array $targets)
|
||||||
{
|
{
|
||||||
$this->fitTransformers($samples);
|
foreach ($this->transformers as $transformer) {
|
||||||
$this->transformSamples($samples);
|
$transformer->fit($samples);
|
||||||
|
$transformer->transform($samples);
|
||||||
|
}
|
||||||
|
|
||||||
$this->estimator->train($samples, $targets);
|
$this->estimator->train($samples, $targets);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,16 +87,6 @@ class Pipeline implements Estimator
|
|||||||
return $this->estimator->predict($samples);
|
return $this->estimator->predict($samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @param array $samples
|
|
||||||
*/
|
|
||||||
private function fitTransformers(array &$samples)
|
|
||||||
{
|
|
||||||
foreach ($this->transformers as $transformer) {
|
|
||||||
$transformer->fit($samples);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param array $samples
|
* @param array $samples
|
||||||
*/
|
*/
|
||||||
|
@ -6,11 +6,13 @@ namespace tests;
|
|||||||
|
|
||||||
use Phpml\Classification\SVC;
|
use Phpml\Classification\SVC;
|
||||||
use Phpml\FeatureExtraction\TfIdfTransformer;
|
use Phpml\FeatureExtraction\TfIdfTransformer;
|
||||||
|
use Phpml\FeatureExtraction\TokenCountVectorizer;
|
||||||
use Phpml\Pipeline;
|
use Phpml\Pipeline;
|
||||||
use Phpml\Preprocessing\Imputer;
|
use Phpml\Preprocessing\Imputer;
|
||||||
use Phpml\Preprocessing\Normalizer;
|
use Phpml\Preprocessing\Normalizer;
|
||||||
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
|
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
|
||||||
use Phpml\Regression\SVR;
|
use Phpml\Regression\SVR;
|
||||||
|
use Phpml\Tokenization\WordTokenizer;
|
||||||
use PHPUnit\Framework\TestCase;
|
use PHPUnit\Framework\TestCase;
|
||||||
|
|
||||||
class PipelineTest extends TestCase
|
class PipelineTest extends TestCase
|
||||||
@ -65,4 +67,41 @@ class PipelineTest extends TestCase
|
|||||||
|
|
||||||
$this->assertEquals(4, $predicted[0]);
|
$this->assertEquals(4, $predicted[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testPipelineTransformers()
|
||||||
|
{
|
||||||
|
$transformers = [
|
||||||
|
new TokenCountVectorizer(new WordTokenizer()),
|
||||||
|
new TfIdfTransformer()
|
||||||
|
];
|
||||||
|
|
||||||
|
$estimator = new SVC();
|
||||||
|
|
||||||
|
$samples = [
|
||||||
|
'Hello Paul',
|
||||||
|
'Hello Martin',
|
||||||
|
'Goodbye Tom',
|
||||||
|
'Hello John',
|
||||||
|
'Goodbye Alex',
|
||||||
|
'Bye Tony',
|
||||||
|
];
|
||||||
|
|
||||||
|
$targets = [
|
||||||
|
'greetings',
|
||||||
|
'greetings',
|
||||||
|
'farewell',
|
||||||
|
'greetings',
|
||||||
|
'farewell',
|
||||||
|
'farewell',
|
||||||
|
];
|
||||||
|
|
||||||
|
$pipeline = new Pipeline($transformers, $estimator);
|
||||||
|
$pipeline->train($samples, $targets);
|
||||||
|
|
||||||
|
$expected = ['greetings', 'farewell'];
|
||||||
|
|
||||||
|
$predicted = $pipeline->predict(['Hello Max', 'Goodbye Mark']);
|
||||||
|
|
||||||
|
$this->assertEquals($expected, $predicted);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user