mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-21 12:35:10 +00:00
Implement FeatureUnion 🚀 (#382)
This commit is contained in:
parent
ff118eb2ba
commit
b500f0b648
@ -91,6 +91,7 @@ Public datasets are available in a separate repository [php-ai/php-ml-datasets](
|
||||
* Regression
|
||||
* Workflow
|
||||
* [Pipeline](http://php-ml.readthedocs.io/en/latest/machine-learning/workflow/pipeline)
|
||||
* FeatureUnion
|
||||
* Neural Network
|
||||
* [Multilayer Perceptron Classifier](http://php-ml.readthedocs.io/en/latest/machine-learning/neural-network/multilayer-perceptron-classifier/)
|
||||
* Cross Validation
|
||||
@ -103,6 +104,9 @@ Public datasets are available in a separate repository [php-ai/php-ml-datasets](
|
||||
* [Normalization](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/normalization/)
|
||||
* [Imputation missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values/)
|
||||
* LabelEncoder
|
||||
* LambdaTransformer
|
||||
* NumberConverter
|
||||
* ColumnFilter
|
||||
* Feature Extraction
|
||||
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/)
|
||||
* NGramTokenizer
|
||||
|
72
src/FeatureUnion.php
Normal file
72
src/FeatureUnion.php
Normal file
@ -0,0 +1,72 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
final class FeatureUnion implements Transformer
{
    /**
     * Pipelines whose transformed outputs are concatenated sample by sample.
     *
     * @var Pipeline[]
     */
    private $pipelines = [];

    /**
     * @param Pipeline[] $pipelines one or more pipelines; every element must be a Pipeline
     *
     * @throws InvalidArgumentException when the list of pipelines is empty
     */
    public function __construct(array $pipelines)
    {
        if (count($pipelines) === 0) {
            throw new InvalidArgumentException('At least one pipeline is required');
        }

        // Identity mapping: the parameter type on the closure is what rejects
        // any element that is not a Pipeline instance.
        $assertPipeline = static function (Pipeline $candidate): Pipeline {
            return $candidate;
        };

        $this->pipelines = array_map($assertPipeline, $pipelines);
    }

    /**
     * Fits every transformer of every pipeline.
     *
     * Inside a pipeline each transformer is fitted and then applied, so the
     * next transformer is fitted on the output of the previous one. Every
     * pipeline starts again from the samples as they were passed in. The same
     * $targets variable is handed to all transformers of all pipelines.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        foreach ($this->pipelines as $pipeline) {
            // Work on a per-pipeline copy so each pipeline sees the original input.
            $branchSamples = $samples;

            foreach ($pipeline->getTransformers() as $step) {
                $step->fit($branchSamples, $targets);
                $step->transform($branchSamples, $targets);
            }
        }
    }

    /**
     * Replaces $samples with the union of all pipeline outputs, without fitting.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        $this->transformSamples($samples, $targets, false);
    }

    /**
     * Fits each transformer right before applying it and replaces $samples
     * with the union of all pipeline outputs.
     */
    public function fitAndTransform(array &$samples, ?array &$targets = null): void
    {
        $this->transformSamples($samples, $targets, true);
    }

    /**
     * Runs every pipeline on the incoming samples and concatenates the results.
     *
     * For each sample index the outputs are appended in pipeline order with
     * array_merge; a pipeline output that is not an array is treated as a
     * single-element array.
     *
     * @param bool $fit whether each transformer is fitted before it is applied
     */
    private function transformSamples(array &$samples, ?array &$targets = null, bool $fit = false): void
    {
        $pristine = $samples;
        $merged = [];

        foreach ($this->pipelines as $pipeline) {
            foreach ($pipeline->getTransformers() as $step) {
                if ($fit) {
                    $step->fit($samples, $targets);
                }

                $step->transform($samples, $targets);
            }

            foreach ($samples as $key => $row) {
                if (!is_array($row)) {
                    $row = [$row];
                }

                $merged[$key] = array_merge($merged[$key] ?? [], $row);
            }

            // Restore the untouched input before the next pipeline runs.
            $samples = $pristine;
        }

        $samples = $merged;
    }
}
|
@ -28,7 +28,7 @@ final class Regression
|
||||
|
||||
$errors = [];
|
||||
foreach ($targets as $index => $target) {
|
||||
$errors[] = (log(1 + $target) - log(1 + $predictions[$index])) ** 2;
|
||||
$errors[] = log((1 + $target) / (1 + $predictions[$index])) ** 2;
|
||||
}
|
||||
|
||||
return Mean::arithmetic($errors);
|
||||
|
@ -4,7 +4,9 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml;
|
||||
|
||||
class Pipeline implements Estimator
|
||||
use Phpml\Exception\InvalidOperationException;
|
||||
|
||||
class Pipeline implements Estimator, Transformer
|
||||
{
|
||||
/**
|
||||
* @var Transformer[]
|
||||
@ -12,29 +14,18 @@ class Pipeline implements Estimator
|
||||
private $transformers = [];
|
||||
|
||||
/**
|
||||
* @var Estimator
|
||||
* @var Estimator|null
|
||||
*/
|
||||
private $estimator;
|
||||
|
||||
/**
|
||||
* @param Transformer[] $transformers
|
||||
*/
|
||||
public function __construct(array $transformers, Estimator $estimator)
|
||||
{
|
||||
foreach ($transformers as $transformer) {
|
||||
$this->addTransformer($transformer);
|
||||
}
|
||||
|
||||
$this->estimator = $estimator;
|
||||
}
|
||||
|
||||
public function addTransformer(Transformer $transformer): void
|
||||
{
|
||||
$this->transformers[] = $transformer;
|
||||
}
|
||||
|
||||
public function setEstimator(Estimator $estimator): void
|
||||
public function __construct(array $transformers, ?Estimator $estimator = null)
|
||||
{
|
||||
$this->transformers = array_map(static function (Transformer $transformer): Transformer {
|
||||
return $transformer;
|
||||
}, $transformers);
|
||||
$this->estimator = $estimator;
|
||||
}
|
||||
|
||||
@ -46,16 +37,20 @@ class Pipeline implements Estimator
|
||||
return $this->transformers;
|
||||
}
|
||||
|
||||
public function getEstimator(): Estimator
|
||||
public function getEstimator(): ?Estimator
|
||||
{
|
||||
return $this->estimator;
|
||||
}
|
||||
|
||||
public function train(array $samples, array $targets): void
|
||||
{
|
||||
if ($this->estimator === null) {
|
||||
throw new InvalidOperationException('Pipeline without estimator can\'t use train method');
|
||||
}
|
||||
|
||||
foreach ($this->transformers as $transformer) {
|
||||
$transformer->fit($samples, $targets);
|
||||
$transformer->transform($samples);
|
||||
$transformer->transform($samples, $targets);
|
||||
}
|
||||
|
||||
$this->estimator->train($samples, $targets);
|
||||
@ -66,15 +61,27 @@ class Pipeline implements Estimator
|
||||
*/
|
||||
public function predict(array $samples)
|
||||
{
|
||||
$this->transformSamples($samples);
|
||||
if ($this->estimator === null) {
|
||||
throw new InvalidOperationException('Pipeline without estimator can\'t use predict method');
|
||||
}
|
||||
|
||||
$this->transform($samples);
|
||||
|
||||
return $this->estimator->predict($samples);
|
||||
}
|
||||
|
||||
private function transformSamples(array &$samples): void
|
||||
public function fit(array $samples, ?array $targets = null): void
|
||||
{
|
||||
foreach ($this->transformers as $transformer) {
|
||||
$transformer->transform($samples);
|
||||
$transformer->fit($samples, $targets);
|
||||
$transformer->transform($samples, $targets);
|
||||
}
|
||||
}
|
||||
|
||||
public function transform(array &$samples, ?array &$targets = null): void
|
||||
{
|
||||
foreach ($this->transformers as $transformer) {
|
||||
$transformer->transform($samples, $targets);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
105
tests/FeatureUnionTest.php
Normal file
105
tests/FeatureUnionTest.php
Normal file
@ -0,0 +1,105 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Tests;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
use Phpml\FeatureUnion;
|
||||
use Phpml\Pipeline;
|
||||
use Phpml\Preprocessing\ColumnFilter;
|
||||
use Phpml\Preprocessing\Imputer;
|
||||
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
|
||||
use Phpml\Preprocessing\LabelEncoder;
|
||||
use Phpml\Preprocessing\LambdaTransformer;
|
||||
use Phpml\Preprocessing\NumberConverter;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
final class FeatureUnionTest extends TestCase
{
    /**
     * Fitting and transforming in one call yields the encoded "sex" column
     * followed by the numeric "age" and "income" columns, and converts targets.
     */
    public function testFitAndTransform(): void
    {
        $columns = ['age', 'income', 'sex'];
        $samples = [
            ['23', '100000', 'male'],
            ['23', '200000', 'female'],
            ['43', '150000', 'female'],
            ['33', 'n/a', 'male'],
        ];
        $targets = ['1', '2', '1', '3'];

        // Branch 1: keep only "sex", unwrap the single value, label-encode it.
        $categoricalBranch = new Pipeline([
            new ColumnFilter($columns, ['sex']),
            new LambdaTransformer(static function (array $sample) {
                return $sample[0];
            }),
            new LabelEncoder(),
        ]);

        // Branch 2: keep "age" and "income", convert to numbers, impute missing values.
        $numericBranch = new Pipeline([
            new ColumnFilter($columns, ['age', 'income']),
            new NumberConverter(true),
            new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN),
        ]);

        $featureUnion = new FeatureUnion([$categoricalBranch, $numericBranch]);
        $featureUnion->fitAndTransform($samples, $targets);

        self::assertEquals([
            [0, 23.0, 100000.0],
            [1, 23.0, 200000.0],
            [1, 43.0, 150000.0],
            [0, 33.0, 150000.0],
        ], $samples);
        self::assertEquals([1, 2, 1, 3], $targets);
    }

    /**
     * Fitting on training samples and transforming separate test samples
     * reuses what was learned during fit.
     */
    public function testFitAndTransformSeparate(): void
    {
        $columns = ['age', 'income', 'sex'];
        $trainSamples = [
            ['23', '100000', 'male'],
            ['23', '200000', 'female'],
            ['43', '150000', 'female'],
            ['33', 'n/a', 'male'],
        ];
        $testSamples = [
            ['43', '500000', 'female'],
            ['13', 'n/a', 'male'],
            ['53', 'n/a', 'male'],
            ['43', 'n/a', 'female'],
        ];

        // Branch 1: keep only "sex", unwrap the single value, label-encode it.
        $categoricalBranch = new Pipeline([
            new ColumnFilter($columns, ['sex']),
            new LambdaTransformer(static function (array $sample) {
                return $sample[0];
            }),
            new LabelEncoder(),
        ]);

        // Branch 2: keep "age" and "income", convert to numbers, impute missing values.
        $numericBranch = new Pipeline([
            new ColumnFilter($columns, ['age', 'income']),
            new NumberConverter(),
            new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN),
        ]);

        $featureUnion = new FeatureUnion([$categoricalBranch, $numericBranch]);
        $featureUnion->fit($trainSamples);
        $featureUnion->transform($testSamples);

        self::assertEquals([
            [1, 43.0, 500000.0],
            [0, 13.0, 150000.0],
            [0, 53.0, 150000.0],
            [1, 43.0, 150000.0],
        ], $testSamples);
    }

    /**
     * Constructing a union without any pipeline is rejected.
     */
    public function testNotAllowForEmptyPipelines(): void
    {
        $this->expectException(InvalidArgumentException::class);

        new FeatureUnion([]);
    }
}
|
@ -11,9 +11,9 @@ use Phpml\FeatureSelection\SelectKBest;
|
||||
use Phpml\ModelManager;
|
||||
use Phpml\Pipeline;
|
||||
use Phpml\Preprocessing\Imputer;
|
||||
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
|
||||
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
|
||||
use Phpml\Preprocessing\Normalizer;
|
||||
use Phpml\Regression\SVR;
|
||||
use Phpml\Tokenization\WordTokenizer;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
@ -32,16 +32,6 @@ class PipelineTest extends TestCase
|
||||
self::assertEquals($estimator, $pipeline->getEstimator());
|
||||
}
|
||||
|
||||
public function testPipelineEstimatorSetter(): void
|
||||
{
|
||||
$pipeline = new Pipeline([new TfIdfTransformer()], new SVC());
|
||||
|
||||
$estimator = new SVR();
|
||||
$pipeline->setEstimator($estimator);
|
||||
|
||||
self::assertEquals($estimator, $pipeline->getEstimator());
|
||||
}
|
||||
|
||||
public function testPipelineWorkflow(): void
|
||||
{
|
||||
$transformers = [
|
||||
@ -119,6 +109,29 @@ class PipelineTest extends TestCase
|
||||
self::assertEquals(['b'], $pipeline->predict([[1, 3, 5]]));
|
||||
}
|
||||
|
||||
public function testPipelineAsTransformer(): void
|
||||
{
|
||||
$pipeline = new Pipeline([
|
||||
new Imputer(null, new MeanStrategy()),
|
||||
]);
|
||||
|
||||
$trainSamples = [
|
||||
[10, 20, 30],
|
||||
[20, 30, 40],
|
||||
[30, 40, 50],
|
||||
];
|
||||
|
||||
$pipeline->fit($trainSamples);
|
||||
|
||||
$testSamples = [
|
||||
[null, null, null],
|
||||
];
|
||||
|
||||
$pipeline->transform($testSamples);
|
||||
|
||||
self::assertEquals([[20.0, 30.0, 40.0]], $testSamples);
|
||||
}
|
||||
|
||||
public function testSaveAndRestore(): void
|
||||
{
|
||||
$pipeline = new Pipeline([
|
||||
|
Loading…
Reference in New Issue
Block a user