Fix samples transformation in Pipeline training (#94)

2025-01-10 00:37:55 +00:00 · 2017-05-24 09:06:54 +02:00 · 2017-05-24 09:06:54 +02:00 · 2d3b44f1a0
commit 2d3b44f1a0
parent de50490154
2 changed files with 44 additions and 12 deletions
--- a/src/Phpml/Pipeline.php
+++ b/src/Phpml/Pipeline.php
@@ -67,8 +67,11 @@ class Pipeline implements Estimator
     */
    public function train(array $samples, array $targets)
    {
-        $this->fitTransformers($samples);
-        $this->transformSamples($samples);
+        foreach ($this->transformers as $transformer) {
+            $transformer->fit($samples);
+            $transformer->transform($samples);
+        }
+
        $this->estimator->train($samples, $targets);
    }

@@ -84,16 +87,6 @@ class Pipeline implements Estimator
        return $this->estimator->predict($samples);
    }

-    /**
-     * @param array $samples
-     */
-    private function fitTransformers(array &$samples)
-    {
-        foreach ($this->transformers as $transformer) {
-            $transformer->fit($samples);
-        }
-    }
-
    /**
     * @param array $samples
     */
--- a/tests/Phpml/PipelineTest.php
+++ b/tests/Phpml/PipelineTest.php
@@ -6,11 +6,13 @@ namespace tests;

 use Phpml\Classification\SVC;
 use Phpml\FeatureExtraction\TfIdfTransformer;
+use Phpml\FeatureExtraction\TokenCountVectorizer;
 use Phpml\Pipeline;
 use Phpml\Preprocessing\Imputer;
 use Phpml\Preprocessing\Normalizer;
 use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
 use Phpml\Regression\SVR;
+use Phpml\Tokenization\WordTokenizer;
 use PHPUnit\Framework\TestCase;

 class PipelineTest extends TestCase
@@ -65,4 +67,41 @@ class PipelineTest extends TestCase

        $this->assertEquals(4, $predicted[0]);
    }
+
+    public function testPipelineTransformers()
+    {
+        $transformers = [
+            new TokenCountVectorizer(new WordTokenizer()),
+            new TfIdfTransformer()
+        ];
+
+        $estimator = new SVC();
+
+        $samples = [
+            'Hello Paul',
+            'Hello Martin',
+            'Goodbye Tom',
+            'Hello John',
+            'Goodbye Alex',
+            'Bye Tony',
+        ];
+
+        $targets = [
+            'greetings',
+            'greetings',
+            'farewell',
+            'greetings',
+            'farewell',
+            'farewell',
+        ];
+
+        $pipeline = new Pipeline($transformers, $estimator);
+        $pipeline->train($samples, $targets);
+
+        $expected = ['greetings', 'farewell'];
+
+        $predicted = $pipeline->predict(['Hello Max', 'Goodbye Mark']);
+
+        $this->assertEquals($expected, $predicted);
+    }
 }