diff --git a/CHANGELOG.md b/CHANGELOG.md index 5990242..3c44b94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ This changelog references the relevant changes done in PHP-ML library. * feature [Dataset] added removeColumns function to ArrayDataset (#249) * feature [Dataset] added a SvmDataset class for SVM-Light (or LibSVM) format files (#237) * feature [Optimizer] removed $initialTheta property and renamed setInitialTheta method to setTheta (#252) + * change [Imputer] Throw exception when trying to transform without train data (#314) * enhancement Add performance test for LeastSquares (#263) * enhancement Micro optimization for matrix multiplication (#255) * enhancement Throw proper exception (#259, #251) diff --git a/docs/machine-learning/preprocessing/imputation-missing-values.md b/docs/machine-learning/preprocessing/imputation-missing-values.md index 48a5b3a..219db22 100644 --- a/docs/machine-learning/preprocessing/imputation-missing-values.md +++ b/docs/machine-learning/preprocessing/imputation-missing-values.md @@ -8,6 +8,7 @@ To solve this problem you can use the `Imputer` class. 
* $missingValue (mixed) - this value will be replaced (default null) * $strategy (Strategy) - imputation strategy (read to use: MeanStrategy, MedianStrategy, MostFrequentStrategy) * $axis (int) - axis for strategy, Imputer::AXIS_COLUMN or Imputer::AXIS_ROW +* $samples (array) - array of samples to train ``` $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN); @@ -34,6 +35,7 @@ $data = [ ]; $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN); +$imputer->fit($data); $imputer->transform($data); /* @@ -46,3 +48,20 @@ $data = [ */ ``` + +You can also use the `$samples` constructor parameter instead of the `fit` method: + +``` +use Phpml\Preprocessing\Imputer; +use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy; + +$data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], +]; + +$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN, $data); +$imputer->transform($data); +``` diff --git a/src/Preprocessing/Imputer.php b/src/Preprocessing/Imputer.php index fdce666..e5b5af8 100644 --- a/src/Preprocessing/Imputer.php +++ b/src/Preprocessing/Imputer.php @@ -4,6 +4,7 @@ declare(strict_types=1); namespace Phpml\Preprocessing; +use Phpml\Exception\InvalidOperationException; use Phpml\Preprocessing\Imputer\Strategy; class Imputer implements Preprocessor @@ -50,6 +51,10 @@ class Imputer implements Preprocessor public function transform(array &$samples): void { + if ($this->samples === []) { + throw new InvalidOperationException('Missing training samples for Imputer.'); + } + foreach ($samples as &$sample) { $this->preprocessSample($sample); } diff --git a/tests/Preprocessing/ImputerTest.php b/tests/Preprocessing/ImputerTest.php index c229c15..1078e54 100644 --- a/tests/Preprocessing/ImputerTest.php +++ b/tests/Preprocessing/ImputerTest.php @@ -4,6 +4,7 @@ declare(strict_types=1); namespace Phpml\Tests\Preprocessing; +use Phpml\Exception\InvalidOperationException; use Phpml\Preprocessing\Imputer; use 
Phpml\Preprocessing\Imputer\Strategy\MeanStrategy; use Phpml\Preprocessing\Imputer\Strategy\MedianStrategy; @@ -173,4 +174,18 @@ class ImputerTest extends TestCase $this->assertEquals($imputeData, $data, '', $delta = 0.01); } + + public function testThrowExceptionWhenTryingToTransformWithoutTrainSamples(): void + { + $this->expectException(InvalidOperationException::class); + + $data = [ + [1, 3, null], + [6, null, 8], + [null, 7, 5], + ]; + + $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN); + $imputer->transform($data); + } }