Fix Imputer docs and check if train data was set (#314)

* Update docs for Imputer class

* Throw exception when trying to transform imputer without train data

* Update changelog
This commit is contained in:
Arkadiusz Kondas 2018-10-10 21:36:18 +02:00 committed by GitHub
parent 15adf9e252
commit e255369636
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 40 additions and 0 deletions

View File

@ -9,6 +9,7 @@ This changelog references the relevant changes done in PHP-ML library.
* feature [Dataset] added removeColumns function to ArrayDataset (#249) * feature [Dataset] added removeColumns function to ArrayDataset (#249)
* feature [Dataset] added a SvmDataset class for SVM-Light (or LibSVM) format files (#237) * feature [Dataset] added a SvmDataset class for SVM-Light (or LibSVM) format files (#237)
* feature [Optimizer] removed $initialTheta property and renamed setInitialTheta method to setTheta (#252) * feature [Optimizer] removed $initialTheta property and renamed setInitialTheta method to setTheta (#252)
* change [Imputer] Throw exception when trying to transform without train data (#314)
* enhancement Add performance test for LeastSquares (#263) * enhancement Add performance test for LeastSquares (#263)
* enhancement Micro optimization for matrix multiplication (#255) * enhancement Micro optimization for matrix multiplication (#255)
* enhancement Throw proper exception (#259, #251) * enhancement Throw proper exception (#259, #251)

View File

@ -8,6 +8,7 @@ To solve this problem you can use the `Imputer` class.
* $missingValue (mixed) - this value will be replaced (default null) * $missingValue (mixed) - this value will be replaced (default null)
* $strategy (Strategy) - imputation strategy (ready to use: MeanStrategy, MedianStrategy, MostFrequentStrategy) * $strategy (Strategy) - imputation strategy (ready to use: MeanStrategy, MedianStrategy, MostFrequentStrategy)
* $axis (int) - axis for strategy, Imputer::AXIS_COLUMN or Imputer::AXIS_ROW * $axis (int) - axis for strategy, Imputer::AXIS_COLUMN or Imputer::AXIS_ROW
* $samples (array) - array of samples to train
``` ```
$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN); $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);
@ -34,6 +35,7 @@ $data = [
]; ];
$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN); $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);
$imputer->fit($data);
$imputer->transform($data); $imputer->transform($data);
/* /*
@ -46,3 +48,20 @@ $data = [
*/ */
``` ```
You can also use the `$samples` constructor parameter instead of the `fit` method:
```
use Phpml\Preprocessing\Imputer;
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
$data = [
[1, null, 3, 4],
[4, 3, 2, 1],
[null, 6, 7, 8],
[8, 7, null, 5],
];
$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN, $data);
$imputer->transform($data);
```

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Preprocessing; namespace Phpml\Preprocessing;
use Phpml\Exception\InvalidOperationException;
use Phpml\Preprocessing\Imputer\Strategy; use Phpml\Preprocessing\Imputer\Strategy;
class Imputer implements Preprocessor class Imputer implements Preprocessor
@ -50,6 +51,10 @@ class Imputer implements Preprocessor
public function transform(array &$samples): void public function transform(array &$samples): void
{ {
if ($this->samples === []) {
throw new InvalidOperationException('Missing training samples for Imputer.');
}
foreach ($samples as &$sample) { foreach ($samples as &$sample) {
$this->preprocessSample($sample); $this->preprocessSample($sample);
} }

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Tests\Preprocessing; namespace Phpml\Tests\Preprocessing;
use Phpml\Exception\InvalidOperationException;
use Phpml\Preprocessing\Imputer; use Phpml\Preprocessing\Imputer;
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy; use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
use Phpml\Preprocessing\Imputer\Strategy\MedianStrategy; use Phpml\Preprocessing\Imputer\Strategy\MedianStrategy;
@ -173,4 +174,18 @@ class ImputerTest extends TestCase
$this->assertEquals($imputeData, $data, '', $delta = 0.01); $this->assertEquals($imputeData, $data, '', $delta = 0.01);
} }
/**
 * An Imputer that was given no training samples (neither through the
 * constructor nor through fit()) must refuse to transform data.
 */
public function testThrowExceptionWhenTryingToTransformWithoutTrainSamples(): void
{
    // The expectation is registered first so that it covers everything below.
    $this->expectException(InvalidOperationException::class);

    $samplesWithGaps = [
        [1, 3, null],
        [6, null, 8],
        [null, 7, 5],
    ];

    // No fourth constructor argument and no fit() call: nothing was trained.
    $untrainedImputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);

    $untrainedImputer->transform($samplesWithGaps);
}
} }