2016-05-08 14:47:17 +02:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 22:53:17 +01:00
|
|
|
declare(strict_types=1);
|
2016-05-08 14:47:17 +02:00
|
|
|
|
2018-01-06 13:09:33 +01:00
|
|
|
namespace Phpml\Tests\Preprocessing;
|
2016-05-08 14:47:17 +02:00
|
|
|
|
2018-10-10 21:36:18 +02:00
|
|
|
use Phpml\Exception\InvalidOperationException;
|
2016-05-08 14:47:17 +02:00
|
|
|
use Phpml\Preprocessing\Imputer;
|
|
|
|
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
|
2016-05-08 19:33:39 +02:00
|
|
|
use Phpml\Preprocessing\Imputer\Strategy\MedianStrategy;
|
|
|
|
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
|
2017-02-03 12:58:25 +01:00
|
|
|
use PHPUnit\Framework\TestCase;
|
2016-05-08 14:47:17 +02:00
|
|
|
|
2017-02-03 12:58:25 +01:00
|
|
|
/**
 * Exercises Imputer with the mean, median and most-frequent strategies on both axes.
 *
 * Every test marks gaps in its fixture with null, calls transform() on the sample
 * array and then asserts on that same variable, i.e. the imputer is expected to
 * fill the gaps in place rather than return a new array.
 *
 * NOTE(review): null is passed as the first constructor argument everywhere;
 * presumably that is the value the imputer treats as "missing" - confirm against Imputer.
 */
class ImputerTest extends TestCase
{
    /**
     * Absolute tolerance for comparisons where the imputed value may be fractional
     * (expected means are written with two decimals only).
     */
    private const DELTA = 0.01;

    /**
     * With AXIS_COLUMN each gap is expected to receive the mean of the known values
     * in its column, e.g. column 1 holds 3, 6, 7 and the gap becomes 5.33.
     */
    public function testComplementsMissingValuesWithMeanStrategyOnColumnAxis(): void
    {
        $samples = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $expected = [
            [1, 5.33, 3, 4],
            [4, 3, 2, 1],
            [4.33, 6, 7, 8],
            [8, 7, 4, 5],
        ];

        $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN, $samples);
        $imputer->transform($samples);

        self::assertEqualsWithDelta($expected, $samples, self::DELTA);
    }

    /**
     * With AXIS_ROW each gap is expected to receive the mean of the known values
     * in its own row, e.g. row 0 holds 1, 3, 4 and the gap becomes 2.66.
     */
    public function testComplementsMissingValuesWithMeanStrategyOnRowAxis(): void
    {
        $samples = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $expected = [
            [1, 2.66, 3, 4],
            [4, 3, 2, 1],
            [7, 6, 7, 8],
            [8, 7, 6.66, 5],
        ];

        $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_ROW, $samples);
        $imputer->transform($samples);

        self::assertEqualsWithDelta($expected, $samples, self::DELTA);
    }

    /**
     * With AXIS_COLUMN and the median strategy each gap is expected to receive the
     * median of its column, e.g. column 1 holds 3, 6, 7 and the gap becomes 6.
     *
     * NOTE(review): "Media" in the method name looks like a typo for "Median";
     * the name is kept so existing test filters keep matching.
     */
    public function testComplementsMissingValuesWithMediaStrategyOnColumnAxis(): void
    {
        $samples = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $expected = [
            [1, 6, 3, 4],
            [4, 3, 2, 1],
            [4, 6, 7, 8],
            [8, 7, 3, 5],
        ];

        $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_COLUMN, $samples);
        $imputer->transform($samples);

        self::assertEqualsWithDelta($expected, $samples, self::DELTA);
    }

    /**
     * With AXIS_ROW and the median strategy each gap is expected to receive the
     * median of its own row, e.g. row 0 holds 1, 3, 4 and the gap becomes 3.
     *
     * NOTE(review): "Media" in the method name looks like a typo for "Median";
     * the name is kept so existing test filters keep matching.
     */
    public function testComplementsMissingValuesWithMediaStrategyOnRowAxis(): void
    {
        $samples = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $expected = [
            [1, 3, 3, 4],
            [4, 3, 2, 1],
            [7, 6, 7, 8],
            [8, 7, 7, 5],
        ];

        $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_ROW, $samples);
        $imputer->transform($samples);

        self::assertEqualsWithDelta($expected, $samples, self::DELTA);
    }

    /**
     * With AXIS_COLUMN and the most-frequent strategy each gap is expected to receive
     * the value occurring most often in its column. The fifth row exists to give every
     * column with a gap a repeated value (8, 3 and 2 respectively).
     */
    public function testComplementsMissingValuesWithMostFrequentStrategyOnColumnAxis(): void
    {
        $samples = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
            [8, 3, 2, 5],
        ];

        $expected = [
            [1, 3, 3, 4],
            [4, 3, 2, 1],
            [8, 6, 7, 8],
            [8, 7, 2, 5],
            [8, 3, 2, 5],
        ];

        $imputer = new Imputer(null, new MostFrequentStrategy(), Imputer::AXIS_COLUMN, $samples);
        $imputer->transform($samples);

        self::assertEquals($expected, $samples);
    }

    /**
     * With AXIS_ROW and the most-frequent strategy each gap is expected to receive
     * the value occurring most often in its own row. The fifth column exists to give
     * every row with a gap a repeated value (3, 6 and 5 respectively).
     */
    public function testComplementsMissingValuesWithMostFrequentStrategyOnRowAxis(): void
    {
        $samples = [
            [1, null, 3, 4, 3],
            [4, 3, 2, 1, 7],
            [null, 6, 7, 8, 6],
            [8, 7, null, 5, 5],
            [8, 3, 2, 5, 4],
        ];

        $expected = [
            [1, 3, 3, 4, 3],
            [4, 3, 2, 1, 7],
            [6, 6, 7, 8, 6],
            [8, 7, 5, 5, 5],
            [8, 3, 2, 5, 4],
        ];

        $imputer = new Imputer(null, new MostFrequentStrategy(), Imputer::AXIS_ROW, $samples);
        $imputer->transform($samples);

        self::assertEquals($expected, $samples);
    }

    /**
     * The replacement values are expected to come from the samples handed to the
     * constructor, not from the array being transformed: column 2 of the training
     * data (4, 8, 5) yields 5.66, whereas the transformed data alone (8, 5) would not.
     */
    public function testImputerWorksOnFitSamples(): void
    {
        $trainData = [
            [1, 3, 4],
            [6, 7, 8],
            [8, 7, 5],
        ];

        $samples = [
            [1, 3, null],
            [6, null, 8],
            [null, 7, 5],
        ];

        $expected = [
            [1, 3, 5.66],
            [6, 5.66, 8],
            [5, 7, 5],
        ];

        $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN, $trainData);
        $imputer->transform($samples);

        self::assertEqualsWithDelta($expected, $samples, self::DELTA);
    }

    /**
     * Transforming with an imputer that was constructed without any samples is
     * expected to raise InvalidOperationException.
     */
    public function testThrowExceptionWhenTryingToTransformWithoutTrainSamples(): void
    {
        $samples = [
            [1, 3, null],
            [6, null, 8],
            [null, 7, 5],
        ];

        $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);

        // Armed only after construction so that an exception thrown by the
        // constructor cannot satisfy the expectation by accident.
        $this->expectException(InvalidOperationException::class);
        $imputer->transform($samples);
    }
}
|