2016-05-08 12:47:17 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-05-08 12:47:17 +00:00
|
|
|
|
2018-01-06 12:09:33 +00:00
|
|
|
namespace Phpml\Tests\Preprocessing;
|
2016-05-08 12:47:17 +00:00
|
|
|
|
|
|
|
use Phpml\Preprocessing\Imputer;
|
|
|
|
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
|
2016-05-08 17:33:39 +00:00
|
|
|
use Phpml\Preprocessing\Imputer\Strategy\MedianStrategy;
|
|
|
|
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
|
2017-02-03 11:58:25 +00:00
|
|
|
use PHPUnit\Framework\TestCase;
|
2016-05-08 12:47:17 +00:00
|
|
|
|
2017-02-03 11:58:25 +00:00
|
|
|
/**
 * Tests for Imputer using the mean, median and most-frequent strategies on both axes.
 *
 * Each test constructs an Imputer with null as its first argument (the value
 * that appears as the gap in every fixture), calls transform() on a sample
 * matrix and then asserts on that same variable. The tests therefore expect
 * transform() to fill the gaps in place rather than return a new array.
 *
 * NOTE(review): the four-argument assertEquals() form that carries a delta was
 * deprecated in PHPUnit 8 and removed in PHPUnit 9. Switch these calls to
 * assertEqualsWithDelta() once the project's PHPUnit version provides it
 * (7.5+) — confirm against composer.json.
 */
class ImputerTest extends TestCase
{
    /**
     * Absolute tolerance for comparing imputed floats against the two-decimal
     * expectations written in the fixtures (e.g. 16 / 3 is written as 5.33).
     */
    private const DELTA = 0.01;

    /**
     * With AXIS_COLUMN each gap is replaced by the mean of the other values in its column.
     */
    public function testComplementsMissingValuesWithMeanStrategyOnColumnAxis(): void
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $imputeData = [
            [1, 5.33, 3, 4],    // column 1: (3 + 6 + 7) / 3
            [4, 3, 2, 1],
            [4.33, 6, 7, 8],    // column 0: (1 + 4 + 8) / 3
            [8, 7, 4, 5],       // column 2: (3 + 2 + 7) / 3
        ];

        $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN, $data);
        $imputer->transform($data);

        self::assertEquals($imputeData, $data, '', self::DELTA);
    }

    /**
     * With AXIS_ROW each gap is replaced by the mean of the other values in its row.
     */
    public function testComplementsMissingValuesWithMeanStrategyOnRowAxis(): void
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $imputeData = [
            [1, 2.66, 3, 4],    // row 0: (1 + 3 + 4) / 3
            [4, 3, 2, 1],
            [7, 6, 7, 8],       // row 2: (6 + 7 + 8) / 3
            [8, 7, 6.66, 5],    // row 3: (8 + 7 + 5) / 3
        ];

        $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_ROW, $data);
        $imputer->transform($data);

        self::assertEquals($imputeData, $data, '', self::DELTA);
    }

    /**
     * With AXIS_COLUMN and MedianStrategy each gap takes the median of its column.
     */
    public function testComplementsMissingValuesWithMediaStrategyOnColumnAxis(): void
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $imputeData = [
            [1, 6, 3, 4],   // column 1: median of 3, 6, 7
            [4, 3, 2, 1],
            [4, 6, 7, 8],   // column 0: median of 1, 4, 8
            [8, 7, 3, 5],   // column 2: median of 2, 3, 7
        ];

        $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_COLUMN, $data);
        $imputer->transform($data);

        self::assertEquals($imputeData, $data, '', self::DELTA);
    }

    /**
     * With AXIS_ROW and MedianStrategy each gap takes the median of its row.
     */
    public function testComplementsMissingValuesWithMediaStrategyOnRowAxis(): void
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $imputeData = [
            [1, 3, 3, 4],   // row 0: median of 1, 3, 4
            [4, 3, 2, 1],
            [7, 6, 7, 8],   // row 2: median of 6, 7, 8
            [8, 7, 7, 5],   // row 3: median of 5, 7, 8
        ];

        $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_ROW, $data);
        $imputer->transform($data);

        self::assertEquals($imputeData, $data, '', self::DELTA);
    }

    /**
     * With AXIS_COLUMN and MostFrequentStrategy each gap takes the most common value of its column.
     *
     * A fifth row is added so that every column with a gap has one value occurring twice.
     */
    public function testComplementsMissingValuesWithMostFrequentStrategyOnColumnAxis(): void
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
            [8, 3, 2, 5],
        ];

        $imputeData = [
            [1, 3, 3, 4],   // column 1: 3 occurs twice
            [4, 3, 2, 1],
            [8, 6, 7, 8],   // column 0: 8 occurs twice
            [8, 7, 2, 5],   // column 2: 2 occurs twice
            [8, 3, 2, 5],
        ];

        $imputer = new Imputer(null, new MostFrequentStrategy(), Imputer::AXIS_COLUMN, $data);
        $imputer->transform($data);

        // Replacements are copies of existing integers, so no tolerance is passed.
        self::assertEquals($imputeData, $data);
    }

    /**
     * With AXIS_ROW and MostFrequentStrategy each gap takes the most common value of its row.
     *
     * A fifth column is added so that every row with a gap has one value occurring twice.
     */
    public function testComplementsMissingValuesWithMostFrequentStrategyOnRowAxis(): void
    {
        $data = [
            [1, null, 3, 4, 3],
            [4, 3, 2, 1, 7],
            [null, 6, 7, 8, 6],
            [8, 7, null, 5, 5],
            [8, 3, 2, 5, 4],
        ];

        $imputeData = [
            [1, 3, 3, 4, 3],    // row 0: 3 occurs twice
            [4, 3, 2, 1, 7],
            [6, 6, 7, 8, 6],    // row 2: 6 occurs twice
            [8, 7, 5, 5, 5],    // row 3: 5 occurs twice
            [8, 3, 2, 5, 4],
        ];

        $imputer = new Imputer(null, new MostFrequentStrategy(), Imputer::AXIS_ROW, $data);
        $imputer->transform($data);

        // Replacements are copies of existing integers, so no tolerance is passed.
        self::assertEquals($imputeData, $data);
    }

    /**
     * The samples handed to the constructor, not the transformed matrix, supply the fill values.
     *
     * The Imputer is built from a gap-free training set and then applied to a
     * different matrix; the expected replacements are the column means of the
     * training set.
     */
    public function testImputerWorksOnFitSamples(): void
    {
        $trainData = [
            [1, 3, 4],
            [6, 7, 8],
            [8, 7, 5],
        ];

        $data = [
            [1, 3, null],
            [6, null, 8],
            [null, 7, 5],
        ];

        $imputeData = [
            [1, 3, 5.66],   // training column 2: (4 + 8 + 5) / 3
            [6, 5.66, 8],   // training column 1: (3 + 7 + 7) / 3
            [5, 7, 5],      // training column 0: (1 + 6 + 8) / 3
        ];

        $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN, $trainData);
        $imputer->transform($data);

        self::assertEquals($imputeData, $data, '', self::DELTA);
    }
}
|