create imputer tool for completing missing values

This commit is contained in:
Arkadiusz Kondas 2016-05-08 14:47:17 +02:00
parent 365a9baeca
commit b0ab236ab9
5 changed files with 185 additions and 0 deletions

View File

@ -0,0 +1,86 @@
<?php
declare (strict_types = 1);
namespace Phpml\Preprocessing;
use Phpml\Preprocessing\Imputer\Strategy;
/**
 * Replaces missing values in a data set with values computed by a Strategy.
 *
 * A value is considered missing when it is identical (===) to the configured
 * marker. The replacement for a missing cell is computed from the observed
 * (non-missing) values of either its column or its row, depending on the axis.
 */
class Imputer implements Preprocessor
{
    /** Compute the replacement from the other samples' values in the same column. */
    const AXIS_COLUMN = 0;

    /** Compute the replacement from the other values of the same sample (row). */
    const AXIS_ROW = 1;

    /**
     * Marker that identifies a missing value (compared with ===).
     *
     * @var mixed
     */
    private $missingValue;

    /**
     * @var Strategy
     */
    private $strategy;

    /**
     * One of the AXIS_* constants; any value other than AXIS_ROW selects the column axis.
     *
     * @var int
     */
    private $axis;

    /**
     * The first parameter intentionally has no default: it precedes a required
     * parameter, so a default could never be used positionally, and declaring
     * one triggers a deprecation notice on PHP 8.
     *
     * @param mixed    $missingValue marker identifying a missing value
     * @param Strategy $strategy     computes the replacement from the observed values
     * @param int      $axis         self::AXIS_COLUMN or self::AXIS_ROW
     */
    public function __construct($missingValue, Strategy $strategy, int $axis = self::AXIS_COLUMN)
    {
        $this->missingValue = $missingValue;
        $this->strategy = $strategy;
        $this->axis = $axis;
    }

    /**
     * Fills in every missing value of $samples in place.
     *
     * @param array $samples list of samples, each sample being an array of feature values
     */
    public function preprocess(array &$samples)
    {
        // Snapshot taken before any replacement, so that every imputed value is
        // derived from observed data only and never from earlier imputations.
        $observedSamples = $samples;

        foreach ($samples as &$sample) {
            $this->preprocessSample($sample, $observedSamples);
        }
        // Break the reference so later use of $sample cannot overwrite the last row.
        unset($sample);
    }

    /**
     * Replaces the missing values of a single sample in place.
     *
     * @param array $sample  sample to complete
     * @param array $samples all samples as they were before imputation started
     */
    private function preprocessSample(array &$sample, array $samples)
    {
        // Row-axis replacements must see the row as it was before this method
        // started rewriting it.
        $observedSample = $sample;

        foreach ($sample as $column => &$value) {
            if ($value === $this->missingValue) {
                $value = $this->strategy->replaceValue($this->getAxis($column, $observedSample, $samples));
            }
        }
        unset($value);
    }

    /**
     * Collects the observed values the strategy should base its replacement on.
     *
     * The column key is deliberately untyped so that samples with string keys
     * work under strict_types as well.
     *
     * @param int|string $column        key of the cell being replaced
     * @param array      $currentSample sample that contains the cell
     * @param array      $samples       all samples
     *
     * @return array list of non-missing values along the configured axis
     */
    private function getAxis($column, array $currentSample, array $samples): array
    {
        if (self::AXIS_ROW === $this->axis) {
            return $this->withoutMissing($currentSample);
        }

        $columnValues = [];
        foreach ($samples as $sample) {
            // Rows that do not have this column contribute nothing.
            if (array_key_exists($column, $sample)) {
                $columnValues[] = $sample[$column];
            }
        }

        return $this->withoutMissing($columnValues);
    }

    /**
     * Drops every value identical to the missing-value marker.
     *
     * Uses a strict comparison, matching the check that decides whether a cell
     * is missing. array_diff() is unsuitable here because it compares string
     * casts, so a null marker would also discard '' and false.
     *
     * @param array $values
     *
     * @return array re-indexed list of the remaining values
     */
    private function withoutMissing(array $values): array
    {
        $observed = [];
        foreach ($values as $value) {
            if ($value !== $this->missingValue) {
                $observed[] = $value;
            }
        }

        return $observed;
    }
}

View File

@ -0,0 +1,15 @@
<?php
declare (strict_types = 1);
namespace Phpml\Preprocessing\Imputer;
/**
 * Contract for computing the value that replaces a missing entry.
 *
 * Imputer calls replaceValue() once for every missing value it finds.
 */
interface Strategy
{
/**
* Computes a replacement from the values of the axis being completed.
*
* @param array $currentAxis values of the column or row the missing entry
*                           belongs to, with the missing entries removed
*
* @return mixed value to store in place of the missing entry
*/
public function replaceValue(array $currentAxis);
}

View File

@ -0,0 +1,16 @@
<?php
declare (strict_types = 1);
namespace Phpml\Preprocessing\Imputer\Strategy;
use Phpml\Preprocessing\Imputer\Strategy;
use Phpml\Math\Statistic\Mean;
/**
 * Imputation strategy that substitutes the arithmetic mean of the axis.
 */
class MeanStrategy implements Strategy
{
    /**
     * Delegates to Mean::arithmetic() and hands its result back unchanged.
     *
     * @param array $currentAxis values to average
     *
     * @return mixed whatever Mean::arithmetic() yields for the given values
     */
    public function replaceValue(array $currentAxis)
    {
        $average = Mean::arithmetic($currentAxis);

        return $average;
    }
}

View File

@ -0,0 +1,13 @@
<?php
declare (strict_types = 1);
namespace Phpml\Preprocessing;
/**
 * Contract for transformations applied to a data set before it is used.
 */
interface Preprocessor
{
/**
* Transforms the given samples.
*
* The array is received by reference, so an implementation can modify the
* caller's data in place; nothing is returned.
*
* @param array $samples
*/
public function preprocess(array &$samples);
}

View File

@ -0,0 +1,55 @@
<?php
declare (strict_types = 1);
namespace tests\Preprocessing;
use Phpml\Preprocessing\Imputer;
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
/**
 * Checks mean imputation of null entries along both supported axes.
 */
class ImputerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Every null is expected to become the mean of the other values in its column.
     */
    public function testCompletingMissingValuesWithMeanStrategyOnColumnAxis()
    {
        $samples = $this->samplesWithGaps();
        $expected = [
            [1, 5.33, 3, 4],
            [4, 3, 2, 1],
            [4.33, 6, 7, 8],
            [8, 7, 4, 5],
        ];

        $columnImputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);
        $columnImputer->preprocess($samples);

        // Means are fractional, so compare with a tolerance of 0.01.
        $this->assertEquals($expected, $samples, '', 0.01);
    }

    /**
     * Every null is expected to become the mean of the other values in its row.
     */
    public function testCompletingMissingValuesWithMeanStrategyOnRowAxis()
    {
        $samples = $this->samplesWithGaps();
        $expected = [
            [1, 2.66, 3, 4],
            [4, 3, 2, 1],
            [7, 6, 7, 8],
            [8, 7, 6.66, 5],
        ];

        $rowImputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_ROW);
        $rowImputer->preprocess($samples);

        // Means are fractional, so compare with a tolerance of 0.01.
        $this->assertEquals($expected, $samples, '', 0.01);
    }

    /**
     * Fixture shared by both tests: a 4x4 matrix with one null in three of its rows.
     *
     * @return array
     */
    private function samplesWithGaps(): array
    {
        return [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];
    }
}