mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2025-01-09 16:36:34 +00:00
create imputer tool for completing missing values
This commit is contained in:
parent
365a9baeca
commit
b0ab236ab9
86
src/Phpml/Preprocessing/Imputer.php
Normal file
86
src/Phpml/Preprocessing/Imputer.php
Normal file
@ -0,0 +1,86 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
use Phpml\Preprocessing\Imputer\Strategy;
|
||||
|
||||
/**
 * Preprocessor that fills in missing values of a data set.
 *
 * Every cell that is strictly identical (===) to the configured missing-value
 * marker is replaced by the value returned from the configured Strategy. The
 * strategy receives the known (non-missing) values found along the chosen
 * axis: the cell's column (AXIS_COLUMN) or the cell's own row (AXIS_ROW).
 */
class Imputer implements Preprocessor
{
    const AXIS_COLUMN = 0;
    const AXIS_ROW = 1;

    /**
     * Marker identifying a missing cell; compared with ===.
     *
     * @var mixed
     */
    private $missingValue;

    /**
     * Computes the replacement for a missing cell from the known axis values.
     *
     * @var Strategy
     */
    private $strategy;

    /**
     * One of self::AXIS_COLUMN or self::AXIS_ROW. Any value other than
     * self::AXIS_ROW is handled as the column axis.
     *
     * @var int
     */
    private $axis;

    /**
     * The first parameter no longer declares a "= null" default: a default
     * placed before the required $strategy parameter could never be used by a
     * caller and triggers a deprecation notice on PHP 8.0+.
     *
     * @param mixed    $missingValue marker of a missing cell (for example null)
     * @param Strategy $strategy     computes the replacement value
     * @param int      $axis         self::AXIS_COLUMN or self::AXIS_ROW
     */
    public function __construct($missingValue, Strategy $strategy, int $axis = self::AXIS_COLUMN)
    {
        $this->missingValue = $missingValue;
        $this->strategy = $strategy;
        $this->axis = $axis;
    }

    /**
     * Replaces every missing cell of $samples in place.
     *
     * @param array $samples list of rows, each row being an array of cell values
     */
    public function preprocess(array &$samples)
    {
        // Statistics are taken from a snapshot of the data as it was handed in,
        // so a value imputed for one row never feeds into the statistics used
        // for a later row.
        $observed = $samples;

        foreach ($samples as &$sample) {
            $this->preprocessSample($sample, $observed);
        }
        // Drop the reference left behind by the by-reference loop.
        unset($sample);
    }

    /**
     * Replaces the missing cells of a single row in place.
     *
     * @param array $sample  row being completed
     * @param array $samples data set the column statistics are taken from
     */
    private function preprocessSample(array &$sample, array $samples)
    {
        // Same reasoning as in preprocess(): row statistics must only see the
        // values that were present before this row was modified.
        $observedRow = $sample;

        foreach ($sample as $column => &$value) {
            if ($value === $this->missingValue) {
                $value = $this->strategy->replaceValue($this->getAxis($column, $observedRow, $samples));
            }
        }
        unset($value);
    }

    /**
     * Collects the known values along the configured axis.
     *
     * @param int   $column        column index of the cell being replaced
     * @param array $currentSample row that contains the cell
     * @param array $samples       data set that contains the row
     *
     * @return array list of values that are not the missing-value marker
     */
    private function getAxis(int $column, array $currentSample, array $samples): array
    {
        if (self::AXIS_ROW === $this->axis) {
            // array_diff() compares string representations, which would also
            // drop '' for a null marker or '0'/0.0 for a 0 marker. Filter with
            // the same strict comparison used everywhere else in this class.
            $known = array_filter($currentSample, function ($value) {
                return $value !== $this->missingValue;
            });

            return array_values($known);
        }

        $axis = [];
        foreach ($samples as $sample) {
            // Rows that do not have this column contribute nothing.
            if (array_key_exists($column, $sample) && $sample[$column] !== $this->missingValue) {
                $axis[] = $sample[$column];
            }
        }

        return $axis;
    }
}
|
15
src/Phpml/Preprocessing/Imputer/Strategy.php
Normal file
15
src/Phpml/Preprocessing/Imputer/Strategy.php
Normal file
@ -0,0 +1,15 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing\Imputer;
|
||||
|
||||
/**
 * Contract for computing the value that stands in for a missing cell.
 */
interface Strategy
{
    /**
     * Derives a replacement value from the values of one axis.
     *
     * @param array $currentAxis values of the axis (row or column) the missing
     *                           cell belongs to
     *
     * @return mixed value to store in place of the missing cell
     */
    public function replaceValue(array $currentAxis);
}
|
16
src/Phpml/Preprocessing/Imputer/Strategy/MeanStrategy.php
Normal file
16
src/Phpml/Preprocessing/Imputer/Strategy/MeanStrategy.php
Normal file
@ -0,0 +1,16 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing\Imputer\Strategy;
|
||||
|
||||
use Phpml\Preprocessing\Imputer\Strategy;
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
/**
 * Imputation strategy that substitutes the arithmetic mean of the axis.
 */
class MeanStrategy implements Strategy
{
    /**
     * Delegates to Mean::arithmetic() for the given axis values.
     *
     * @param array $currentAxis values the mean is computed from
     *
     * @return mixed whatever Mean::arithmetic() returns for these values
     */
    public function replaceValue(array $currentAxis)
    {
        $mean = Mean::arithmetic($currentAxis);

        return $mean;
    }
}
|
13
src/Phpml/Preprocessing/Preprocessor.php
Normal file
13
src/Phpml/Preprocessing/Preprocessor.php
Normal file
@ -0,0 +1,13 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
/**
 * Contract for components that transform a data set before it is used.
 */
interface Preprocessor
{
    /**
     * Transforms the given samples. The array is received by reference, so an
     * implementation can modify the caller's data directly.
     *
     * @param array $samples data set to transform
     */
    public function preprocess(array &$samples);
}
|
55
tests/Phpml/Preprocessing/ImputerTest.php
Normal file
55
tests/Phpml/Preprocessing/ImputerTest.php
Normal file
@ -0,0 +1,55 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Preprocessing;
|
||||
|
||||
use Phpml\Preprocessing\Imputer;
|
||||
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
|
||||
|
||||
/**
 * Checks that Imputer combined with MeanStrategy fills null cells with the
 * mean of the known values along the selected axis.
 */
class ImputerTest extends \PHPUnit_Framework_TestCase
{
    public function testCompletingMissingValuesWithMeanStrategyOnColumnAxis()
    {
        $samples = $this->samplesWithGaps();

        // Each null is replaced by the mean of the other values in its column.
        $expected = [
            [1, 5.33, 3, 4],
            [4, 3, 2, 1],
            [4.33, 6, 7, 8],
            [8, 7, 4, 5],
        ];

        (new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN))->preprocess($samples);

        $this->assertEquals($expected, $samples, '', 0.01);
    }

    public function testCompletingMissingValuesWithMeanStrategyOnRowAxis()
    {
        $samples = $this->samplesWithGaps();

        // Each null is replaced by the mean of the other values in its row.
        $expected = [
            [1, 2.66, 3, 4],
            [4, 3, 2, 1],
            [7, 6, 7, 8],
            [8, 7, 6.66, 5],
        ];

        (new Imputer(null, new MeanStrategy(), Imputer::AXIS_ROW))->preprocess($samples);

        $this->assertEquals($expected, $samples, '', 0.01);
    }

    /**
     * Fixture shared by both tests: a 4x4 data set with three null cells.
     *
     * @return array
     */
    private function samplesWithGaps(): array
    {
        return [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];
    }
}
|
Loading…
Reference in New Issue
Block a user