From 65cdfe64b2c307b223fd90f160e49d90e9cc4321 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Sun, 8 May 2016 19:33:39 +0200 Subject: [PATCH] implement Median and MostFrequent strategy for imputer --- src/Phpml/Math/Statistic/Mean.php | 5 +- .../Imputer/Strategy/MedianStrategy.php | 21 ++++ .../Imputer/Strategy/MostFrequentStrategy.php | 21 ++++ tests/Phpml/Math/Statistic/MeanTest.php | 2 - tests/Phpml/Preprocessing/ImputerTest.php | 98 ++++++++++++++++++- 5 files changed, 140 insertions(+), 7 deletions(-) create mode 100644 src/Phpml/Preprocessing/Imputer/Strategy/MedianStrategy.php create mode 100644 src/Phpml/Preprocessing/Imputer/Strategy/MostFrequentStrategy.php diff --git a/src/Phpml/Math/Statistic/Mean.php b/src/Phpml/Math/Statistic/Mean.php index 9d87a97..3804848 100644 --- a/src/Phpml/Math/Statistic/Mean.php +++ b/src/Phpml/Math/Statistic/Mean.php @@ -29,8 +29,8 @@ class Mean * * @throws InvalidArgumentException */ - public static function median(array $numbers) { - + public static function median(array $numbers) + { self::checkArrayLength($numbers); $count = count($numbers); @@ -72,5 +72,4 @@ class Mean throw InvalidArgumentException::arrayCantBeEmpty(); } } - } diff --git a/src/Phpml/Preprocessing/Imputer/Strategy/MedianStrategy.php b/src/Phpml/Preprocessing/Imputer/Strategy/MedianStrategy.php new file mode 100644 index 0000000..3746760 --- /dev/null +++ b/src/Phpml/Preprocessing/Imputer/Strategy/MedianStrategy.php @@ -0,0 +1,21 @@ +assertEquals(6, Mean::mode($numbers)); } - } diff --git a/tests/Phpml/Preprocessing/ImputerTest.php b/tests/Phpml/Preprocessing/ImputerTest.php index 630f4b1..9da9765 100644 --- a/tests/Phpml/Preprocessing/ImputerTest.php +++ b/tests/Phpml/Preprocessing/ImputerTest.php @@ -6,10 +6,12 @@ namespace tests\Preprocessing; use Phpml\Preprocessing\Imputer; use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy; +use Phpml\Preprocessing\Imputer\Strategy\MedianStrategy; +use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy; class ImputerTest extends \PHPUnit_Framework_TestCase { - public function testCompletingMissingValuesWithMeanStrategyOnColumnAxis() + public function testComplementsMissingValuesWithMeanStrategyOnColumnAxis() { $data = [ [1, null, 3, 4], @@ -31,7 +33,7 @@ class ImputerTest extends \PHPUnit_Framework_TestCase $this->assertEquals($imputeData, $data, '', $delta = 0.01); } - public function testCompletingMissingValuesWithMeanStrategyOnRowAxis() + public function testComplementsMissingValuesWithMeanStrategyOnRowAxis() { $data = [ [1, null, 3, 4], @@ -52,4 +54,96 @@ class ImputerTest extends \PHPUnit_Framework_TestCase $this->assertEquals($imputeData, $data, '', $delta = 0.01); } + + public function testComplementsMissingValuesWithMediaStrategyOnColumnAxis() + { + $data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], + ]; + + $imputeData = [ + [1, 6, 3, 4], + [4, 3, 2, 1], + [4, 6, 7, 8], + [8, 7, 3, 5], + ]; + + $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_COLUMN); + $imputer->preprocess($data); + + $this->assertEquals($imputeData, $data, '', $delta = 0.01); + } + + public function testComplementsMissingValuesWithMediaStrategyOnRowAxis() + { + $data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], + ]; + + $imputeData = [ + [1, 3, 3, 4], + [4, 3, 2, 1], + [7, 6, 7, 8], + [8, 7, 7, 5], + ]; + + $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_ROW); + $imputer->preprocess($data); + + $this->assertEquals($imputeData, $data, '', $delta = 0.01); + } + + public function testComplementsMissingValuesWithMostFrequentStrategyOnColumnAxis() + { + $data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], + [8, 3, 2, 5], + ]; + + $imputeData = [ + [1, 3, 3, 4], + [4, 3, 2, 1], + [8, 6, 7, 8], + [8, 7, 2, 5], + [8, 3, 2, 5], + ]; + + $imputer = new Imputer(null, new MostFrequentStrategy(), Imputer::AXIS_COLUMN); + $imputer->preprocess($data); + + $this->assertEquals($imputeData, $data); + } + + public function testComplementsMissingValuesWithMostFrequentStrategyOnRowAxis() + { + $data = [ + [1, null, 3, 4, 3], + [4, 3, 2, 1, 7], + [null, 6, 7, 8, 6], + [8, 7, null, 5, 5], + [8, 3, 2, 5, 4], + ]; + + $imputeData = [ + [1, 3, 3, 4, 3], + [4, 3, 2, 1, 7], + [6, 6, 7, 8, 6], + [8, 7, 5, 5, 5], + [8, 3, 2, 5, 4], + ]; + + $imputer = new Imputer(null, new MostFrequentStrategy(), Imputer::AXIS_ROW); + $imputer->preprocess($data); + + $this->assertEquals($imputeData, $data); + } }