From 1a856c90999c8f3049adea1b9fcd74256f601420 Mon Sep 17 00:00:00 2001
From: Arkadiusz Kondas
Date: Sat, 22 Jun 2019 22:54:47 +0200
Subject: [PATCH] Fix division by zero in ANOVA for small size dataset (#391)

---
Review notes (placed below the three-dash separator, so they are neither part
of the commit message nor part of any hunk):

* src/Math/Statistic/ANOVA.php: the added guard makes the $msw closure return
  the integer 1 whenever $dfwn is identical (===) to integer 0, instead of
  evaluating $s / $dfwn. A float 0.0 would not match the strict comparison;
  presumably $dfwn is always an int -- verify against the enclosing method,
  which is not visible in this patch.
* tests/FeatureSelection/SelectKBestTest.php: testSelectKBestIssue386 fits
  SelectKBest(2) on two samples with two distinct targets and compares
  scores() against literal floats through assertEquals without a delta
  argument. NOTE(review): expected values such as -2.117582368135751E-22 look
  like floating-point rounding residue and may vary between platforms --
  confirm whether a tolerance-based assertion would be more robust.
* NOTE(review): this patch was recovered from a copy collapsed onto a single
  line. Leading indentation inside the hunks was reconstructed assuming
  4-space indentation; confirm the context lines against the target files
  before applying.

 src/Math/Statistic/ANOVA.php               |  4 +++
 tests/FeatureSelection/SelectKBestTest.php | 42 ++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/src/Math/Statistic/ANOVA.php b/src/Math/Statistic/ANOVA.php
index d233f84..f89309e 100644
--- a/src/Math/Statistic/ANOVA.php
+++ b/src/Math/Statistic/ANOVA.php
@@ -45,6 +45,10 @@ final class ANOVA
             return $s / $dfbn;
         }, $ssbn);
         $msw = array_map(function ($s) use ($dfwn) {
+            if ($dfwn === 0) {
+                return 1;
+            }
+
             return $s / $dfwn;
         }, $sswn);
 
diff --git a/tests/FeatureSelection/SelectKBestTest.php b/tests/FeatureSelection/SelectKBestTest.php
index ebf119b..5239954 100644
--- a/tests/FeatureSelection/SelectKBestTest.php
+++ b/tests/FeatureSelection/SelectKBestTest.php
@@ -61,6 +61,48 @@ final class SelectKBestTest extends TestCase
         );
     }
 
+    public function testSelectKBestIssue386(): void
+    {
+        $samples = [
+            [
+                0.0006729998475705993,
+                0.0,
+                0.999999773507577,
+                0.0,
+                0.0,
+                6.66666515671718E-7,
+                3.33333257835859E-6,
+                6.66666515671718E-6,
+            ],
+            [
+                0.0006729998475849566,
+                0.0,
+                0.9999997735289103,
+                0.0,
+                0.0,
+                6.666665156859402E-7,
+                3.3333325784297012E-6,
+                1.3333330313718804E-6,
+            ],
+        ];
+
+        $targets = [15.5844, 4.45284];
+
+        $selector = new SelectKBest(2);
+        $selector->fit($samples, $targets);
+
+        self::assertEquals([
+            -2.117582368135751E-22,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            1.0097419586828951E-28,
+            0.0,
+            1.4222215779620095E-11,
+        ], $selector->scores());
+    }
+
     public function testThrowExceptionOnEmptyTargets(): void
     {
         $this->expectException(InvalidArgumentException::class);