Fix logistic regression implementation (#169)

* Fix target value of LogisticRegression * Fix probability calculation in LogisticRegression * Change the default cost function to log-likelihood * Remove redundant round function * Fix for coding standard
2025-02-03 12:38:35 +00:00 · 2017-12-05 20:03:55 +09:00 · 2017-12-05 20:03:55 +09:00 · c4f58f7f6f
commit c4f58f7f6f
parent 946fbbc521
2 changed files with 118 additions and 9 deletions
--- a/src/Phpml/Classification/Linear/LogisticRegression.php
+++ b/src/Phpml/Classification/Linear/LogisticRegression.php
@ -32,7 +32,7 @@ class LogisticRegression extends Adaline
     *
     * @var string
     */
-    protected $costFunction = 'sse';
+    protected $costFunction = 'log';
    /**
     * Regularization term: only 'L2' is supported
@ -67,7 +67,7 @@ class LogisticRegression extends Adaline
        int $maxIterations = 500,
        bool $normalizeInputs = true,
        int $trainingType = self::CONJUGATE_GRAD_TRAINING,
-        string $cost = 'sse',
+        string $cost = 'log',
        string $penalty = 'L2'
    ) {
        $trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING);
@ -190,6 +190,8 @@ class LogisticRegression extends Adaline
                        $hX = 1e-10;
                    }
                    $y = $y < 0 ? 0 : 1;
                    $error = -$y * log($hX) - (1 - $y) * log(1 - $hX);
                    $gradient = $hX - $y;
@ -213,6 +215,8 @@ class LogisticRegression extends Adaline
                    $this->weights = $weights;
                    $hX = $this->output($sample);
                    $y = $y < 0 ? 0 : 1;
                    $error = ($y - $hX) ** 2;
                    $gradient = -($y - $hX) * $hX * (1 - $hX);
@ -243,7 +247,7 @@ class LogisticRegression extends Adaline
    {
        $output = $this->output($sample);
-        if (round($output) > 0.5) {
+        if ($output > 0.5) {
            return 1;
        }
@ -260,14 +264,13 @@ class LogisticRegression extends Adaline
     */
    protected function predictProbability(array $sample, $label): float
    {
        $predicted = $this->predictSampleBinary($sample);
        if ((string) $predicted == (string) $label) {
        $sample = $this->checkNormalizedSample($sample);
        $probability = $this->output($sample);
-            return (float) abs($this->output($sample) - 0.5);
+        if (array_search($label, $this->labels, true) > 0) {
            return $probability;
        }
-        return 0.0;
+        return 1 - $probability;
    }
 }
--- a/tests/Phpml/Classification/Linear/LogisticRegressionTest.php
+++ b/tests/Phpml/Classification/Linear/LogisticRegressionTest.php
@ -0,0 +1,106 @@
 <?php
 declare(strict_types=1);
 namespace tests\Phpml\Classification\Linear;
 use Phpml\Classification\Linear\LogisticRegression;
 use PHPUnit\Framework\TestCase;
 use ReflectionMethod;
 use ReflectionProperty;
 class LogisticRegressionTest extends TestCase
 {
    public function testPredictSingleSample(): void
    {
        // AND problem
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.4, 0.4], [0.6, 0.6]];
        $targets = [0, 0, 0, 1, 0, 1];
        $classifier = new LogisticRegression();
        $classifier->train($samples, $targets);
        $this->assertEquals(0, $classifier->predict([0.1, 0.1]));
        $this->assertEquals(1, $classifier->predict([0.9, 0.9]));
    }
    public function testPredictMultiClassSample(): void
    {
        // By use of One-v-Rest, Perceptron can perform multi-class classification
        // The samples should be separable by lines perpendicular to the dimensions
        $samples = [
            [0, 0], [0, 1], [1, 0], [1, 1], // First group : a cluster at bottom-left corner in 2D
            [5, 5], [6, 5], [5, 6], [7, 5], // Second group: another cluster at the middle-right
            [3, 10], [3, 10], [3, 8], [3, 9],  // Third group : cluster at the top-middle
        ];
        $targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2];
        $classifier = new LogisticRegression();
        $classifier->train($samples, $targets);
        $this->assertEquals(0, $classifier->predict([0.5, 0.5]));
        $this->assertEquals(1, $classifier->predict([6.0, 5.0]));
        $this->assertEquals(2, $classifier->predict([3.0, 9.5]));
    }
    public function testPredictProbabilitySingleSample(): void
    {
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.4, 0.4], [0.6, 0.6]];
        $targets = [0, 0, 0, 1, 0, 1];
        $classifier = new LogisticRegression();
        $classifier->train($samples, $targets);
        $property = new ReflectionProperty($classifier, 'classifiers');
        $property->setAccessible(true);
        $predictor = $property->getValue($classifier)[0];
        $method = new ReflectionMethod($predictor, 'predictProbability');
        $method->setAccessible(true);
        $zero = $method->invoke($predictor, [0.1, 0.1], 0);
        $one = $method->invoke($predictor, [0.1, 0.1], 1);
        $this->assertEquals(1, $zero + $one, null, 1e-6);
        $this->assertTrue($zero > $one);
        $zero = $method->invoke($predictor, [0.9, 0.9], 0);
        $one = $method->invoke($predictor, [0.9, 0.9], 1);
        $this->assertEquals(1, $zero + $one, null, 1e-6);
        $this->assertTrue($zero < $one);
    }
    public function testPredictProbabilityMultiClassSample(): void
    {
        $samples = [
            [0, 0], [0, 1], [1, 0], [1, 1],
            [5, 5], [6, 5], [5, 6], [6, 6],
            [3, 10], [3, 10], [3, 8], [3, 9],
        ];
        $targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2];
        $classifier = new LogisticRegression();
        $classifier->train($samples, $targets);
        $property = new ReflectionProperty($classifier, 'classifiers');
        $property->setAccessible(true);
        $predictor = $property->getValue($classifier)[0];
        $method = new ReflectionMethod($predictor, 'predictProbability');
        $method->setAccessible(true);
        $zero = $method->invoke($predictor, [3.0, 9.5], 0);
        $not_zero = $method->invoke($predictor, [3.0, 9.5], 'not_0');
        $predictor = $property->getValue($classifier)[1];
        $method = new ReflectionMethod($predictor, 'predictProbability');
        $method->setAccessible(true);
        $one = $method->invoke($predictor, [3.0, 9.5], 1);
        $not_one = $method->invoke($predictor, [3.0, 9.5], 'not_1');
        $predictor = $property->getValue($classifier)[2];
        $method = new ReflectionMethod($predictor, 'predictProbability');
        $method->setAccessible(true);
        $two = $method->invoke($predictor, [3.0, 9.5], 2);
        $not_two = $method->invoke($predictor, [3.0, 9.5], 'not_2');
        $this->assertEquals(1, $zero + $not_zero, null, 1e-6);
        $this->assertEquals(1, $one + $not_one, null, 1e-6);
        $this->assertEquals(1, $two + $not_two, null, 1e-6);
        $this->assertTrue($zero < $two);
        $this->assertTrue($one < $two);
    }
 }