php-ml/tests/Phpml/Classification/NaiveBayesTest.php
Jonathan Baldie e318921076 Fix string representation of integer labels issue in NaiveBayes (#206)
* Update NaiveBayes.php

This fixes an issue using string labels that are string representations of integers, e.g. "1998" getting cast to (int)1998.

* Update NaiveBayes.php

fixes superfluous whitespace error

* added tests for naive bayes with numeric labels

* added array_unique

* nested array_flips for speed

* nested the array flips inside the array map

* to appease style CI test
2018-01-31 21:44:44 +01:00

131 lines
4.6 KiB
PHP

<?php
declare(strict_types=1);
namespace Phpml\Tests\Classification;
use Phpml\Classification\NaiveBayes;
use Phpml\ModelManager;
use PHPUnit\Framework\TestCase;
/**
 * Tests for the NaiveBayes classifier.
 *
 * Every scenario is run twice: once with alphabetic labels ('a', 'b', 'c')
 * and once with labels that are string representations of integers
 * ('1996', '1997', '1998').
 *
 * NOTE(review): the assertions use assertEquals(), which compares scalars
 * loosely, so the numeric-label tests accept either the string '1996' or the
 * integer 1996. Switch to assertSame() only after confirming which type
 * NaiveBayes::predict() actually returns.
 */
class NaiveBayesTest extends TestCase
{
    /**
     * Trains on one sample per class and predicts single samples one at a time.
     */
    public function testPredictSingleSample(): void
    {
        $samples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
        $labels = ['a', 'b', 'c'];

        $classifier = new NaiveBayes();
        $classifier->train($samples, $labels);

        $this->assertEquals('a', $classifier->predict([3, 1, 1]));
        $this->assertEquals('b', $classifier->predict([1, 4, 1]));
        $this->assertEquals('c', $classifier->predict([1, 1, 6]));
    }

    /**
     * Predicts a batch of samples, then trains again with one extra class and
     * checks that both the new and a previously trained label are predicted.
     */
    public function testPredictArrayOfSamples(): void
    {
        $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
        $trainLabels = ['a', 'b', 'c'];

        $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8], [1, 1, 2], [2, 3, 2], [1, 2, 1], [9, 5, 1], [3, 1, 2]];
        $testLabels = ['a', 'a', 'c', 'c', 'b', 'b', 'a', 'a'];

        $classifier = new NaiveBayes();
        $classifier->train($trainSamples, $trainLabels);
        $predicted = $classifier->predict($testSamples);

        $this->assertEquals($testLabels, $predicted);

        // Feed an extra set of training data.
        $samples = [[1, 1, 6]];
        $labels = ['d'];
        $classifier->train($samples, $labels);

        $testSamples = [[1, 1, 6], [5, 1, 1]];
        $testLabels = ['d', 'a'];
        $this->assertEquals($testLabels, $classifier->predict($testSamples));
    }

    /**
     * Saves a trained classifier to disk and restores it (alphabetic labels).
     */
    public function testSaveAndRestore(): void
    {
        $this->assertModelSurvivesSaveAndRestore(['a', 'b', 'c'], ['a', 'a', 'c']);
    }

    /**
     * Same as testPredictSingleSample(), with numeric-string labels.
     */
    public function testPredictSimpleNumericLabels(): void
    {
        $samples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
        $labels = ['1996', '1997', '1998'];

        $classifier = new NaiveBayes();
        $classifier->train($samples, $labels);

        $this->assertEquals('1996', $classifier->predict([3, 1, 1]));
        $this->assertEquals('1997', $classifier->predict([1, 4, 1]));
        $this->assertEquals('1998', $classifier->predict([1, 1, 6]));
    }

    /**
     * Same as testPredictArrayOfSamples(), with numeric-string labels.
     */
    public function testPredictArrayOfSamplesNumericalLabels(): void
    {
        $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
        $trainLabels = ['1996', '1997', '1998'];

        $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8], [1, 1, 2], [2, 3, 2], [1, 2, 1], [9, 5, 1], [3, 1, 2]];
        $testLabels = ['1996', '1996', '1998', '1998', '1997', '1997', '1996', '1996'];

        $classifier = new NaiveBayes();
        $classifier->train($trainSamples, $trainLabels);
        $predicted = $classifier->predict($testSamples);

        $this->assertEquals($testLabels, $predicted);

        // Feed an extra set of training data.
        $samples = [[1, 1, 6]];
        $labels = ['1999'];
        $classifier->train($samples, $labels);

        $testSamples = [[1, 1, 6], [5, 1, 1]];
        $testLabels = ['1999', '1996'];
        $this->assertEquals($testLabels, $classifier->predict($testSamples));
    }

    /**
     * Saves a trained classifier to disk and restores it (numeric-string labels).
     */
    public function testSaveAndRestoreNumericLabels(): void
    {
        $this->assertModelSurvivesSaveAndRestore(['1996', '1997', '1998'], ['1996', '1996', '1998']);
    }

    /**
     * Shared save/restore scenario: trains a classifier, writes it to a
     * temporary file through ModelManager, reads it back and checks that the
     * restored object equals the original and yields the same predictions.
     *
     * The temporary file is always removed, even when an assertion fails.
     *
     * @param array $trainLabels one label per training sample (three samples)
     * @param array $testLabels  expected predictions for the three test samples
     */
    private function assertModelSurvivesSaveAndRestore(array $trainLabels, array $testLabels): void
    {
        $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
        $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8]];

        $classifier = new NaiveBayes();
        $classifier->train($trainSamples, $trainLabels);
        $predicted = $classifier->predict($testSamples);

        // Pin the baseline so the round-trip comparison below is meaningful.
        $this->assertEquals($testLabels, $predicted);

        $filename = 'naive-bayes-test-'.random_int(100, 999).'-'.uniqid();
        $filepath = tempnam(sys_get_temp_dir(), $filename);

        // tempnam() returns false on failure; fail with a clear message
        // instead of handing a non-string path to ModelManager.
        $this->assertNotFalse($filepath, 'Unable to create a temporary file for the model.');

        try {
            $modelManager = new ModelManager();
            $modelManager->saveToFile($classifier, $filepath);

            $restoredClassifier = $modelManager->restoreFromFile($filepath);

            $this->assertEquals($classifier, $restoredClassifier);
            $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
        } finally {
            // tempnam() creates the file on disk; do not leave it behind.
            if (is_file($filepath)) {
                unlink($filepath);
            }
        }
    }
}