mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2025-01-09 00:20:53 +00:00
Fix string representation of integer labels issue in NaiveBayes (#206)
* Update NaiveBayes.php: this fixes an issue with string labels that are string representations of integers, e.g. "1998" being cast to (int) 1998.
* Update NaiveBayes.php: fixes superfluous whitespace error.
* Added tests for naive Bayes with numeric labels.
* Added array_unique.
* Nested array_flips for speed.
* Nested the array flips inside the array map.
* To appease the style CI test.
This commit is contained in:
parent
10070d97fd
commit
e318921076
@ -66,8 +66,7 @@ class NaiveBayes implements Classifier
|
||||
$this->sampleCount = count($this->samples);
|
||||
$this->featureCount = count($this->samples[0]);
|
||||
|
||||
$labelCounts = array_count_values($this->targets);
|
||||
$this->labels = array_keys($labelCounts);
|
||||
$this->labels = array_map('strval', array_flip(array_flip($this->targets)));
|
||||
foreach ($this->labels as $label) {
|
||||
$samples = $this->getSamplesByLabel($label);
|
||||
$this->p[$label] = count($samples) / $this->sampleCount;
|
||||
|
@ -68,4 +68,63 @@ class NaiveBayesTest extends TestCase
|
||||
$this->assertEquals($classifier, $restoredClassifier);
|
||||
$this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
|
||||
}
|
||||
|
||||
/**
 * Trains on three samples labelled with numeric strings and checks that each
 * single-sample prediction matches the expected label.
 */
public function testPredictSimpleNumericLabels(): void
{
    $naiveBayes = new NaiveBayes();
    $naiveBayes->train(
        [[5, 1, 1], [1, 5, 1], [1, 1, 5]],
        ['1996', '1997', '1998']
    );

    // Pairs of [expected label, sample]. A list of pairs (rather than a
    // label-keyed map) is used so the expected labels remain strings.
    $expectations = [
        ['1996', [3, 1, 1]],
        ['1997', [1, 4, 1]],
        ['1998', [1, 1, 6]],
    ];

    foreach ($expectations as [$expectedLabel, $sample]) {
        $this->assertEquals($expectedLabel, $naiveBayes->predict($sample));
    }
}
|
||||
|
||||
/**
 * Checks batch prediction with numeric-string labels, then calls train() a
 * second time with one more labelled sample and checks batch prediction again.
 */
public function testPredictArrayOfSamplesNumericalLabels(): void
{
    $naiveBayes = new NaiveBayes();
    $naiveBayes->train([[5, 1, 1], [1, 5, 1], [1, 1, 5]], ['1996', '1997', '1998']);

    $firstBatch = [[3, 1, 1], [5, 1, 1], [4, 3, 8], [1, 1, 2], [2, 3, 2], [1, 2, 1], [9, 5, 1], [3, 1, 2]];
    $firstExpected = ['1996', '1996', '1998', '1998', '1997', '1997', '1996', '1996'];

    $this->assertEquals($firstExpected, $naiveBayes->predict($firstBatch));

    // Feed an extra set of training data.
    $naiveBayes->train([[1, 1, 6]], ['1999']);

    $secondBatch = [[1, 1, 6], [5, 1, 1]];
    $secondExpected = ['1999', '1996'];

    $this->assertEquals($secondExpected, $naiveBayes->predict($secondBatch));
}
|
||||
|
||||
/**
 * Trains a classifier with numeric-string labels, saves it to a temporary
 * file through ModelManager, restores it, and checks that the restored
 * classifier equals the original and produces the same predictions.
 */
public function testSaveAndRestoreNumericLabels(): void
{
    $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
    $trainLabels = ['1996', '1997', '1998'];

    $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8]];
    $testLabels = ['1996', '1996', '1998'];

    $classifier = new NaiveBayes();
    $classifier->train($trainSamples, $trainLabels);
    $predicted = $classifier->predict($testSamples);

    $filename = 'naive-bayes-test-'.random_int(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk, so remove it even when an
        // assertion above fails; otherwise every run leaves a file behind.
        if (is_string($filepath) && is_file($filepath)) {
            unlink($filepath);
        }
    }
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user