2017-02-07 11:37:56 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
declare(strict_types=1);
|
|
|
|
|
2018-01-06 12:09:33 +00:00
|
|
|
namespace Phpml\Tests\Classification\Ensemble;
|
2017-02-07 11:37:56 +00:00
|
|
|
|
|
|
|
use Phpml\Classification\DecisionTree;
|
2017-11-06 07:56:37 +00:00
|
|
|
use Phpml\Classification\Ensemble\Bagging;
|
2017-02-07 11:37:56 +00:00
|
|
|
use Phpml\Classification\NaiveBayes;
|
|
|
|
use Phpml\ModelManager;
|
|
|
|
use PHPUnit\Framework\TestCase;
|
|
|
|
|
|
|
|
/**
 * Tests for the Bagging ensemble classifier.
 *
 * The fixtures are small weather-style rows whose last column is the class
 * label ('Play' / 'Dont_play'); getData() multiplies and shuffles them before
 * they are handed to the classifier.
 */
class BaggingTest extends TestCase
{
    /**
     * Base fixture: four feature columns followed by the target label.
     *
     * @var array
     */
    private $data = [
        ['sunny', 85, 85, 'false', 'Dont_play'],
        ['sunny', 80, 90, 'true', 'Dont_play'],
        ['overcast', 83, 78, 'false', 'Play'],
        ['rain', 70, 96, 'false', 'Play'],
        ['rain', 68, 80, 'false', 'Play'],
        ['rain', 65, 70, 'true', 'Dont_play'],
        ['overcast', 64, 65, 'true', 'Play'],
        ['sunny', 72, 95, 'false', 'Dont_play'],
        ['sunny', 69, 70, 'false', 'Play'],
        ['rain', 75, 80, 'false', 'Play'],
        ['sunny', 75, 70, 'true', 'Play'],
        ['overcast', 72, 90, 'true', 'Play'],
        ['overcast', 81, 75, 'false', 'Play'],
        ['rain', 71, 80, 'true', 'Dont_play'],
    ];

    /**
     * Additional rows (same column layout as $data) used for a second
     * training pass in testPredictSingleSample().
     *
     * @var array
     */
    private $extraData = [
        ['scorching', 90, 95, 'false', 'Dont_play'],
        ['scorching', 0, 0, 'false', 'Dont_play'],
    ];

    /**
     * Trains on the base fixture, checks three single-sample predictions,
     * then trains again with the extra rows and checks two more.
     *
     * @return mixed the classifier built by getClassifier(), after both training passes
     */
    public function testPredictSingleSample()
    {
        [$data, $targets] = $this->getData($this->data);
        $classifier = $this->getClassifier();
        // Testing with default options
        $classifier->train($data, $targets);
        $this->assertEquals('Dont_play', $classifier->predict(['sunny', 78, 72, 'false']));
        $this->assertEquals('Play', $classifier->predict(['overcast', 60, 60, 'false']));
        $this->assertEquals('Dont_play', $classifier->predict(['rain', 60, 60, 'true']));

        // Second training pass with the extra rows; the earlier 'overcast'
        // prediction is expected to stay the same afterwards.
        [$data, $targets] = $this->getData($this->extraData);
        $classifier->train($data, $targets);
        $this->assertEquals('Dont_play', $classifier->predict(['scorching', 95, 90, 'true']));
        $this->assertEquals('Play', $classifier->predict(['overcast', 60, 60, 'false']));

        return $classifier;
    }

    /**
     * Saves a trained classifier through ModelManager, restores it, and
     * checks that the restored object equals the original and predicts the
     * same labels for the same samples.
     */
    public function testSaveAndRestore(): void
    {
        [$data, $targets] = $this->getData($this->data);
        $classifier = $this->getClassifier(5);
        $classifier->train($data, $targets);

        $testSamples = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false']];
        $predicted = $classifier->predict($testSamples);

        $filename = 'bagging-test-'.random_int(100, 999).'-'.uniqid();
        $filepath = tempnam(sys_get_temp_dir(), $filename);
        // tempnam() returns false on failure; fail here with a clear message
        // instead of passing a non-string path further down.
        $this->assertNotFalse($filepath, 'Could not create a temporary file for the model');

        try {
            $modelManager = new ModelManager();
            $modelManager->saveToFile($classifier, $filepath);

            $restoredClassifier = $modelManager->restoreFromFile($filepath);
            $this->assertEquals($classifier, $restoredClassifier);
            $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
        } finally {
            // tempnam() creates the file on disk; remove it even when an
            // assertion above fails so test runs do not litter the temp dir.
            if (is_file($filepath)) {
                unlink($filepath);
            }
        }
    }

    /**
     * For every base classifier returned by getAvailableBaseClassifiers(),
     * trains both the ensemble and a standalone instance of that base
     * classifier on the same data and checks that they agree on the test
     * samples.
     */
    public function testBaseClassifiers(): void
    {
        [$data, $targets] = $this->getData($this->data);
        $baseClassifiers = $this->getAvailableBaseClassifiers();

        foreach ($baseClassifiers as $base => $params) {
            $classifier = $this->getClassifier();
            $classifier->setClassifer($base, $params);
            $classifier->train($data, $targets);

            // Constructor arguments are passed positionally, in the order
            // they are listed in the parameter map.
            $baseClassifier = new $base(...array_values($params));
            $baseClassifier->train($data, $targets);
            $testData = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false'], ['rain', 60, 60, 'true']];
            foreach ($testData as $test) {
                $result = $classifier->predict($test);
                // Compare against the standalone base classifier; predicting
                // with the ensemble twice would make the assertion trivially true.
                $baseResult = $baseClassifier->predict($test);
                $this->assertEquals($result, $baseResult);
            }
        }
    }

    /**
     * Builds the ensemble under test: a Bagging instance with subset ratio
     * 1.0 and DecisionTree (depth 10) as its base classifier.
     *
     * @param int $numBaseClassifiers value passed to the Bagging constructor
     *
     * @return Bagging
     */
    protected function getClassifier($numBaseClassifiers = 50)
    {
        $classifier = new Bagging($numBaseClassifiers);
        $classifier->setSubsetRatio(1.0);
        $classifier->setClassifer(DecisionTree::class, ['depth' => 10]);

        return $classifier;
    }

    /**
     * Lists the base classifiers exercised by testBaseClassifiers().
     *
     * @return array map of classifier class name => constructor parameters
     */
    protected function getAvailableBaseClassifiers()
    {
        return [
            DecisionTree::class => ['depth' => 5],
            NaiveBayes::class => [],
        ];
    }

    /**
     * Turns fixture rows into a [samples, targets] pair.
     *
     * The rows are repeated 20 times and shuffled, then column 4 is split
     * off as the target of each row.
     *
     * @param array $input rows whose element at index 4 is the target label
     *
     * @return array two-element list: [samples without the label column, targets]
     */
    private function getData(array $input): array
    {
        // Populating input data to a size large enough
        // for base classifiers that they can work with a subset of it
        $populated = [];
        for ($i = 0; $i < 20; ++$i) {
            $populated = array_merge($populated, $input);
        }

        shuffle($populated);
        // Targets are read after shuffling so they stay aligned with the rows.
        $targets = array_column($populated, 4);
        array_walk($populated, static function (&$v): void {
            // Drop the label column from the sample in place.
            array_splice($v, 4, 1);
        });

        return [$populated, $targets];
    }
}
|