2017-02-07 11:37:56 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
declare(strict_types=1);
|
|
|
|
|
|
|
|
namespace Phpml\Classification\Ensemble;
|
|
|
|
|
|
|
|
use Phpml\Classification\Classifier;
|
|
|
|
use Phpml\Classification\DecisionTree;
|
2017-11-06 07:56:37 +00:00
|
|
|
use Phpml\Helper\Predictable;
|
|
|
|
use Phpml\Helper\Trainable;
|
2017-02-07 11:37:56 +00:00
|
|
|
|
|
|
|
/**
 * Bootstrap-aggregating ("bagging") ensemble classifier.
 *
 * A configurable number of base classifiers is created, each one is trained
 * on a random subset of the training data drawn with replacement, and a
 * prediction is the label returned by the largest number of base classifiers.
 */
class Bagging implements Classifier
{
    use Trainable, Predictable;

    /**
     * Number of training samples accumulated over all train() calls.
     *
     * @var int
     */
    protected $numSamples;

    /**
     * Targets (labels) accumulated over all train() calls.
     *
     * @var array
     */
    private $targets = [];

    /**
     * Number of features in the first sample of the most recent train() call.
     *
     * @var int
     */
    protected $featureCount = 0;

    /**
     * Number of base classifiers in the ensemble.
     *
     * @var int
     */
    protected $numClassifier;

    /**
     * Fully-qualified class name of the base classifier to instantiate.
     *
     * @var string
     */
    protected $classifier = DecisionTree::class;

    /**
     * Constructor arguments for the base classifier. Only the order of the
     * values matters; the keys are ignored.
     *
     * @var array
     */
    protected $classifierOptions = ['depth' => 20];

    /**
     * Base classifier instances created by the last train() call.
     *
     * @var array
     */
    protected $classifiers;

    /**
     * Size of each bootstrap subset as a fraction of the training set.
     *
     * @var float
     */
    protected $subsetRatio = 0.7;

    /**
     * Samples accumulated over all train() calls.
     *
     * @var array
     */
    private $samples = [];

    /**
     * Creates an ensemble classifier with the given number of base classifiers.
     * The default number of base classifiers is 50.
     * More base classifiers generally give better performance, at the cost of
     * processing time.
     *
     * @param int $numClassifier Number of base classifiers to train
     */
    public function __construct(int $numClassifier = 50)
    {
        $this->numClassifier = $numClassifier;
    }

    /**
     * Sets the ratio of samples used to create each 'bootstrap' subset,
     * i.e., random samples drawn from the original dataset with replacement
     * (repeats allowed), on which each base classifier is trained.
     *
     * @param float $ratio Value between 0.1 and 1.0 (both inclusive)
     *
     * @return $this
     *
     * @throws \Exception When the ratio is outside the accepted range
     */
    public function setSubsetRatio(float $ratio)
    {
        if ($ratio < 0.1 || $ratio > 1.0) {
            throw new \Exception('Subset ratio should be between 0.1 and 1.0');
        }

        $this->subsetRatio = $ratio;

        return $this;
    }

    /**
     * Sets the base classifier. The default is DecisionTree::class, but any
     * class that implements the <i>Classifier</i> interface can be used. <br>
     * The classifier's parameters must be given in the order in which they
     * appear in the classifier's constructor; parameter names (array keys)
     * are ignored.
     *
     * @param string $classifier        Class name of the base classifier
     * @param array  $classifierOptions Constructor arguments, in positional order
     *
     * @return $this
     */
    public function setClassifer(string $classifier, array $classifierOptions = [])
    {
        $this->classifier = $classifier;
        $this->classifierOptions = $classifierOptions;

        return $this;
    }

    /**
     * Adds the given samples/targets to the data seen so far, then creates a
     * fresh set of base classifiers and trains each of them on its own
     * bootstrap subset of the accumulated data.
     *
     * @param array $samples List of samples, each one an array of feature values
     * @param array $targets Labels, one per sample
     *
     * @throws \InvalidArgumentException When no samples are given or the number
     *                                   of samples and targets differs
     */
    public function train(array $samples, array $targets): void
    {
        if ($samples === []) {
            throw new \InvalidArgumentException('At least one sample is required to train the classifier');
        }

        // getRandomSubset() reads samples and targets by the same index,
        // so both lists have to line up.
        if (count($samples) !== count($targets)) {
            throw new \InvalidArgumentException('Number of samples and targets must be equal');
        }

        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);
        // reset() returns the first sample whatever its key is.
        $this->featureCount = count(reset($samples));
        $this->numSamples = count($this->samples);

        // Init classifiers and train them with bootstrap samples
        $this->classifiers = $this->initClassifiers();
        $index = 0;
        foreach ($this->classifiers as $classifier) {
            [$subsetSamples, $subsetTargets] = $this->getRandomSubset($index);
            $classifier->train($subsetSamples, $subsetTargets);
            ++$index;
        }
    }

    /**
     * Draws a bootstrap subset (random picks with replacement) from the
     * accumulated training data.
     *
     * The random generator is seeded with $index, so the same index always
     * produces the same sequence of picks.
     *
     * @param int $index Position of the base classifier; used as the random seed
     *
     * @return array Two-element array: [samples, targets]
     */
    protected function getRandomSubset(int $index) : array
    {
        $samples = [];
        $targets = [];

        srand($index);
        $bootstrapSize = $this->subsetRatio * $this->numSamples;
        $lastIndex = $this->numSamples - 1;
        for ($i = 0; $i < $bootstrapSize; ++$i) {
            $rand = rand(0, $lastIndex);
            $samples[] = $this->samples[$rand];
            $targets[] = $this->targets[$rand];
        }

        return [$samples, $targets];
    }

    /**
     * Instantiates the configured number of base classifiers.
     *
     * @return array List of classifier instances, each one passed through
     *               initSingleClassifier()
     */
    protected function initClassifiers() : array
    {
        // The reflection object does not depend on the loop, so build it once.
        $ref = new \ReflectionClass($this->classifier);

        // Options are positional by contract. Dropping the keys keeps PHP 8+
        // from treating string keys (e.g. 'depth') as named arguments.
        $arguments = array_values($this->classifierOptions);

        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; ++$i) {
            $obj = $arguments === []
                ? $ref->newInstance()
                : $ref->newInstanceArgs($arguments);

            $classifiers[] = $this->initSingleClassifier($obj);
        }

        return $classifiers;
    }

    /**
     * Hook applied to every freshly created base classifier before it is
     * stored. This implementation returns the classifier unchanged.
     *
     * @param Classifier $classifier
     *
     * @return Classifier
     */
    protected function initSingleClassifier($classifier)
    {
        return $classifier;
    }

    /**
     * Predicts the label of a single sample by majority vote: every base
     * classifier predicts the sample and the most frequent label wins.
     *
     * @param array $sample Feature values of the sample to classify
     *
     * @return mixed The label predicted by the largest number of classifiers
     */
    protected function predictSample(array $sample)
    {
        $predictions = [];
        foreach ($this->classifiers as $classifier) {
            /* @var $classifier Classifier */
            $predictions[] = $classifier->predict($sample);
        }

        // Count the votes per label and bring the most frequent label first.
        $counts = array_count_values($predictions);
        arsort($counts);
        reset($counts);

        return key($counts);
    }
}
|