20];

    /**
     * Base classifier instances: assigned in train() from initClassifiers()
     * and queried one by one in predictSample().
     *
     * @var array
     */
    protected $classifiers;

    /**
     * Ratio that getRandomSubset() uses to size each random training subset.
     * Defaults to 0.5; setSubsetRatio() only accepts values from 0.1 to 1.0.
     *
     * @var float
     */
    protected $subsetRatio = 0.5;

    /**
     * Training samples collected so far; train() merges every new batch into this list.
     *
     * @var array
     */
    private $samples = [];

    /**
     * Creates an ensemble classifier with given number of base classifiers
* Default number of base classifiers is 50.
     * The more base classifiers, the better the performance, but at the cost of processing time.
     *
     * @param int $numClassifier number of base classifiers that train() will create
     */
    public function __construct($numClassifier = 50)
    {
        $this->numClassifier = $numClassifier;
    }

    /**
     * This method determines the ratio of samples used to create the 'bootstrap' subset,
     * i.e., random samples drawn from the original dataset with replacement (allow repeats),
     * to train each base classifier.
     *
     * @param float $ratio value between 0.1 and 1.0 (both inclusive)
     * @return $this
     * @throws \Exception when $ratio lies outside [0.1, 1.0] or is NAN
     */
    public function setSubsetRatio(float $ratio)
    {
        // Expressed as a negated "inside the range" test: NAN compares false
        // against every bound, so it would slip through separate "< min" and
        // "> max" checks but is rejected here.
        if (!($ratio >= 0.1 && $ratio <= 1.0)) {
            throw new \Exception("Subset ratio should be between 0.1 and 1.0");
        }

        $this->subsetRatio = $ratio;

        return $this;
    }

    /**
     * This method is used to set the base classifier. Default value is
     * DecisionTree::class, but any class that implements the Classifier
     * can be used.
* While giving the parameters of the classifier, the values should be
     * given in the order they are in the constructor of the classifier and parameter
     * names are neglected (array keys are dropped before the constructor is called).
     *
     * @param string $classifier
     * @param array $classifierOptions
     * @return $this
     */
    public function setClassifer(string $classifier, array $classifierOptions = [])
    {
        $this->classifier = $classifier;
        $this->classifierOptions = $classifierOptions;

        return $this;
    }

    /**
     * Merges the given samples and targets into the ones already stored, then
     * creates a new set of base classifiers and trains each of them on its own
     * random subset of everything stored so far.
     *
     * @param array $samples
     * @param array $targets
     */
    public function train(array $samples, array $targets)
    {
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);

        // An empty batch has no first sample to measure; keep the feature
        // count recorded earlier instead of reading an undefined offset.
        if (isset($samples[0])) {
            $this->featureCount = count($samples[0]);
        }
        $this->numSamples = count($this->samples);

        // Init classifiers and train them with random sub-samples
        $this->classifiers = $this->initClassifiers();

        $index = 0;
        foreach ($this->classifiers as $classifier) {
            // Separate local names so the method's own arguments are not overwritten.
            list($subsetSamples, $subsetTargets) = $this->getRandomSubset($index);
            $classifier->train($subsetSamples, $subsetTargets);
            ++$index;
        }
    }

    /**
     * Draws a random subset, with replacement, of the stored samples together
     * with their matching targets.
     *
     * The subset holds numSamples * subsetRatio entries (rounded up). Returns
     * two empty lists when no samples are stored.
     *
     * @param int $index position of the base classifier the subset is drawn for;
     *                   it does not influence the draw and is kept so existing
     *                   callers and overrides remain compatible
     * @return array two-element array: [samples, targets]
     */
    protected function getRandomSubset($index)
    {
        $subsetSize = (int) ceil($this->numSamples * $this->subsetRatio);
        $lastPosition = $this->numSamples - 1;

        $samples = [];
        $targets = [];
        for ($i = 0; $i < $subsetSize; $i++) {
            // Every pick covers the full index range. Raising the lower bound
            // with $index would keep the first stored samples out of most subsets.
            $pick = rand(0, $lastPosition);
            $samples[] = $this->samples[$pick];
            $targets[] = $this->targets[$pick];
        }

        return [$samples, $targets];
    }

    /**
     * Creates numClassifier instances of the configured classifier class and
     * passes each one through initSingleClassifier().
     *
     * @return array
     */
    protected function initClassifiers()
    {
        // Class and constructor arguments are identical for every instance,
        // so they are resolved once up front.
        $reflection = new \ReflectionClass($this->classifier);

        // Constructor arguments are positional. Keys are dropped explicitly
        // because PHP 8 would otherwise interpret string keys as named arguments.
        $arguments = array_values($this->classifierOptions);

        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; $i++) {
            $instance = $arguments
                ? $reflection->newInstanceArgs($arguments)
                : $reflection->newInstance();

            $classifiers[] = $this->initSingleClassifier($instance, $i);
        }

        return $classifiers;
    }

    /**
     * Called by initClassifiers() for every newly created base classifier;
     * this implementation hands the classifier back unchanged.
     *
     * @param Classifier $classifier
     * @param int $index
     * @return Classifier
     */
    protected function initSingleClassifier($classifier, $index)
    {
        return $classifier;
    }

    /**
     * Lets every base classifier predict the sample and returns the
     * prediction that occurs most often.
     *
     * @param array $sample
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        $predictions = [];
        foreach ($this->classifiers as $classifier) {
            /* @var $classifier Classifier */
            $predictions[] = $classifier->predict($sample);
        }

        // Tally the votes, order them from most to least frequent and
        // return the label sitting in first position.
        $votes = array_count_values($predictions);
        arsort($votes);
        reset($votes);

        return key($votes);
    }
}