2016-02-09 06:45:07 +00:00
|
|
|
<?php
|
2016-04-04 20:49:54 +00:00
|
|
|
|
|
|
|
declare (strict_types = 1);
|
2016-02-09 06:45:07 +00:00
|
|
|
|
|
|
|
namespace Phpml\Classifier;
|
|
|
|
|
2016-04-04 20:25:27 +00:00
|
|
|
class NaiveBayes implements Classifier
{
    /**
     * Training samples exactly as handed to train().
     *
     * Each entry is read as a map of token => weight when a sample is scored.
     *
     * @var array
     */
    private $samples = [];

    /**
     * Training labels; the label stored under key K belongs to $samples[K].
     *
     * @var array
     */
    private $labels = [];

    /**
     * Stores the training set. Nothing is precomputed here; all scoring
     * happens at prediction time.
     *
     * @param array $samples
     * @param array $labels
     */
    public function train(array $samples, array $labels)
    {
        $this->samples = $samples;
        $this->labels = $labels;
    }

    /**
     * Predicts the label of one sample, or of every sample in a batch.
     *
     * The argument is treated as a batch when its first element is an array;
     * otherwise it is treated as a single sample.
     *
     * @param array $samples
     *
     * @return mixed the predicted label for a single sample, or an array of
     *               predicted labels keyed like the input batch
     */
    public function predict(array $samples)
    {
        // Look at the first element instead of offset 0: token-keyed samples,
        // empty input and batches whose keys do not start at 0 have no offset 0.
        // reset() only moves the pointer of this method's own copy.
        if (!is_array(reset($samples))) {
            return $this->predictSample($samples);
        }

        $predicted = [];
        foreach ($samples as $index => $sample) {
            $predicted[$index] = $this->predictSample($sample);
        }

        return $predicted;
    }

    /**
     * Scores one sample against every training sample and returns the label
     * with the highest total score.
     *
     * The score contributed by a training sample is the sum, over the tokens
     * both share, of (count in $sample) * (value in the training sample).
     *
     * @param array $sample
     *
     * @return mixed the best-scoring label as an array key (int or string),
     *               or null when no labels have been trained
     */
    private function predictSample(array $sample)
    {
        $predictions = [];

        foreach ($this->labels as $index => $label) {
            // Several training samples may share a label: keep adding to the
            // running score instead of resetting it for every training row.
            $predictions[$label] = $predictions[$label] ?? 0;

            // A label without a matching sample row contributes nothing.
            $trained = $this->samples[$index] ?? [];

            foreach ($sample as $token => $count) {
                if (array_key_exists($token, $trained)) {
                    $predictions[$label] += $count * $trained[$token];
                }
            }
        }

        // Highest score first; the winning label is then the first key.
        arsort($predictions, SORT_NUMERIC);
        reset($predictions);

        return key($predictions);
    }
}
|