2016-04-04 20:25:27 +00:00
|
|
|
<?php
|
2016-04-04 20:49:54 +00:00
|
|
|
|
|
|
|
declare (strict_types = 1);
|
2016-04-04 20:25:27 +00:00
|
|
|
|
|
|
|
namespace Phpml\Classifier;
|
|
|
|
|
2016-04-05 19:06:53 +00:00
|
|
|
use Phpml\Metric\Distance;
|
|
|
|
|
2016-04-04 20:25:27 +00:00
|
|
|
/**
 * k-nearest-neighbours classifier.
 *
 * Keeps the training set in memory and labels a new sample by majority vote among
 * the $k stored samples with the smallest distance to it, as computed by
 * Distance::euclidean().
 */
class KNearestNeighbors implements Classifier
{
    /**
     * Number of neighbours that take part in the vote; always >= 1.
     *
     * @var int
     */
    private $k;

    /**
     * Training samples, exactly as passed to train().
     *
     * @var array
     */
    private $samples;

    /**
     * Training labels; the label of $samples[$i] is looked up as $labels[$i].
     *
     * @var array
     */
    private $labels;

    /**
     * @param int $k number of nearest neighbours consulted by predict()
     *
     * @throws \InvalidArgumentException when $k is smaller than 1
     */
    public function __construct(int $k = 3)
    {
        // The neighbourhood is cut out with array_slice(), which treats a negative
        // length as "stop that many elements before the end" and a zero length as
        // "nothing". A non-positive $k would therefore silently vote with the wrong
        // set of samples instead of failing.
        if ($k < 1) {
            throw new \InvalidArgumentException(
                sprintf('Number of neighbors must be at least 1, %d given', $k)
            );
        }

        $this->k = $k;
        $this->samples = [];
        $this->labels = [];
    }

    /**
     * Stores the training set, replacing any previously stored one.
     *
     * Both arrays must use the same keys: predict() looks up the label of a
     * neighbour under the key that neighbour has in $samples.
     *
     * @param array $samples
     * @param array $labels
     */
    public function train(array $samples, array $labels)
    {
        $this->samples = $samples;
        $this->labels = $labels;
    }

    /**
     * Predicts the label of $sample by majority vote among its nearest neighbours.
     *
     * When several labels share the highest vote count, the label of the closest
     * neighbour among them wins. Labels are used as array keys while counting, so
     * they should be integers or strings; the returned value is the label as it
     * was passed to train().
     *
     * @param array $sample
     *
     * @return mixed the winning label, or null when no training data is stored
     */
    public function predict(array $sample)
    {
        // Keys of the training samples, ordered from nearest to farthest.
        $neighbors = array_keys($this->kNeighborsDistances($sample));

        if ($neighbors === []) {
            return null;
        }

        // Only the neighbours vote; labels outside the neighbourhood need no counter.
        $votes = [];
        foreach ($neighbors as $index) {
            $label = $this->labels[$index];
            $votes[$label] = ($votes[$label] ?? 0) + 1;
        }

        $highest = max($votes);

        // Walk the neighbours nearest-first so that a tie is decided by proximity
        // rather than by the sort order of equal elements.
        foreach ($neighbors as $index) {
            $label = $this->labels[$index];
            if ($votes[$label] === $highest) {
                return $label;
            }
        }

        // Not reachable: $highest is taken from $votes, so some neighbour matches it.
        return null;
    }

    /**
     * Computes the distance from $sample to every stored sample and keeps the
     * $k smallest.
     *
     * @param array $sample
     *
     * @return array distances keyed by the training sample's key, in ascending order
     *
     * @throws \Phpml\Exception\InvalidArgumentException
     */
    private function kNeighborsDistances(array $sample): array
    {
        $distances = [];

        foreach ($this->samples as $index => $neighbor) {
            $distances[$index] = Distance::euclidean($sample, $neighbor);
        }

        // asort() keeps the sample keys attached to their distances.
        asort($distances);

        // preserve_keys = true so integer keys still identify the training samples.
        return array_slice($distances, 0, $this->k, true);
    }
}
|