php-ml/src/Classification/KNearestNeighbors.php

76 lines
1.7 KiB
PHP
Raw Normal View History

2016-04-04 20:25:27 +00:00
<?php
2016-04-04 20:49:54 +00:00
2016-11-20 21:53:17 +00:00
declare(strict_types=1);
2016-04-04 20:25:27 +00:00
namespace Phpml\Classification;
2016-04-04 20:25:27 +00:00
2016-05-07 21:04:58 +00:00
use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
2016-04-20 21:56:33 +00:00
use Phpml\Math\Distance;
use Phpml\Math\Distance\Euclidean;
2016-04-04 20:25:27 +00:00
/**
 * k-nearest-neighbours classifier.
 *
 * A sample is labelled by majority vote among the $k stored training samples
 * that lie closest to it according to the configured distance metric.
 */
class KNearestNeighbors implements Classifier
{
    use Trainable;
    use Predictable;

    /**
     * Number of nearest neighbours that take part in the vote.
     *
     * @var int
     */
    private $k;

    /**
     * Metric used to measure how far apart two samples are.
     *
     * @var Distance
     */
    private $distanceMetric;

    /**
     * @param int           $k              number of neighbours consulted per prediction
     *                                      (stored as given; no range check is performed here)
     * @param Distance|null $distanceMetric (if null then Euclidean distance as default)
     */
    public function __construct(int $k = 3, ?Distance $distanceMetric = null)
    {
        if ($distanceMetric === null) {
            $distanceMetric = new Euclidean();
        }

        $this->k = $k;
        // Start with an empty training set.
        // NOTE(review): samples/targets are presumably declared by the Trainable trait - confirm.
        $this->samples = [];
        $this->targets = [];
        $this->distanceMetric = $distanceMetric;
    }

    /**
     * Predicts the label of a single sample by majority vote of its nearest neighbours.
     *
     * @param array $sample feature vector to classify
     *
     * @return mixed the winning label in its array-key form (int or string),
     *               or null when no training targets are stored
     */
    protected function predictSample(array $sample)
    {
        $distances = $this->kNeighborsDistances($sample);

        // One zero-initialised counter per distinct label, keyed in order of
        // first appearance in the training targets.
        $predictions = array_fill_keys($this->targets, 0);

        // Each selected neighbour casts one vote for its own label.
        foreach (array_keys($distances) as $index) {
            ++$predictions[$this->targets[$index]];
        }

        // Highest vote count first; the key at the front is the prediction.
        arsort($predictions);
        reset($predictions);

        return key($predictions);
    }

    /**
     * Computes the distance from $sample to every stored training sample and
     * keeps the closest ones.
     *
     * @param array $sample feature vector to compare against the training samples
     *
     * @return array distances of the selected neighbours in ascending order,
     *               keyed by the index of the training sample
     *
     * @throws \Phpml\Exception\InvalidArgumentException
     */
    private function kNeighborsDistances(array $sample): array
    {
        $distances = [];

        foreach ($this->samples as $index => $neighbor) {
            $distances[$index] = $this->distanceMetric->distance($sample, $neighbor);
        }

        // asort() keeps the sample indexes attached to their distances, and
        // preserve_keys = true keeps them through the slice so that the caller
        // can look up the matching targets.
        asort($distances);

        return array_slice($distances, 0, $this->k, true);
    }
}