2016-04-30 22:47:44 +00:00
|
|
|
<?php
|
2016-04-30 22:56:43 +00:00
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-04-30 22:47:44 +00:00
|
|
|
|
|
|
|
namespace Phpml\Clustering;
|
|
|
|
|
|
|
|
use Phpml\Math\Distance;
|
2016-04-30 22:56:03 +00:00
|
|
|
use Phpml\Math\Distance\Euclidean;
|
2016-04-30 22:47:44 +00:00
|
|
|
|
|
|
|
/**
 * Density-based clustering (DBSCAN).
 *
 * A sample is a "core" sample when at least $minSamples samples (itself
 * included) lie strictly closer than $epsilon to it according to the
 * configured distance metric. A cluster is a core sample plus every sample
 * reachable from it through a chain of core samples. Samples that end up in
 * no cluster are treated as noise and are left out of the result.
 */
class DBSCAN implements Clusterer
{
    /**
     * Label for samples that were examined but are not (yet) part of a cluster.
     */
    private const NOISE = -1;

    /**
     * Neighbourhood radius; a neighbour must be strictly closer than this.
     *
     * @var float
     */
    private $epsilon;

    /**
     * Minimum neighbourhood size (the sample itself counts) for a core sample.
     *
     * @var int
     */
    private $minSamples;

    /**
     * Metric used to measure the distance between two samples.
     *
     * @var Distance
     */
    private $distanceMetric;

    /**
     * @param float         $epsilon        neighbourhood radius
     * @param int           $minSamples     neighbourhood size required for a core sample
     * @param Distance|null $distanceMetric distance metric; Euclidean is used when null
     */
    public function __construct(float $epsilon = 0.5, int $minSamples = 3, ?Distance $distanceMetric = null)
    {
        $this->epsilon = $epsilon;
        $this->minSamples = $minSamples;
        $this->distanceMetric = $distanceMetric ?? new Euclidean();
    }

    /**
     * Groups the given samples into clusters.
     *
     * @param array $samples list (or map) of samples, each sample being an array
     *
     * @return array list of clusters in order of discovery; inside a cluster
     *               the samples keep their input order, integer keys are
     *               renumbered from 0 and string keys are preserved
     */
    public function cluster(array $samples) : array
    {
        // sample key => cluster id (0, 1, 2, ...) or self::NOISE
        $labels = [];
        $clusterCount = 0;

        foreach ($samples as $index => $sample) {
            if (isset($labels[$index])) {
                continue;
            }

            $neighbours = $this->getIndicesInRegion($sample, $samples);
            if (count($neighbours) < $this->minSamples) {
                // Not a core sample. It may still be claimed later as a
                // border sample of a cluster discovered afterwards.
                $labels[$index] = self::NOISE;

                continue;
            }

            $labels[$index] = $clusterCount;
            $this->expandCluster($samples, $neighbours, $labels, $clusterCount);
            ++$clusterCount;
        }

        return $this->groupByCluster($samples, $labels, $clusterCount);
    }

    /**
     * Returns the keys of all samples strictly closer than epsilon to $center.
     *
     * @param array $center  sample whose neighbourhood is wanted
     * @param array $samples the complete data set
     *
     * @return array list of sample keys, in the iteration order of $samples
     */
    private function getIndicesInRegion(array $center, array $samples) : array
    {
        $indices = [];

        foreach ($samples as $index => $sample) {
            if ($this->distanceMetric->distance($center, $sample) < $this->epsilon) {
                $indices[] = $index;
            }
        }

        return $indices;
    }

    /**
     * Grows one cluster from the neighbourhood of a core sample.
     *
     * Neighbourhoods are always searched in the complete data set, so the
     * cluster can extend beyond the seed region through further core samples.
     *
     * @param array $samples   the complete data set
     * @param array $seeds     keys of the samples in the core sample's neighbourhood
     * @param array $labels    sample key => label map, updated in place
     * @param int   $clusterId id of the cluster being grown
     */
    private function expandCluster(array $samples, array $seeds, array &$labels, int $clusterId) : void
    {
        while ($seeds !== []) {
            $index = \array_pop($seeds);

            if (isset($labels[$index])) {
                // Former noise becomes a border sample of this cluster; a
                // sample that already belongs to a cluster is left alone.
                if ($labels[$index] === self::NOISE) {
                    $labels[$index] = $clusterId;
                }

                continue;
            }

            $labels[$index] = $clusterId;

            $neighbours = $this->getIndicesInRegion($samples[$index], $samples);
            if (count($neighbours) < $this->minSamples) {
                // Border sample: belongs to the cluster but does not extend it.
                continue;
            }

            foreach ($neighbours as $neighbour) {
                if (!isset($labels[$neighbour]) || $labels[$neighbour] === self::NOISE) {
                    $seeds[] = $neighbour;
                }
            }
        }
    }

    /**
     * Turns the label map into the list of clusters returned by cluster().
     *
     * @param array $samples      the complete data set
     * @param array $labels       sample key => cluster id or self::NOISE
     * @param int   $clusterCount number of clusters that were discovered
     *
     * @return array list of clusters, noise samples omitted
     */
    private function groupByCluster(array $samples, array $labels, int $clusterCount) : array
    {
        $clusters = \array_fill(0, $clusterCount, []);

        foreach ($samples as $index => $sample) {
            $label = $labels[$index];
            if ($label !== self::NOISE) {
                $clusters[$label][$index] = $sample;
            }
        }

        // array_merge() renumbers integer keys and keeps string keys, which
        // is the key layout callers have always received for a cluster.
        foreach ($clusters as $clusterId => $members) {
            $clusters[$clusterId] = \array_merge($members);
        }

        return $clusters;
    }
}
|