php-ml/src/Phpml/Clustering/DBSCAN.php

114 lines
2.5 KiB
PHP
Raw Normal View History

2016-04-30 22:47:44 +00:00
<?php
2016-04-30 22:56:43 +00:00
2016-11-20 21:53:17 +00:00
declare(strict_types=1);
2016-04-30 22:47:44 +00:00
namespace Phpml\Clustering;
use Phpml\Math\Distance;
2016-04-30 22:56:03 +00:00
use Phpml\Math\Distance\Euclidean;
2016-04-30 22:47:44 +00:00
/**
 * Density-based clustering (DBSCAN).
 *
 * A sample is a "core" sample when at least $minSamples samples (the sample
 * itself included, as long as $epsilon is positive) lie strictly closer than
 * $epsilon to it. A cluster is a set of core samples that can be reached from
 * each other through such neighbourhoods, plus the non-core "border" samples
 * inside any of those neighbourhoods. Samples that are reachable from no core
 * sample are noise and are left out of the result.
 */
class DBSCAN implements Clusterer
{
    /**
     * Label for samples that were examined but belong to no cluster (yet).
     */
    const NOISE = -1;

    /**
     * Neighbourhood radius; the comparison against it is strict (distance < epsilon).
     *
     * @var float
     */
    private $epsilon;

    /**
     * Minimum neighbourhood size for a sample to count as a core sample.
     *
     * @var int
     */
    private $minSamples;

    /**
     * @var Distance
     */
    private $distanceMetric;

    /**
     * @param float         $epsilon        neighbourhood radius
     * @param int           $minSamples     minimum neighbourhood size of a core sample
     * @param Distance|null $distanceMetric metric used to compare samples; a new
     *                                      Euclidean instance is used when null
     */
    public function __construct($epsilon = 0.5, $minSamples = 3, Distance $distanceMetric = null)
    {
        $this->epsilon = $epsilon;
        $this->minSamples = $minSamples;
        $this->distanceMetric = $distanceMetric ?? new Euclidean();
    }

    /**
     * Groups the given samples into density-connected clusters.
     *
     * Clusters are returned in the order their first core sample appears in
     * $samples; inside a cluster the samples keep their input order. Integer
     * keys are re-indexed from zero, string keys are preserved. Noise samples
     * are not part of the result.
     *
     * @param array $samples
     *
     * @return array list of clusters, each cluster being an array of samples
     */
    public function cluster(array $samples)
    {
        // Sample key => cluster number, or self::NOISE. A missing key means
        // the sample has not been examined yet.
        $labels = [];
        $clusterCount = 0;

        foreach ($samples as $index => $sample) {
            if (isset($labels[$index])) {
                continue;
            }

            $neighbours = $this->getSamplesInRegion($sample, $samples);

            if (count($neighbours) < $this->minSamples) {
                // Not a core sample. It may still be adopted as a border
                // sample while a later cluster is expanded.
                $labels[$index] = self::NOISE;

                continue;
            }

            $labels[$index] = $clusterCount;
            $this->expandCluster($samples, array_keys($neighbours), $labels, $clusterCount);
            ++$clusterCount;
        }

        return $this->groupByLabel($samples, $labels, $clusterCount);
    }

    /**
     * Returns every sample whose distance to $localSample is strictly below epsilon.
     *
     * @param array $localSample
     * @param array $samples
     *
     * @return array matching samples, indexed by their key in $samples
     */
    private function getSamplesInRegion($localSample, array $samples): array
    {
        $region = [];

        foreach ($samples as $index => $sample) {
            if ($this->distanceMetric->distance($localSample, $sample) < $this->epsilon) {
                $region[$index] = $sample;
            }
        }

        return $region;
    }

    /**
     * Grows cluster $clusterId from the given seed keys.
     *
     * Neighbourhoods are always searched in the complete data set, so the
     * cluster can spread along chains of core samples instead of being
     * limited to the neighbourhood of its first core sample.
     *
     * @param array $samples   complete data set
     * @param array $seeds     keys of samples still to be examined
     * @param array $labels    sample key => cluster number or self::NOISE (updated in place)
     * @param int   $clusterId number of the cluster being grown
     */
    private function expandCluster(array $samples, array $seeds, array &$labels, int $clusterId)
    {
        while ($seeds !== []) {
            $index = array_pop($seeds);

            if (isset($labels[$index])) {
                // Former noise within reach of a core sample is a border
                // sample of this cluster. Samples that already carry a
                // cluster number keep it.
                if ($labels[$index] === self::NOISE) {
                    $labels[$index] = $clusterId;
                }

                continue;
            }

            $labels[$index] = $clusterId;
            $neighbours = $this->getSamplesInRegion($samples[$index], $samples);

            // Same core-sample criterion as in cluster(): only core samples
            // propagate the cluster to their neighbours.
            if (count($neighbours) < $this->minSamples) {
                continue;
            }

            foreach (array_keys($neighbours) as $neighbourIndex) {
                if (!isset($labels[$neighbourIndex]) || $labels[$neighbourIndex] === self::NOISE) {
                    $seeds[] = $neighbourIndex;
                }
            }
        }
    }

    /**
     * Turns the label map into the list-of-clusters structure returned by cluster().
     *
     * @param array $samples
     * @param array $labels       sample key => cluster number or self::NOISE
     * @param int   $clusterCount number of clusters found
     *
     * @return array
     */
    private function groupByLabel(array $samples, array $labels, int $clusterCount): array
    {
        // Pre-create the slots so clusters come out in discovery order even
        // when an adopted border sample precedes its cluster's first core
        // sample in the input.
        $clusters = array_fill(0, $clusterCount, []);

        foreach ($samples as $index => $sample) {
            if ($labels[$index] !== self::NOISE) {
                $clusters[$labels[$index]][$index] = $sample;
            }
        }

        // array_merge() renumbers integer keys from zero and keeps string keys.
        foreach ($clusters as $clusterId => $members) {
            $clusters[$clusterId] = \array_merge($members, []);
        }

        return $clusters;
    }
}