dbscan clustering algorithm

This commit is contained in:
Arkadiusz Kondas 2016-05-01 00:47:44 +02:00
parent 650e7dd20d
commit 22963114c3
3 changed files with 158 additions and 0 deletions

View File

@ -0,0 +1,16 @@
<?php
declare(strict_types = 1);
namespace Phpml\Clustering;
/**
 * Contract for algorithms that partition a set of samples into clusters.
 */
interface Clusterer
{
/**
* Groups the given samples into clusters.
*
* @param array $samples samples to cluster, e.g. a list of numeric coordinate
*                       arrays such as [[1, 1], [8, 7]]
*
* @return array the clusters found; presumably a list of clusters, each being
*               a list of the samples assigned to it (this is the shape DBSCAN
*               returns) - confirm for other implementations
*/
public function cluster(array $samples);
}

View File

@ -0,0 +1,103 @@
<?php
declare(strict_types = 1);
namespace Phpml\Clustering;
use Phpml\Math\Distance;
/**
 * Density-based spatial clustering (DBSCAN).
 *
 * A sample is a "core" sample when at least $minSamples samples (itself
 * included) lie strictly closer than $epsilon to it. A cluster consists of all
 * samples reachable from a core sample through a chain of core samples, plus
 * the non-core "border" samples inside their neighbourhoods. Samples that are
 * reachable from no core sample are noise and are not returned.
 */
class DBSCAN implements Clusterer
{
    /**
     * Label of a sample that was examined but does not belong to any cluster (yet).
     */
    const NOISE = -1;

    /**
     * @var float
     */
    private $epsilon;

    /**
     * @var int
     */
    private $minSamples;

    /**
     * @var Distance
     */
    private $distanceMetric;

    /**
     * @param float $epsilon    neighbourhood radius; the comparison is strict (distance < epsilon)
     * @param int   $minSamples minimum neighbourhood size (the sample itself counts) for a core sample
     */
    public function __construct($epsilon = 0.5, $minSamples = 3)
    {
        $this->epsilon = $epsilon;
        $this->minSamples = $minSamples;
        $this->distanceMetric = new Distance\Euclidean();
    }

    /**
     * Partitions the samples into density-connected clusters.
     *
     * Every sample is assigned to at most one cluster; a border sample that is
     * close to several clusters stays with the first cluster that reaches it.
     *
     * @param array $samples list (or map) of samples, each an array of coordinates
     *
     * @return array list of clusters in order of discovery; each cluster is a
     *               list of its samples in input order (original keys are not
     *               preserved). Noise samples are omitted.
     */
    public function cluster(array $samples)
    {
        // sample key => cluster id (>= 0) or self::NOISE; a missing key means "not examined yet"
        $labels = [];
        $clusterCount = 0;

        foreach ($samples as $index => $sample) {
            if (isset($labels[$index])) {
                continue;
            }

            $region = $this->getSamplesInRegion($sample, $samples);
            if (count($region) < $this->minSamples) {
                // Provisional: a later cluster may still claim this sample as a border sample.
                $labels[$index] = self::NOISE;
                continue;
            }

            $labels[$index] = $clusterCount;
            $this->expandCluster($region, $samples, $clusterCount, $labels);
            ++$clusterCount;
        }

        // Pre-create the clusters so they stay ordered by discovery, then fill
        // them in one pass so members keep the input order.
        $clusters = $clusterCount > 0 ? array_fill(0, $clusterCount, []) : [];
        foreach ($samples as $index => $sample) {
            if ($labels[$index] !== self::NOISE) {
                $clusters[$labels[$index]][] = $sample;
            }
        }

        return $clusters;
    }

    /**
     * Collects every sample lying strictly closer than epsilon to the given one.
     *
     * @param array $localSample centre of the neighbourhood
     * @param array $samples     candidates to test; must be the complete data set
     *                           for density checks to be meaningful
     *
     * @return array matching samples, keyed like $samples
     */
    private function getSamplesInRegion($localSample, array $samples)
    {
        $region = [];

        foreach ($samples as $index => $sample) {
            if ($this->distanceMetric->distance($localSample, $sample) < $this->epsilon) {
                $region[$index] = $sample;
            }
        }

        return $region;
    }

    /**
     * Grows a cluster from the neighbourhood of its first core sample.
     *
     * Works through a queue of candidate keys: each unlabelled candidate joins
     * the cluster and, when it is itself a core sample, contributes its own
     * neighbours (looked up in the whole data set) to the queue. Former noise
     * samples join as border samples and are not expanded. Samples already
     * owned by another cluster are left untouched.
     *
     * @param array $seedRegion neighbourhood of the seed core sample, keyed like $samples
     * @param array $samples    the complete data set
     * @param int   $clusterId  id of the cluster being grown
     * @param array $labels     sample key => cluster id or self::NOISE; updated in place
     *
     * @return void
     */
    private function expandCluster(array $seedRegion, array $samples, int $clusterId, array &$labels)
    {
        $queue = array_keys($seedRegion);
        // Keys that were ever queued, so that no sample is queued twice.
        $enqueued = array_fill_keys($queue, true);

        // count() is re-evaluated on purpose: the queue grows while it is consumed.
        for ($position = 0; $position < count($queue); ++$position) {
            $index = $queue[$position];

            if (isset($labels[$index])) {
                if ($labels[$index] === self::NOISE) {
                    // Border sample: density-reachable, but not dense enough to expand from.
                    $labels[$index] = $clusterId;
                }
                continue;
            }

            $labels[$index] = $clusterId;

            $region = $this->getSamplesInRegion($samples[$index], $samples);
            if (count($region) < $this->minSamples) {
                continue;
            }

            foreach ($region as $neighborIndex => $neighbor) {
                if (!isset($enqueued[$neighborIndex])) {
                    $enqueued[$neighborIndex] = true;
                    $queue[] = $neighborIndex;
                }
            }
        }
    }
}

View File

@ -0,0 +1,39 @@
<?php
declare(strict_types = 1);
namespace tests\Clustering;
use Phpml\Clustering\DBSCAN;
/**
 * Checks DBSCAN against small, hand-verifiable two-dimensional data sets.
 */
class DBSCANTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Two interleaved groups of three points (epsilon 2, at least 3 samples)
     * must come back as two clusters.
     */
    public function testDBSCANSamplesClustering()
    {
        $points = [
            [1, 1],
            [8, 7],
            [1, 2],
            [7, 8],
            [2, 1],
            [8, 9],
        ];

        $expected = [
            [[1, 1], [1, 2], [2, 1]],
            [[8, 7], [7, 8], [8, 9]],
        ];

        $clusterer = new DBSCAN(2, 3);
        $actual = $clusterer->cluster($points);

        $this->assertEquals($expected, $actual);
    }

    /**
     * Four points around the origin and four points near (6, 7)
     * (epsilon 3, at least 4 samples) must come back as two clusters.
     */
    public function testDBSCANSamplesInCircleClustering()
    {
        $points = [
            [1, 1],
            [6, 6],
            [1, -1],
            [5, 6],
            [-1, -1],
            [7, 8],
            [-1, 1],
            [7, 7],
        ];

        $expected = [
            [[1, 1], [1, -1], [-1, -1], [-1, 1]],
            [[6, 6], [5, 6], [7, 8], [7, 7]],
        ];

        $clusterer = new DBSCAN(3, 4);
        $actual = $clusterer->cluster($points);

        $this->assertEquals($expected, $actual);
    }
}