From 22963114c309fe084040a7902bd02646a941923b Mon Sep 17 00:00:00 2001
From: Arkadiusz Kondas
Date: Sun, 1 May 2016 00:47:44 +0200
Subject: [PATCH] dbscan clustering algorithm

---
 src/Phpml/Clustering/Clusterer.php    |  16 ++++
 src/Phpml/Clustering/DBSCAN.php       | 103 ++++++++++++++++++++++++++
 tests/Phpml/Clustering/DBSCANTest.php |  39 ++++++++++
 3 files changed, 158 insertions(+)
 create mode 100644 src/Phpml/Clustering/Clusterer.php
 create mode 100644 src/Phpml/Clustering/DBSCAN.php
 create mode 100644 tests/Phpml/Clustering/DBSCANTest.php

diff --git a/src/Phpml/Clustering/Clusterer.php b/src/Phpml/Clustering/Clusterer.php
new file mode 100644
index 0000000..760c996
--- /dev/null
+++ b/src/Phpml/Clustering/Clusterer.php
@@ -0,0 +1,16 @@
+epsilon = $epsilon;
+        $this->minSamples = $minSamples;
+        $this->distanceMetric = new Distance\Euclidean();
+    }
+
+    /**
+     * Groups the samples into density-based clusters.
+     *
+     * Every not-yet-visited sample whose epsilon-neighbourhood (measured
+     * against the whole data set) holds at least minSamples samples seeds a
+     * new cluster, which is then grown by expandCluster(). Samples that are
+     * never reached from such a seed are treated as noise and are left out
+     * of the result.
+     *
+     * @param array $samples list of samples, keyed by their index
+     *
+     * @return array list of clusters; each cluster is a list of samples
+     */
+    public function cluster(array $samples)
+    {
+        $clusters = [];
+        $visited = [];
+        // Indexes that already belong to a cluster, shared between all
+        // expansions so a sample is never emitted in two clusters.
+        $assigned = [];
+
+        foreach ($samples as $index => $sample) {
+            if (isset($visited[$index])) {
+                continue;
+            }
+            $visited[$index] = true;
+
+            $regionSamples = $this->getSamplesInRegion($sample, $samples);
+            if (count($regionSamples) >= $this->minSamples) {
+                $clusters[] = $this->expandCluster($regionSamples, $visited, $samples, $assigned);
+            }
+        }
+
+        return $clusters;
+    }
+
+    /**
+     * Collects every sample lying strictly closer than epsilon to $localSample.
+     *
+     * The original keys of $samples are preserved in the result so callers
+     * can keep tracking samples by index. $localSample itself is part of the
+     * result whenever it is contained in $samples and epsilon is positive.
+     *
+     * @param array $localSample sample at the centre of the region
+     * @param array $samples     candidate samples, keyed by index
+     *
+     * @return array samples inside the region, keyed like $samples
+     */
+    private function getSamplesInRegion($localSample, $samples)
+    {
+        $region = [];
+
+        foreach ($samples as $index => $sample) {
+            if ($this->distanceMetric->distance($localSample, $sample) < $this->epsilon) {
+                $region[$index] = $sample;
+            }
+        }
+
+        return $region;
+    }
+
+    /**
+     * Grows a cluster from the neighbourhood of a seed sample.
+     *
+     * The samples in $samples form the initial work list. Each pending
+     * sample joins the cluster unless it already belongs to one. When a
+     * pending sample has not been visited before, its neighbourhood is
+     * looked up in $allSamples; if that neighbourhood holds at least
+     * minSamples samples (the same ">=" rule cluster() applies to seeds),
+     * its unassigned neighbours are queued as well, so the cluster spreads
+     * transitively.
+     *
+     * @param array      $samples    neighbourhood of the seed, keyed by index
+     * @param array      $visited    indexes already examined; updated in place
+     * @param array|null $allSamples complete data set used for neighbourhood
+     *                               look-ups; falls back to $samples when null
+     * @param array      $assigned   indexes already placed in a cluster;
+     *                               updated in place
+     *
+     * @return array list of samples forming the cluster, in discovery order
+     */
+    private function expandCluster($samples, &$visited, $allSamples = null, &$assigned = [])
+    {
+        if ($allSamples === null) {
+            $allSamples = $samples;
+        }
+
+        $cluster = [];
+        $pending = $samples;
+
+        while (!empty($pending)) {
+            // Take the oldest pending entry while keeping its original index.
+            reset($pending);
+            $index = key($pending);
+            $sample = $pending[$index];
+            unset($pending[$index]);
+
+            if (!isset($assigned[$index])) {
+                $assigned[$index] = true;
+                $cluster[] = $sample;
+            }
+
+            // Visited samples were already examined, so they are not expanded again.
+            if (isset($visited[$index])) {
+                continue;
+            }
+            $visited[$index] = true;
+
+            $regionSamples = $this->getSamplesInRegion($sample, $allSamples);
+            if (count($regionSamples) < $this->minSamples) {
+                continue;
+            }
+
+            foreach ($regionSamples as $neighbourIndex => $neighbour) {
+                if (!isset($assigned[$neighbourIndex]) && !isset($pending[$neighbourIndex])) {
+                    $pending[$neighbourIndex] = $neighbour;
+                }
+            }
+        }
+
+        return $cluster;
+    }
+
+}
diff --git a/tests/Phpml/Clustering/DBSCANTest.php b/tests/Phpml/Clustering/DBSCANTest.php
new file mode 100644
index 0000000..748b87a
--- /dev/null
+++ b/tests/Phpml/Clustering/DBSCANTest.php
@@ -0,0 +1,39 @@
+assertEquals($clustered, $dbscan->cluster($samples));
+    }
+
+    public function testDBSCANSamplesInCircleClustering()
+    {
+        $samples = [[1, 1],[6, 6],[1, -1],[5, 6],[-1, -1],[7, 8],[-1, 1],[7, 7]];
+
+        $clustered = [
+            [[1, 1],[1, -1],[-1, -1],[-1, 1]],
+            [[6, 6],[5, 6],[7, 8],[7, 7]]
+        ];
+
+        $dbscan = new DBSCAN($epsilon = 3, $minSamples = 4);
+
+        $this->assertEquals($clustered, $dbscan->cluster($samples));
+    }
+
+    /**
+     * Samples on a line, two units apart: with epsilon 3 each inner sample
+     * has three samples in its neighbourhood, so the whole chain must end up
+     * in a single cluster with no sample listed twice.
+     */
+    public function testDBSCANExpandsClusterThroughCorePoints()
+    {
+        $samples = [[0, 0],[2, 0],[4, 0],[6, 0],[8, 0],[10, 0]];
+
+        $clustered = [
+            [[0, 0],[2, 0],[4, 0],[6, 0],[8, 0],[10, 0]]
+        ];
+
+        $dbscan = new DBSCAN($epsilon = 3, $minSamples = 3);
+
+        $this->assertEquals($clustered, $dbscan->cluster($samples));
+    }
+
+}