From b48b82bd343301308c3d658b3fb6f95133d786a2 Mon Sep 17 00:00:00 2001 From: Maxim Kasatkin Date: Wed, 18 Oct 2017 15:59:37 +0700 Subject: [PATCH] DBSCAN fix for associative keys and array_merge performance optimization (#139) --- src/Phpml/Clustering/DBSCAN.php | 6 ++++-- tests/Phpml/Clustering/DBSCANTest.php | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Phpml/Clustering/DBSCAN.php b/src/Phpml/Clustering/DBSCAN.php index ebb1f5d..9e65063 100644 --- a/src/Phpml/Clustering/DBSCAN.php +++ b/src/Phpml/Clustering/DBSCAN.php @@ -94,17 +94,19 @@ class DBSCAN implements Clusterer { $cluster = []; + $clusterMerge = [[]]; foreach ($samples as $index => $sample) { if (!isset($visited[$index])) { $visited[$index] = true; $regionSamples = $this->getSamplesInRegion($sample, $samples); if (count($regionSamples) > $this->minSamples) { - $cluster = array_merge($regionSamples, $cluster); + $clusterMerge[] = $regionSamples; } } - $cluster[] = $sample; + $cluster[$index] = $sample; } + $cluster = \array_merge($cluster, ...$clusterMerge); return $cluster; } diff --git a/tests/Phpml/Clustering/DBSCANTest.php b/tests/Phpml/Clustering/DBSCANTest.php index 31fc1e6..a093b20 100644 --- a/tests/Phpml/Clustering/DBSCANTest.php +++ b/tests/Phpml/Clustering/DBSCANTest.php @@ -31,4 +31,17 @@ class DBSCANTest extends TestCase $this->assertEquals($clustered, $dbscan->cluster($samples)); } + + public function testDBSCANSamplesClusteringAssociative() + { + $samples = ['a' => [1, 1], 'b' => [9, 9], 'c' => [1, 2], 'd' => [9, 8], 'e' => [7, 7], 'f' => [8, 7]]; + $clustered = [ + ['a' => [1, 1], 'c' => [1, 2]], + ['b' => [9, 9], 'd' => [9, 8], 'e' => [7, 7], 'f' => [8, 7]], + ]; + + $dbscan = new DBSCAN($epsilon = 3, $minSamples = 2); + + $this->assertEquals($clustered, $dbscan->cluster($samples)); + } }