mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-25 06:17:34 +00:00
dbscan clustering algorithm
This commit is contained in:
parent
650e7dd20d
commit
22963114c3
16
src/Phpml/Clustering/Clusterer.php
Normal file
16
src/Phpml/Clustering/Clusterer.php
Normal file
@ -0,0 +1,16 @@
|
||||
<?php
|
||||
declare(strict_types = 1);
|
||||
|
||||
namespace Phpml\Clustering;
|
||||
|
||||
/**
 * Contract shared by the clustering algorithms in this namespace.
 */
interface Clusterer
{
    /**
     * Partition the given samples into clusters.
     *
     * @param array $samples samples to be grouped
     *
     * @return array the clustering result; its exact shape is decided by the implementing class
     */
    public function cluster(array $samples);
}
|
103
src/Phpml/Clustering/DBSCAN.php
Normal file
103
src/Phpml/Clustering/DBSCAN.php
Normal file
@ -0,0 +1,103 @@
|
||||
<?php
|
||||
declare(strict_types = 1);
|
||||
|
||||
namespace Phpml\Clustering;
|
||||
|
||||
use Phpml\Math\Distance;
|
||||
|
||||
/**
 * Density-based clustering (DBSCAN).
 *
 * A sample is a "core" sample when at least $minSamples samples (itself
 * included, as its distance to itself is 0) lie strictly closer than $epsilon
 * to it. A cluster is a core sample plus everything that can be reached from it
 * through a chain of core samples. Samples that are not reachable from any core
 * sample are treated as noise and do not appear in the result.
 */
class DBSCAN implements Clusterer
{
    /**
     * Neighbourhood radius; the comparison is strict (distance < epsilon).
     *
     * @var float
     */
    private $epsilon;

    /**
     * Minimum neighbourhood size (the sample itself counts) for a core sample.
     *
     * @var int
     */
    private $minSamples;

    /**
     * Metric used to measure the distance between two samples.
     *
     * @var Distance
     */
    private $distanceMetric;

    /**
     * @param float $epsilon    neighbourhood radius
     * @param int   $minSamples minimum neighbourhood size for a core sample
     */
    public function __construct($epsilon = 0.5, $minSamples = 3)
    {
        $this->epsilon = $epsilon;
        $this->minSamples = $minSamples;
        $this->distanceMetric = new Distance\Euclidean();
    }

    /**
     * Group the samples into density-connected clusters.
     *
     * @param array $samples samples keyed by any index; keys are only used internally
     *
     * @return array list of clusters, each cluster being a list of samples;
     *               every sample belongs to at most one cluster and noise is omitted
     */
    public function cluster(array $samples)
    {
        $clusters = [];
        $visited = [];
        // Indices already placed in a cluster, so a border sample shared by
        // two dense regions is only reported in the first one that reaches it.
        $assigned = [];

        foreach ($samples as $index => $sample) {
            if (isset($visited[$index])) {
                continue;
            }
            $visited[$index] = true;

            $regionSamples = $this->getSamplesInRegion($sample, $samples);
            if (count($regionSamples) >= $this->minSamples) {
                $clusters[] = $this->expandCluster($regionSamples, $visited, $samples, $assigned);
            }
        }

        return $clusters;
    }

    /**
     * Collect every sample lying strictly within epsilon of the given sample.
     *
     * @param array $localSample sample at the centre of the neighbourhood
     * @param array $samples     data set to search
     *
     * @return array neighbouring samples, keyed by their index in $samples
     */
    private function getSamplesInRegion(array $localSample, array $samples): array
    {
        $region = [];

        foreach ($samples as $index => $sample) {
            if ($this->distanceMetric->distance($localSample, $sample) < $this->epsilon) {
                $region[$index] = $sample;
            }
        }

        return $region;
    }

    /**
     * Grow a cluster from the neighbourhood of a core sample.
     *
     * Neighbourhoods are always looked up in the complete data set so the
     * cluster can spread beyond the seed's own region, and the same
     * ">= minSamples" core test as in cluster() is applied.
     *
     * @param array $seeds    neighbourhood of the seed core sample, keyed by data set index
     * @param array $visited  indices whose neighbourhood was already evaluated (updated in place)
     * @param array $dataset  complete data set passed to cluster()
     * @param array $assigned indices already placed in some cluster (updated in place)
     *
     * @return array list of samples forming the cluster
     */
    private function expandCluster(array $seeds, array &$visited, array $dataset, array &$assigned): array
    {
        $cluster = [];
        $queue = array_keys($seeds);
        // Guards against enqueuing the same index twice.
        $queued = array_fill_keys($queue, true);

        // count() is re-evaluated on purpose: the queue grows while it is processed.
        for ($position = 0; $position < count($queue); ++$position) {
            $index = $queue[$position];

            if (!isset($visited[$index])) {
                $visited[$index] = true;

                $neighbours = $this->getSamplesInRegion($dataset[$index], $dataset);
                if (count($neighbours) >= $this->minSamples) {
                    // Core sample: its whole neighbourhood is density-reachable.
                    foreach ($neighbours as $neighbourIndex => $neighbour) {
                        if (!isset($queued[$neighbourIndex])) {
                            $queued[$neighbourIndex] = true;
                            $queue[] = $neighbourIndex;
                        }
                    }
                }
            }

            if (!isset($assigned[$index])) {
                $assigned[$index] = true;
                $cluster[] = $dataset[$index];
            }
        }

        return $cluster;
    }
}
|
39
tests/Phpml/Clustering/DBSCANTest.php
Normal file
39
tests/Phpml/Clustering/DBSCANTest.php
Normal file
@ -0,0 +1,39 @@
|
||||
<?php
|
||||
declare(strict_types = 1);
|
||||
|
||||
namespace tests\Clustering;
|
||||
|
||||
use Phpml\Clustering\DBSCAN;
|
||||
|
||||
/**
 * Checks that DBSCAN groups well-separated point sets into the expected clusters.
 */
class DBSCANTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Two groups of three points each, clustered with epsilon 2 and minSamples 3.
     */
    public function testDBSCANSamplesClustering()
    {
        $points = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
        $expected = [
            [[1, 1], [1, 2], [2, 1]],
            [[8, 7], [7, 8], [8, 9]],
        ];

        $epsilon = 2;
        $minSamples = 3;
        $clusterer = new DBSCAN($epsilon, $minSamples);
        $actual = $clusterer->cluster($points);

        $this->assertEquals($expected, $actual);
    }

    /**
     * Two groups of four points each, clustered with epsilon 3 and minSamples 4.
     */
    public function testDBSCANSamplesInCircleClustering()
    {
        $points = [[1, 1], [6, 6], [1, -1], [5, 6], [-1, -1], [7, 8], [-1, 1], [7, 7]];
        $expected = [
            [[1, 1], [1, -1], [-1, -1], [-1, 1]],
            [[6, 6], [5, 6], [7, 8], [7, 7]],
        ];

        $epsilon = 3;
        $minSamples = 4;
        $clusterer = new DBSCAN($epsilon, $minSamples);
        $actual = $clusterer->cluster($points);

        $this->assertEquals($expected, $actual);
    }
}
|
Loading…
Reference in New Issue
Block a user