refactor kmeans init methods

This commit is contained in:
Arkadiusz Kondas 2016-05-02 23:36:58 +02:00
parent 56114d99ce
commit 5c67cfaeef
6 changed files with 60 additions and 34 deletions

View File

@ -49,7 +49,7 @@ class Point implements ArrayAccess
$distance += $difference * $difference;
}
return $precise ? sqrt((float)$distance) : $distance;
return $precise ? sqrt((float) $distance) : $distance;
}
/**

View File

@ -150,37 +150,11 @@ class Space extends SplObjectStorage
{
switch ($initMethod) {
case KMeans::INIT_RANDOM:
list($min, $max) = $this->getBoundaries();
for ($n = 0; $n < $clustersNumber; ++$n) {
$clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
}
$clusters = $this->initializeRandomClusters($clustersNumber);
break;
case KMeans::INIT_KMEANS_PLUS_PLUS:
$position = rand(1, count($this));
for ($i = 1, $this->rewind(); $i < $position && $this->valid(); $i++, $this->next());
$clusters[] = new Cluster($this, $this->current()->getCoordinates());
$distances = new SplObjectStorage();
for ($i = 1; $i < $clustersNumber; ++$i) {
$sum = 0;
foreach ($this as $point) {
$distance = $point->getDistanceWith($point->getClosest($clusters));
$sum += $distances[$point] = $distance;
}
$sum = rand(0, (int) $sum);
foreach ($this as $point) {
if (($sum -= $distances[$point]) > 0) {
continue;
}
$clusters[] = new Cluster($this, $point->getCoordinates());
break;
}
}
$clusters = $this->initializeKMPPClusters($clustersNumber);
break;
}
$clusters[0]->attachAll($this);
@ -230,4 +204,56 @@ class Space extends SplObjectStorage
return $convergence;
}
/**
 * Builds the initial clusters from randomly generated points.
 *
 * The boundaries of the space are fetched once through getBoundaries() and
 * every cluster is created at the coordinates of a point returned by
 * getRandomPoint() for those boundaries.
 *
 * @param int $clustersNumber number of clusters to create
 *
 * @return array list of Cluster instances; empty when $clustersNumber is lower than 1
 */
private function initializeRandomClusters(int $clustersNumber)
{
    list($lowerBound, $upperBound) = $this->getBoundaries();

    $seeds = [];
    $remaining = $clustersNumber;
    while ($remaining > 0) {
        $coordinates = $this->getRandomPoint($lowerBound, $upperBound)->getCoordinates();
        $seeds[] = new Cluster($this, $coordinates);
        --$remaining;
    }

    return $seeds;
}
/**
 * Picks the initial clusters with a k-means++ style seeding.
 *
 * The first centre is a point taken from a randomly drawn position in the
 * space. Every further centre is drawn with a probability proportional to the
 * point's distance (as returned by getDistanceWith()) to the cluster returned
 * by getClosest() for the centres chosen so far.
 *
 * The weighted draw uses a floating point threshold in (0, total distance].
 * Truncating the total to an integer would collapse the threshold to 0 whenever
 * the distances sum to less than 1 (e.g. normalised data), which always selects
 * the first point and yields duplicated centres.
 *
 * NOTE(review): canonical k-means++ weights by the squared distance; whether
 * getDistanceWith() returns a squared value is not visible here - confirm.
 *
 * @param int $clustersNumber number of clusters to create
 *
 * @return array list of Cluster instances; always holds at least the first centre
 */
protected function initializeKMPPClusters(int $clustersNumber)
{
    $clusters = [];

    // Advance the internal iterator to a randomly drawn 1-based position.
    $position = rand(1, count($this));
    $this->rewind();
    for ($i = 1; $i < $position && $this->valid(); ++$i) {
        $this->next();
    }
    $clusters[] = new Cluster($this, $this->current()->getCoordinates());

    $distances = new SplObjectStorage();
    for ($i = 1; $i < $clustersNumber; ++$i) {
        // Weight of every point: distance to its closest already chosen cluster.
        $sum = 0;
        foreach ($this as $point) {
            $distance = $point->getDistanceWith($point->getClosest($clusters));
            $distances[$point] = $distance;
            $sum += $distance;
        }

        // Strictly positive threshold (unless every weight is 0), so points
        // with zero weight - the ones that already are centres - are skipped.
        $threshold = $sum * (mt_rand(1, mt_getrandmax()) / mt_getrandmax());

        $chosen = null;
        foreach ($this as $point) {
            // Remember the last positively weighted point as a fallback in case
            // floating point rounding keeps the threshold marginally above 0.
            if ($chosen === null || $distances[$point] > 0) {
                $chosen = $point;
            }
            $threshold -= $distances[$point];
            if ($threshold <= 0) {
                break;
            }
        }

        if ($chosen !== null) {
            $clusters[] = new Cluster($this, $chosen->getCoordinates());
        }
    }

    return $clusters;
}
}

View File

@ -30,6 +30,6 @@ class Euclidean implements Distance
$distance += pow($a[$i] - $b[$i], 2);
}
return sqrt((float)$distance);
return sqrt((float) $distance);
}
}

View File

@ -147,7 +147,7 @@ class Matrix
for ($j = 0; $j < $this->columns; ++$j) {
$subMatrix = $this->crossOut(0, $j);
$minor = $this->matrix[0][$j] * $subMatrix->getDeterminant();
$determinant += fmod((float)$j, 2.0) == 0 ? $minor : -$minor;
$determinant += fmod((float) $j, 2.0) == 0 ? $minor : -$minor;
}
}
@ -236,7 +236,7 @@ class Matrix
for ($i = 0; $i < $this->rows; ++$i) {
for ($j = 0; $j < $this->columns; ++$j) {
$minor = $this->crossOut($i, $j)->getDeterminant();
$newMatrix[$i][$j] = fmod((float)($i + $j), 2.0) == 0 ? $minor : -$minor;
$newMatrix[$i][$j] = fmod((float) ($i + $j), 2.0) == 0 ? $minor : -$minor;
}
}

View File

@ -38,7 +38,7 @@ class Correlation
$b2 = $b2 + pow($b, 2);
}
$corr = $axb / sqrt((float)($a2 * $b2));
$corr = $axb / sqrt((float) ($a2 * $b2));
return $corr;
}

View File

@ -39,6 +39,6 @@ class StandardDeviation
--$n;
}
return sqrt((float)($carry / $n));
return sqrt((float) ($carry / $n));
}
}