2016-05-01 21:17:09 +00:00
|
|
|
<?php
|
2016-05-01 21:36:33 +00:00
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-05-01 21:17:09 +00:00
|
|
|
|
|
|
|
namespace Phpml\Clustering\KMeans;
|
|
|
|
|
2017-11-06 07:56:37 +00:00
|
|
|
use InvalidArgumentException;
|
|
|
|
use LogicException;
|
2016-05-01 21:36:33 +00:00
|
|
|
use Phpml\Clustering\KMeans;
|
|
|
|
use SplObjectStorage;
|
2016-05-01 21:17:09 +00:00
|
|
|
|
|
|
|
/**
 * A fixed-dimension collection of Point objects that can be split into clusters.
 *
 * The space is an SplObjectStorage that only accepts Point instances; the
 * optional data passed along with a point is kept as the storage's associated data.
 */
class Space extends SplObjectStorage
{
    /**
     * Number of coordinates every point of this space must have.
     *
     * @var int
     */
    protected $dimension;

    /**
     * @throws LogicException when $dimension is lower than 1
     */
    public function __construct(int $dimension)
    {
        if ($dimension < 1) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }

    /**
     * Exports every attached point through its own toArray() method.
     *
     * @return array an array with a single 'points' key holding one entry per point
     */
    public function toArray(): array
    {
        $points = [];

        /** @var Point $point */
        foreach ($this as $point) {
            $points[] = $point->toArray();
        }

        return ['points' => $points];
    }

    /**
     * Creates a point for this space without attaching it.
     *
     * @param mixed $label
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function newPoint(array $coordinates, $label = null): Point
    {
        if (count($coordinates) !== $this->dimension) {
            throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
        }

        return new Point($coordinates, $label);
    }

    /**
     * Creates a point from the given coordinates and attaches it to the space.
     *
     * @param mixed $label
     * @param mixed $data  stored as the data associated with the attached point
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function addPoint(array $coordinates, $label = null, $data = null): void
    {
        $this->attach($this->newPoint($coordinates, $label), $data);
    }

    /**
     * Attaches a point to the space; anything that is not a Point is rejected.
     *
     * @param object $point
     * @param mixed  $data
     *
     * @throws InvalidArgumentException when $point is not a Point instance
     */
    public function attach($point, $data = null): void
    {
        if (!$point instanceof Point) {
            throw new InvalidArgumentException('can only attach points to spaces');
        }

        parent::attach($point, $data);
    }

    public function getDimension(): int
    {
        return $this->dimension;
    }

    /**
     * Computes the per-dimension minimum and maximum over all attached points.
     *
     * @return array|bool a two-element array [$min, $max] of points, or false when the space is empty
     */
    public function getBoundaries()
    {
        if (count($this) === 0) {
            return false;
        }

        // Both bounds start with null coordinates, meaning "no value seen yet".
        $min = $this->newPoint(array_fill(0, $this->dimension, null));
        $max = $this->newPoint(array_fill(0, $this->dimension, null));

        /** @var Point $point */
        foreach ($this as $point) {
            for ($n = 0; $n < $this->dimension; ++$n) {
                if ($min[$n] === null || $min[$n] > $point[$n]) {
                    $min[$n] = $point[$n];
                }

                if ($max[$n] === null || $max[$n] < $point[$n]) {
                    $max[$n] = $point[$n];
                }
            }
        }

        return [$min, $max];
    }

    /**
     * Draws a random point whose n-th coordinate lies between $min[$n] and $max[$n].
     *
     * Integer bounds produce an integer coordinate through random_int(). As soon as
     * one of the two bounds is a float the coordinate is drawn as a float instead,
     * because random_int() only accepts integers and would otherwise reject (or
     * truncate) non-integer coordinates.
     */
    public function getRandomPoint(Point $min, Point $max): Point
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $lower = $min[$n];
            $upper = $max[$n];

            if (is_float($lower) || is_float($upper)) {
                // Scale a random ratio in [0, 1] onto the [$lower, $upper] interval.
                $ratio = random_int(0, PHP_INT_MAX) / PHP_INT_MAX;
                $point[$n] = $lower + ($upper - $lower) * $ratio;
            } else {
                $point[$n] = random_int($lower, $upper);
            }
        }

        return $point;
    }

    /**
     * Partitions the points of the space into clusters.
     *
     * Clusters are created by initializeClusters() and then refined by calling
     * iterate() until it reports that no point changed cluster.
     *
     * @return array|Cluster[] empty when $initMethod is unknown or no initial cluster could be built
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        do {
            $converged = $this->iterate($clusters);
        } while (!$converged);

        return $clusters;
    }

    /**
     * Builds the initial clusters with the requested strategy and assigns every
     * point of the space to the first of them.
     *
     * @return array|Cluster[] empty when $initMethod is unknown or the strategy produced no cluster
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod): array
    {
        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                $clusters = $this->initializeRandomClusters($clustersNumber);

                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $clusters = $this->initializeKMPPClusters($clustersNumber);

                break;

            default:
                return [];
        }

        // Nothing to seed (empty space or a non-positive cluster count):
        // there is no first cluster to receive the points.
        if (count($clusters) === 0) {
            return [];
        }

        $clusters[0]->attachAll($this);

        return $clusters;
    }

    /**
     * Runs one reassignment pass: every point whose closest cluster is not its
     * current one is moved, then all centroids are updated.
     *
     * @param Cluster[] $clusters
     *
     * @return bool true when no point had to be moved during this pass
     */
    protected function iterate(array $clusters): bool
    {
        $convergence = true;

        // Moves are collected first and applied once the scan is over.
        // Both storages map a cluster to the set of points to add/remove.
        $attach = new SplObjectStorage();
        $detach = new SplObjectStorage();

        foreach ($clusters as $cluster) {
            foreach ($cluster as $point) {
                $closest = $point->getClosest($clusters);

                if ($closest === $cluster) {
                    continue;
                }

                if (!isset($attach[$closest])) {
                    $attach[$closest] = new SplObjectStorage();
                }

                if (!isset($detach[$cluster])) {
                    $detach[$cluster] = new SplObjectStorage();
                }

                $attach[$closest]->attach($point);
                $detach[$cluster]->attach($point);

                $convergence = false;
            }
        }

        /** @var Cluster $cluster */
        foreach ($attach as $cluster) {
            $cluster->attachAll($attach[$cluster]);
        }

        /** @var Cluster $cluster */
        foreach ($detach as $cluster) {
            $cluster->detachAll($detach[$cluster]);
        }

        foreach ($clusters as $cluster) {
            $cluster->updateCentroid();
        }

        return $convergence;
    }

    /**
     * Seeds clusters one at a time: the first centroid is the first point of the
     * space, each following centroid is a point drawn with a probability
     * proportional to its distance to the closest centroid chosen so far.
     *
     * @return Cluster[] empty when the space holds no points
     */
    protected function initializeKMPPClusters(int $clustersNumber): array
    {
        if (count($this) === 0) {
            return [];
        }

        $this->rewind();

        /** @var Point $current */
        $current = $this->current();

        $clusters = [new Cluster($this, $current->getCoordinates())];

        for ($i = 1; $i < $clustersNumber; ++$i) {
            $distances = new SplObjectStorage();
            $sum = 0.0;

            /** @var Point $point */
            foreach ($this as $point) {
                $closest = $point->getClosest($clusters);
                if ($closest === null) {
                    continue;
                }

                $distance = $point->getDistanceWith($closest);
                $distances[$point] = $distance;
                $sum += $distance;
            }

            // Draw the threshold in [0, $sum] without truncating the sum to an
            // integer, so that data whose distances are below 1 is still sampled.
            $threshold = $sum * (random_int(0, PHP_INT_MAX) / PHP_INT_MAX);

            $chosen = null;

            /** @var Point $point */
            foreach ($this as $point) {
                // Points skipped above have no recorded distance.
                if (!isset($distances[$point])) {
                    continue;
                }

                // Remember the last candidate so that floating-point rounding on
                // the running subtraction can never leave the round without a pick.
                $chosen = $point;
                $threshold -= $distances[$point];

                if ($threshold <= 0) {
                    break;
                }
            }

            if ($chosen !== null) {
                $clusters[] = new Cluster($this, $chosen->getCoordinates());
            }
        }

        return $clusters;
    }

    /**
     * Seeds each cluster with a random point drawn inside the boundaries of the space.
     *
     * @return Cluster[] empty when the space holds no points
     */
    private function initializeRandomClusters(int $clustersNumber): array
    {
        $boundaries = $this->getBoundaries();

        // getBoundaries() returns false for an empty space: nothing to draw from.
        if (!is_array($boundaries)) {
            return [];
        }

        [$min, $max] = $boundaries;

        $clusters = [];

        for ($n = 0; $n < $clustersNumber; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }
}
|