php-ml/src/Phpml/Clustering/KMeans/Space.php
Tomáš Votruba a348111e97 Add PHPStan and level to max (#168)
* tests: update to PHPUnit 6.0 with rector

* fix namespaces on tests

* composer + tests: use standard test namespace naming

* update travis

* resolve conflict

* phpstan lvl 2

* phpstan lvl 3

* phpstan lvl 4

* phpstan lvl 5

* phpstan lvl 6

* phpstan lvl 7

* level max

* resolve conflict

* [cs] clean empty docs

* composer: bump to PHPUnit 6.4

* cleanup

* composer + travis: add phpstan

* phpstan lvl 1

* composer: update dev deps

* phpstan fixes

* update Contributing with new tools

* docs: link fixes, PHP version update

* composer: drop php-cs-fixer, cs already handled by ecs

* ecs: add old set rules

* [cs] apply rest of rules
2018-01-06 13:09:33 +01:00

225 lines
5.5 KiB
PHP

<?php
declare(strict_types=1);
namespace Phpml\Clustering\KMeans;
use InvalidArgumentException;
use LogicException;
use Phpml\Clustering\KMeans;
use SplObjectStorage;
/**
 * A fixed-dimension collection of Point objects that can be partitioned into clusters.
 *
 * The space is an SplObjectStorage whose attached objects are restricted to
 * Point instances; optional user data may be associated with each point.
 */
class Space extends SplObjectStorage
{
    /**
     * Number of coordinates every point of this space must have.
     *
     * @var int
     */
    protected $dimension;

    /**
     * @param int $dimension number of coordinates per point, at least 1
     *
     * @throws LogicException when the dimension is lower than 1
     */
    public function __construct(int $dimension)
    {
        if ($dimension < 1) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }

    /**
     * Exports the space as ['points' => [...]], one entry per attached point
     * as produced by that point's own toArray().
     */
    public function toArray(): array
    {
        $points = [];
        foreach ($this as $point) {
            $points[] = $point->toArray();
        }

        return ['points' => $points];
    }

    /**
     * Builds a Point from the given coordinates without attaching it to the space.
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function newPoint(array $coordinates): Point
    {
        if (count($coordinates) !== $this->dimension) {
            throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
        }

        return new Point($coordinates);
    }

    /**
     * Creates a point from the coordinates and attaches it to the space.
     *
     * @param mixed $data optional data to associate with the point
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function addPoint(array $coordinates, $data = null): void
    {
        $this->attach($this->newPoint($coordinates), $data);
    }

    /**
     * Attaches a point to the space, rejecting anything that is not a Point.
     *
     * @param Point $point
     * @param mixed $data optional data to associate with the point
     *
     * @throws InvalidArgumentException when $point is not a Point instance
     */
    public function attach($point, $data = null): void
    {
        if (!$point instanceof Point) {
            throw new InvalidArgumentException('can only attach points to spaces');
        }

        parent::attach($point, $data);
    }

    public function getDimension(): int
    {
        return $this->dimension;
    }

    /**
     * Computes the per-axis minimum and maximum over all attached points.
     *
     * @return array|bool [Point $min, Point $max], or false when the space holds no point
     */
    public function getBoundaries()
    {
        if (count($this) === 0) {
            return false;
        }

        // Both corners start with null coordinates, meaning "no value seen yet".
        $min = $this->newPoint(array_fill(0, $this->dimension, null));
        $max = $this->newPoint(array_fill(0, $this->dimension, null));

        foreach ($this as $point) {
            for ($n = 0; $n < $this->dimension; ++$n) {
                // Test for null first so the ordering comparison never involves null.
                if ($min[$n] === null || $min[$n] > $point[$n]) {
                    $min[$n] = $point[$n];
                }

                if ($max[$n] === null || $max[$n] < $point[$n]) {
                    $max[$n] = $point[$n];
                }
            }
        }

        return [$min, $max];
    }

    /**
     * Draws a point whose coordinates lie between the matching coordinates of $min and $max.
     *
     * When both bounds of an axis are integers the coordinate is a random integer
     * in [min, max]; when at least one bound is a float the coordinate is a float
     * interpolated between the two bounds. The returned point is not attached.
     *
     * @throws InvalidArgumentException when a boundary coordinate is neither int nor float
     */
    public function getRandomPoint(Point $min, Point $max): Point
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $low = $min[$n];
            $high = $max[$n];

            if (is_int($low) && is_int($high)) {
                $point[$n] = random_int($low, $high);

                continue;
            }

            $lowIsNumber = is_int($low) || is_float($low);
            $highIsNumber = is_int($high) || is_float($high);
            if (!$lowIsNumber || !$highIsNumber) {
                throw new InvalidArgumentException('boundaries must be numeric to draw a random point');
            }

            // random_int() only accepts integers (and this file uses strict types),
            // so float ranges are sampled by scaling a random fraction in [0, 1].
            $fraction = random_int(0, mt_getrandmax()) / mt_getrandmax();
            $point[$n] = $low + $fraction * ($high - $low);
        }

        return $point;
    }

    /**
     * Partitions the points of the space into clusters.
     *
     * Clusters are initialised with the requested method, then iterate() is
     * repeated until it reports that no point changed cluster.
     *
     * @param int $clustersNumber number of clusters to create
     * @param int $initMethod     KMeans::INIT_RANDOM or KMeans::INIT_KMEANS_PLUS_PLUS
     *
     * @return array|Cluster[] empty when the init method is unknown or no cluster was created
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        // All the work happens inside iterate(); the loop body is intentionally empty.
        do {
        } while (!$this->iterate($clusters));

        return $clusters;
    }

    /**
     * Creates the initial clusters and attaches every point of the space to the first one.
     *
     * @return array|Cluster[] empty when the init method is unknown or no cluster was created
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod): array
    {
        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                $clusters = $this->initializeRandomClusters($clustersNumber);

                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $clusters = $this->initializeKMPPClusters($clustersNumber);

                break;

            default:
                return [];
        }

        // Nothing to seed when no cluster was created (e.g. $clustersNumber < 1).
        if ($clusters === []) {
            return [];
        }

        $clusters[0]->attachAll($this);

        return $clusters;
    }

    /**
     * Runs one reassignment pass over the clusters.
     *
     * Every point is compared with the cluster returned by its getClosest();
     * points whose closest cluster differs from their current one are moved,
     * then each cluster's centroid is updated.
     *
     * @param Cluster[] $clusters
     *
     * @return bool true when no point changed cluster during this pass
     */
    protected function iterate($clusters): bool
    {
        $convergence = true;

        // Moves are collected first and applied afterwards, so that the clusters
        // are not modified while they are being iterated.
        $attach = new SplObjectStorage();
        $detach = new SplObjectStorage();

        foreach ($clusters as $cluster) {
            foreach ($cluster as $point) {
                $closest = $point->getClosest($clusters);

                if ($closest === $cluster) {
                    continue;
                }

                if (!isset($attach[$closest])) {
                    $attach[$closest] = new SplObjectStorage();
                }

                if (!isset($detach[$cluster])) {
                    $detach[$cluster] = new SplObjectStorage();
                }

                $attach[$closest]->attach($point);
                $detach[$cluster]->attach($point);

                $convergence = false;
            }
        }

        foreach ($attach as $cluster) {
            $cluster->attachAll($attach[$cluster]);
        }

        foreach ($detach as $cluster) {
            $cluster->detachAll($detach[$cluster]);
        }

        foreach ($clusters as $cluster) {
            $cluster->updateCentroid();
        }

        return $convergence;
    }

    /**
     * Seeds clusters one at a time, favouring points that are far from the clusters chosen so far.
     *
     * The first cluster is placed on the first point of the space. Each further
     * cluster is placed on a point drawn at random with a weight equal to its
     * distance to its closest existing cluster.
     *
     * @return Cluster[]
     *
     * @throws LogicException when the space holds no point
     */
    protected function initializeKMPPClusters(int $clustersNumber): array
    {
        if (count($this) === 0) {
            throw new LogicException('cannot initialize clusters in an empty space');
        }

        $clusters = [];
        $this->rewind();
        $clusters[] = new Cluster($this, $this->current()->getCoordinates());

        $distances = new SplObjectStorage();

        for ($i = 1; $i < $clustersNumber; ++$i) {
            $sum = 0.0;
            $farthest = null;
            $farthestDistance = null;

            foreach ($this as $point) {
                $distance = $point->getDistanceWith($point->getClosest($clusters));
                $distances[$point] = $distance;
                $sum += $distance;

                if ($farthest === null || $distance > $farthestDistance) {
                    $farthest = $point;
                    $farthestDistance = $distance;
                }
            }

            // Float threshold in [0, $sum): truncating the sum to an integer would
            // collapse to 0 for small-scale data and always re-select the first point.
            $threshold = $sum * (random_int(0, mt_getrandmax() - 1) / mt_getrandmax());

            // Falls back to the farthest point when no point crosses the threshold
            // (all distances are zero, or floating-point rounding).
            $chosen = $farthest;
            foreach ($this as $point) {
                $threshold -= $distances[$point];

                // Strictly negative, so that zero-distance points are never drawn.
                if ($threshold < 0) {
                    $chosen = $point;

                    break;
                }
            }

            $clusters[] = new Cluster($this, $chosen->getCoordinates());
        }

        return $clusters;
    }

    /**
     * Seeds clusters on random points drawn inside the bounding box of the space.
     *
     * @return Cluster[]
     *
     * @throws LogicException when clusters are requested but the space holds no point
     */
    private function initializeRandomClusters(int $clustersNumber): array
    {
        if ($clustersNumber < 1) {
            return [];
        }

        $boundaries = $this->getBoundaries();
        if ($boundaries === false) {
            throw new LogicException('cannot initialize clusters in an empty space');
        }

        [$min, $max] = $boundaries;

        $clusters = [];
        for ($n = 0; $n < $clustersNumber; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }
}