mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-24 22:07:33 +00:00
KMeans associative clustering (#262)
* KMeans associative clustering added * fix travis error * KMeans will return provided keys as point label if they are provided * fix travis * fix travis
This commit is contained in:
parent
0d80c78c57
commit
af2d732194
@ -19,10 +19,12 @@ To divide the samples into clusters simply use the `cluster` method. It returns the
|
|||||||
|
|
||||||
```
|
```
|
||||||
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
||||||
|
Or, if you need to keep your identifiers along with your samples, you can use array keys as labels.
|
||||||
|
$samples = [ 'Label1' => [1, 1], 'Label2' => [8, 7], 'Label3' => [1, 2]];
|
||||||
|
|
||||||
$kmeans = new KMeans(2);
|
$kmeans = new KMeans(2);
|
||||||
$kmeans->cluster($samples);
|
$kmeans->cluster($samples);
|
||||||
// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]]
|
// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]] or [0=>['Label1' => [1, 1], 'Label3' => [1, 2], ...], 1=>['Label2' => [8, 7], ...]]
|
||||||
```
|
```
|
||||||
|
|
||||||
### Initialization methods
|
### Initialization methods
|
||||||
|
@ -35,9 +35,9 @@ class KMeans implements Clusterer
|
|||||||
|
|
||||||
public function cluster(array $samples): array
|
public function cluster(array $samples): array
|
||||||
{
|
{
|
||||||
$space = new Space(count($samples[0]));
|
$space = new Space(count(reset($samples)));
|
||||||
foreach ($samples as $sample) {
|
foreach ($samples as $key => $sample) {
|
||||||
$space->addPoint($sample);
|
$space->addPoint($sample, $key);
|
||||||
}
|
}
|
||||||
|
|
||||||
$clusters = [];
|
$clusters = [];
|
||||||
|
@ -32,7 +32,11 @@ class Cluster extends Point implements IteratorAggregate, Countable
|
|||||||
{
|
{
|
||||||
$points = [];
|
$points = [];
|
||||||
foreach ($this->points as $point) {
|
foreach ($this->points as $point) {
|
||||||
$points[] = $point->toArray();
|
if (!empty($point->label)) {
|
||||||
|
$points[$point->label] = $point->toArray();
|
||||||
|
} else {
|
||||||
|
$points[] = $point->toArray();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $points;
|
return $points;
|
||||||
|
@ -18,10 +18,16 @@ class Point implements ArrayAccess
|
|||||||
*/
|
*/
|
||||||
protected $coordinates = [];
|
protected $coordinates = [];
|
||||||
|
|
||||||
public function __construct(array $coordinates)
|
/**
|
||||||
|
* @var mixed
|
||||||
|
*/
|
||||||
|
protected $label;
|
||||||
|
|
||||||
|
public function __construct(array $coordinates, $label = null)
|
||||||
{
|
{
|
||||||
$this->dimension = count($coordinates);
|
$this->dimension = count($coordinates);
|
||||||
$this->coordinates = $coordinates;
|
$this->coordinates = $coordinates;
|
||||||
|
$this->label = $label;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function toArray(): array
|
public function toArray(): array
|
||||||
|
@ -35,21 +35,21 @@ class Space extends SplObjectStorage
|
|||||||
return ['points' => $points];
|
return ['points' => $points];
|
||||||
}
|
}
|
||||||
|
|
||||||
public function newPoint(array $coordinates): Point
|
public function newPoint(array $coordinates, $label = null): Point
|
||||||
{
|
{
|
||||||
if (count($coordinates) != $this->dimension) {
|
if (count($coordinates) != $this->dimension) {
|
||||||
throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
|
throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
|
||||||
}
|
}
|
||||||
|
|
||||||
return new Point($coordinates);
|
return new Point($coordinates, $label);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param null $data
|
* @param null $data
|
||||||
*/
|
*/
|
||||||
public function addPoint(array $coordinates, $data = null): void
|
public function addPoint(array $coordinates, $label = null, $data = null): void
|
||||||
{
|
{
|
||||||
$this->attach($this->newPoint($coordinates), $data);
|
$this->attach($this->newPoint($coordinates, $label), $data);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -28,6 +28,32 @@ class KMeansTest extends TestCase
|
|||||||
$this->assertCount(0, $samples);
|
$this->assertCount(0, $samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testKMeansSamplesLabeledClustering(): void
|
||||||
|
{
|
||||||
|
$samples = [
|
||||||
|
'555' => [1, 1],
|
||||||
|
'666' => [8, 7],
|
||||||
|
'ABC' => [1, 2],
|
||||||
|
'DEF' => [7, 8],
|
||||||
|
668 => [2, 1],
|
||||||
|
[8, 9],
|
||||||
|
];
|
||||||
|
|
||||||
|
$kmeans = new KMeans(2);
|
||||||
|
$clusters = $kmeans->cluster($samples);
|
||||||
|
|
||||||
|
$this->assertCount(2, $clusters);
|
||||||
|
|
||||||
|
foreach ($samples as $index => $sample) {
|
||||||
|
if (in_array($sample, $clusters[0], true) || in_array($sample, $clusters[1], true)) {
|
||||||
|
$this->assertArrayHasKey($index, $clusters[0] + $clusters[1]);
|
||||||
|
unset($samples[$index]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->assertCount(0, $samples);
|
||||||
|
}
|
||||||
|
|
||||||
public function testKMeansInitializationMethods(): void
|
public function testKMeansInitializationMethods(): void
|
||||||
{
|
{
|
||||||
$samples = [
|
$samples = [
|
||||||
|
Loading…
Reference in New Issue
Block a user