mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-22 04:55:10 +00:00
Apply comments / coding styles
* Remove user-specific gitignore * Add return type hints * Avoid global namespace in docs * Rename rules -> getRules * Split up rule generation Todo: * Move set theory out to math * Extract rule generation
This commit is contained in:
parent
c8bd8db601
commit
90038befa9
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,4 +1,3 @@
|
|||||||
/.idea/
|
|
||||||
/vendor/
|
/vendor/
|
||||||
humbuglog.*
|
humbuglog.*
|
||||||
/bin/phpunit
|
/bin/phpunit
|
||||||
|
@ -8,7 +8,9 @@ Association rule learning based on [Apriori algorithm](https://en.wikipedia.org/
|
|||||||
* $confidence - [confidence](https://en.wikipedia.org/wiki/Association_rule_learning#Confidence), minimum relative amount of item set in frequent item sets
|
* $confidence - [confidence](https://en.wikipedia.org/wiki/Association_rule_learning#Confidence), minimum relative amount of item set in frequent item sets
|
||||||
|
|
||||||
```
|
```
|
||||||
$associator = new \Phpml\Association\Apriori($support = 0.5, $confidence = 0.5);
|
use Phpml\Association\Apriori;
|
||||||
|
|
||||||
|
$associator = new Apriori($support = 0.5, $confidence = 0.5);
|
||||||
```
|
```
|
||||||
|
|
||||||
### Train
|
### Train
|
||||||
@ -19,7 +21,9 @@ To train an associator simply provide train samples and labels (as `array`). Exam
|
|||||||
$samples = [['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta'], ['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta']];
|
$samples = [['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta'], ['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta']];
|
||||||
$labels = [];
|
$labels = [];
|
||||||
|
|
||||||
$associator = new \Phpml\Association\Apriori(0.5, 0.5);
|
use Phpml\Association\Apriori;
|
||||||
|
|
||||||
|
$associator = new Apriori($support = 0.5, $confidence = 0.5);
|
||||||
$associator->train($samples, $labels);
|
$associator->train($samples, $labels);
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -37,10 +41,10 @@ $associator->predict([['alpha','epsilon'],['beta','theta']]);
|
|||||||
|
|
||||||
### Associating
|
### Associating
|
||||||
|
|
||||||
Generating association rules simply use `rules` method.
|
To get the generated association rules, simply use the `getRules` method.
|
||||||
|
|
||||||
```
|
```
|
||||||
$associator->rules();
|
$associator->getRules();
|
||||||
// return [['antecedent' => ['alpha', 'theta'], 'consequent' => ['beta], 'support' => 1.0, 'confidence' => 1.0], ... ]
|
// return [['antecedent' => ['alpha', 'theta'], 'consequent' => ['beta'], 'support' => 1.0, 'confidence' => 1.0], ... ]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
declare(strict_types = 1);
|
declare(strict_types=1);
|
||||||
|
|
||||||
namespace Phpml\Association;
|
namespace Phpml\Association;
|
||||||
|
|
||||||
@ -53,18 +53,18 @@ class Apriori implements Associator
|
|||||||
* @param float $support
|
* @param float $support
|
||||||
* @param float $confidence
|
* @param float $confidence
|
||||||
*/
|
*/
|
||||||
public function __construct($support = 0.0, $confidence = 0.0)
|
public function __construct(float $support = 0.0, float $confidence = 0.0)
|
||||||
{
|
{
|
||||||
$this->support = $support;
|
$this->support = $support;
|
||||||
$this->confidence = $confidence;
|
$this->confidence = $confidence;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates apriori association rules.
|
* Get all association rules which are generated for every k-length frequent item set.
|
||||||
*
|
*
|
||||||
* @return mixed[][]
|
* @return mixed[][]
|
||||||
*/
|
*/
|
||||||
public function rules()
|
public function getRules() : array
|
||||||
{
|
{
|
||||||
if (!$this->large) {
|
if (!$this->large) {
|
||||||
$this->large = $this->apriori();
|
$this->large = $this->apriori();
|
||||||
@ -76,31 +76,17 @@ class Apriori implements Associator
|
|||||||
|
|
||||||
$this->rules = [];
|
$this->rules = [];
|
||||||
|
|
||||||
for ($k = 2; !empty($this->large[$k]); ++$k) {
|
$this->generateAllRules();
|
||||||
foreach ($this->large[$k] as $frequent) {
|
|
||||||
foreach ($this->antecedents($frequent) as $antecedent) {
|
|
||||||
if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) {
|
|
||||||
$consequent = array_values(array_diff($frequent, $antecedent));
|
|
||||||
$this->rules[] = [
|
|
||||||
self::ARRAY_KEY_ANTECEDENT => $antecedent,
|
|
||||||
self::ARRAY_KEY_CONSEQUENT => $consequent,
|
|
||||||
self::ARRAY_KEY_SUPPORT => $this->support($consequent),
|
|
||||||
self::ARRAY_KEY_CONFIDENCE => $confidence,
|
|
||||||
];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $this->rules;
|
return $this->rules;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates frequent item sets
|
* Generates frequent item sets.
|
||||||
*
|
*
|
||||||
* @return mixed[][][]
|
* @return mixed[][][]
|
||||||
*/
|
*/
|
||||||
public function apriori()
|
public function apriori() : array
|
||||||
{
|
{
|
||||||
$L = [];
|
$L = [];
|
||||||
$L[1] = $this->items();
|
$L[1] = $this->items();
|
||||||
@ -119,13 +105,47 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return mixed[][]
|
* @return mixed[][]
|
||||||
*/
|
*/
|
||||||
protected function predictSample(array $sample)
|
protected function predictSample(array $sample) : array
|
||||||
{
|
{
|
||||||
$predicts = array_values(array_filter($this->rules(), function($rule) use ($sample) {
|
$predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
|
||||||
return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
|
return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
|
||||||
}));
|
}));
|
||||||
|
|
||||||
return array_map(function($rule) { return $rule[self::ARRAY_KEY_CONSEQUENT]; }, $predicts);
|
return array_map(function ($rule) {
|
||||||
|
return $rule[self::ARRAY_KEY_CONSEQUENT];
|
||||||
|
}, $predicts);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate rules for each k-length frequent item set.
|
||||||
|
*/
|
||||||
|
private function generateAllRules()
|
||||||
|
{
|
||||||
|
for ($k = 2; !empty($this->large[$k]); ++$k) {
|
||||||
|
foreach ($this->large[$k] as $frequent) {
|
||||||
|
$this->generateRules($frequent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate confident rules for frequent item set.
|
||||||
|
*
|
||||||
|
* @param mixed[] $frequent
|
||||||
|
*/
|
||||||
|
private function generateRules(array $frequent)
|
||||||
|
{
|
||||||
|
foreach ($this->antecedents($frequent) as $antecedent) {
|
||||||
|
if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) {
|
||||||
|
$consequent = array_values(array_diff($frequent, $antecedent));
|
||||||
|
$this->rules[] = [
|
||||||
|
self::ARRAY_KEY_ANTECEDENT => $antecedent,
|
||||||
|
self::ARRAY_KEY_CONSEQUENT => $consequent,
|
||||||
|
self::ARRAY_KEY_SUPPORT => $this->support($consequent),
|
||||||
|
self::ARRAY_KEY_CONFIDENCE => $confidence,
|
||||||
|
];
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -135,7 +155,7 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return mixed[][]
|
* @return mixed[][]
|
||||||
*/
|
*/
|
||||||
private function powerSet(array $sample)
|
private function powerSet(array $sample) : array
|
||||||
{
|
{
|
||||||
$results = [[]];
|
$results = [[]];
|
||||||
foreach ($sample as $item) {
|
foreach ($sample as $item) {
|
||||||
@ -154,12 +174,12 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return mixed[][]
|
* @return mixed[][]
|
||||||
*/
|
*/
|
||||||
private function antecedents(array $sample)
|
private function antecedents(array $sample) : array
|
||||||
{
|
{
|
||||||
$cardinality = count($sample);
|
$cardinality = count($sample);
|
||||||
$antecedents = $this->powerSet($sample);
|
$antecedents = $this->powerSet($sample);
|
||||||
|
|
||||||
return array_filter($antecedents, function($antecedent) use ($cardinality) {
|
return array_filter($antecedents, function ($antecedent) use ($cardinality) {
|
||||||
return (count($antecedent) != $cardinality) && ($antecedent != []);
|
return (count($antecedent) != $cardinality) && ($antecedent != []);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -169,7 +189,7 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return mixed[][]
|
* @return mixed[][]
|
||||||
*/
|
*/
|
||||||
private function items()
|
private function items() : array
|
||||||
{
|
{
|
||||||
$items = [];
|
$items = [];
|
||||||
|
|
||||||
@ -181,7 +201,7 @@ class Apriori implements Associator
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return array_map(function($entry) {
|
return array_map(function ($entry) {
|
||||||
return [$entry];
|
return [$entry];
|
||||||
}, $items);
|
}, $items);
|
||||||
}
|
}
|
||||||
@ -193,9 +213,9 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return mixed[][]
|
* @return mixed[][]
|
||||||
*/
|
*/
|
||||||
private function frequent(array $samples)
|
private function frequent(array $samples) : array
|
||||||
{
|
{
|
||||||
return array_filter($samples, function($entry) {
|
return array_filter($samples, function ($entry) {
|
||||||
return $this->support($entry) >= $this->support;
|
return $this->support($entry) >= $this->support;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -207,7 +227,7 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return mixed[][]
|
* @return mixed[][]
|
||||||
*/
|
*/
|
||||||
private function candidates(array $samples)
|
private function candidates(array $samples) : array
|
||||||
{
|
{
|
||||||
$candidates = [];
|
$candidates = [];
|
||||||
|
|
||||||
@ -223,7 +243,7 @@ class Apriori implements Associator
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ((array)$this->samples as $sample) {
|
foreach ((array) $this->samples as $sample) {
|
||||||
if ($this->subset($sample, $candidate)) {
|
if ($this->subset($sample, $candidate)) {
|
||||||
$candidates[] = $candidate;
|
$candidates[] = $candidate;
|
||||||
continue 2;
|
continue 2;
|
||||||
@ -244,7 +264,7 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return float
|
* @return float
|
||||||
*/
|
*/
|
||||||
private function confidence(array $set, array $subset)
|
private function confidence(array $set, array $subset) : float
|
||||||
{
|
{
|
||||||
return $this->support($set) / $this->support($subset);
|
return $this->support($set) / $this->support($subset);
|
||||||
}
|
}
|
||||||
@ -259,7 +279,7 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return float
|
* @return float
|
||||||
*/
|
*/
|
||||||
private function support(array $sample)
|
private function support(array $sample) : float
|
||||||
{
|
{
|
||||||
return $this->frequency($sample) / count($this->samples);
|
return $this->frequency($sample) / count($this->samples);
|
||||||
}
|
}
|
||||||
@ -273,9 +293,9 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return int
|
* @return int
|
||||||
*/
|
*/
|
||||||
private function frequency(array $sample)
|
private function frequency(array $sample) : int
|
||||||
{
|
{
|
||||||
return count(array_filter($this->samples, function($entry) use ($sample) {
|
return count(array_filter($this->samples, function ($entry) use ($sample) {
|
||||||
return $this->subset($entry, $sample);
|
return $this->subset($entry, $sample);
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
@ -290,9 +310,9 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return bool
|
* @return bool
|
||||||
*/
|
*/
|
||||||
private function contains(array $system, array $set)
|
private function contains(array $system, array $set) : bool
|
||||||
{
|
{
|
||||||
return (bool)array_filter($system, function($entry) use ($set) {
|
return (bool) array_filter($system, function ($entry) use ($set) {
|
||||||
return $this->equals($entry, $set);
|
return $this->equals($entry, $set);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -305,7 +325,7 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return bool
|
* @return bool
|
||||||
*/
|
*/
|
||||||
private function subset(array $set, array $subset)
|
private function subset(array $set, array $subset) : bool
|
||||||
{
|
{
|
||||||
return !array_diff($subset, array_intersect($subset, $set));
|
return !array_diff($subset, array_intersect($subset, $set));
|
||||||
}
|
}
|
||||||
@ -318,7 +338,7 @@ class Apriori implements Associator
|
|||||||
*
|
*
|
||||||
* @return bool
|
* @return bool
|
||||||
*/
|
*/
|
||||||
private function equals(array $set1, array $set2)
|
private function equals(array $set1, array $set2) : bool
|
||||||
{
|
{
|
||||||
return array_diff($set1, $set2) == array_diff($set2, $set1);
|
return array_diff($set1, $set2) == array_diff($set2, $set1);
|
||||||
}
|
}
|
||||||
|
@ -71,12 +71,12 @@ class AprioriTest extends \PHPUnit_Framework_TestCase
|
|||||||
$this->assertTrue($this->invoke($apriori, 'contains', [$L[2], [3, 4]]));
|
$this->assertTrue($this->invoke($apriori, 'contains', [$L[2], [3, 4]]));
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testRules()
|
public function testGetRules()
|
||||||
{
|
{
|
||||||
$apriori = new Apriori(0.4, 0.8);
|
$apriori = new Apriori(0.4, 0.8);
|
||||||
$apriori->train($this->sampleChars, []);
|
$apriori->train($this->sampleChars, []);
|
||||||
|
|
||||||
$this->assertCount(19, $apriori->rules());
|
$this->assertCount(19, $apriori->getRules());
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testAntecedents()
|
public function testAntecedents()
|
||||||
|
Loading…
Reference in New Issue
Block a user