Apply comments / coding styles

* Remove user-specific gitignore
* Add return type hints
* Avoid global namespace in docs
* Rename rules -> getRules
* Split up rule generation

Todo:
* Move set theory out to math
* Extract rule generation
This commit is contained in:
Patrick Florek 2016-09-02 00:18:50 +02:00
parent c8bd8db601
commit 90038befa9
4 changed files with 73 additions and 50 deletions

1
.gitignore vendored
View File

@ -1,4 +1,3 @@
/.idea/
/vendor/ /vendor/
humbuglog.* humbuglog.*
/bin/phpunit /bin/phpunit

View File

@ -8,7 +8,9 @@ Association rule learning based on [Apriori algorithm](https://en.wikipedia.org/
* $confidence - [confidence](https://en.wikipedia.org/wiki/Association_rule_learning#Confidence), minimum relative amount of item set in frequent item sets * $confidence - [confidence](https://en.wikipedia.org/wiki/Association_rule_learning#Confidence), minimum relative amount of item set in frequent item sets
``` ```
$associator = new \Phpml\Association\Apriori($support = 0.5, $confidence = 0.5); use Phpml\Association\Apriori;
$associator = new Apriori($support = 0.5, $confidence = 0.5);
``` ```
### Train ### Train
@ -19,7 +21,9 @@ To train a associator simply provide train samples and labels (as `array`). Exam
$samples = [['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta'], ['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta']]; $samples = [['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta'], ['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta']];
$labels = []; $labels = [];
$associator = new \Phpml\Association\Apriori(0.5, 0.5); use Phpml\Association\Apriori;
$associator = new Apriori($support = 0.5, $confidence = 0.5);
$associator->train($samples, $labels); $associator->train($samples, $labels);
``` ```
@ -37,10 +41,10 @@ $associator->predict([['alpha','epsilon'],['beta','theta']]);
### Associating ### Associating
Generating association rules simply use `rules` method. To get the generated association rules, simply use the `getRules` method.
``` ```
$associator->rules(); $associator->getRules();
// return [['antecedent' => ['alpha', 'theta'], 'consequent' => ['beta'], 'support' => 1.0, 'confidence' => 1.0], ... ] // return [['antecedent' => ['alpha', 'theta'], 'consequent' => ['beta'], 'support' => 1.0, 'confidence' => 1.0], ... ]
``` ```

View File

@ -53,18 +53,18 @@ class Apriori implements Associator
* @param float $support * @param float $support
* @param float $confidence * @param float $confidence
*/ */
public function __construct($support = 0.0, $confidence = 0.0) public function __construct(float $support = 0.0, float $confidence = 0.0)
{ {
$this->support = $support; $this->support = $support;
$this->confidence = $confidence; $this->confidence = $confidence;
} }
/** /**
* Generates apriori association rules. * Get all association rules which are generated for every k-length frequent item set.
* *
* @return mixed[][] * @return mixed[][]
*/ */
public function rules() public function getRules() : array
{ {
if (!$this->large) { if (!$this->large) {
$this->large = $this->apriori(); $this->large = $this->apriori();
@ -76,31 +76,17 @@ class Apriori implements Associator
$this->rules = []; $this->rules = [];
for ($k = 2; !empty($this->large[$k]); ++$k) { $this->generateAllRules();
foreach ($this->large[$k] as $frequent) {
foreach ($this->antecedents($frequent) as $antecedent) {
if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) {
$consequent = array_values(array_diff($frequent, $antecedent));
$this->rules[] = [
self::ARRAY_KEY_ANTECEDENT => $antecedent,
self::ARRAY_KEY_CONSEQUENT => $consequent,
self::ARRAY_KEY_SUPPORT => $this->support($consequent),
self::ARRAY_KEY_CONFIDENCE => $confidence,
];
}
}
}
}
return $this->rules; return $this->rules;
} }
/** /**
* Generates frequent item sets * Generates frequent item sets.
* *
* @return mixed[][][] * @return mixed[][][]
*/ */
public function apriori() public function apriori() : array
{ {
$L = []; $L = [];
$L[1] = $this->items(); $L[1] = $this->items();
@ -119,13 +105,47 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
protected function predictSample(array $sample) protected function predictSample(array $sample) : array
{ {
$predicts = array_values(array_filter($this->rules(), function($rule) use ($sample) { $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample); return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
})); }));
return array_map(function($rule) { return $rule[self::ARRAY_KEY_CONSEQUENT]; }, $predicts); return array_map(function ($rule) {
return $rule[self::ARRAY_KEY_CONSEQUENT];
}, $predicts);
}
/**
 * Derive association rules from every frequent item set of length two or more.
 *
 * The levels stored in $this->large are visited in ascending order of k,
 * stopping at the first level that is missing or empty; each item set found
 * is handed to generateRules().
 */
private function generateAllRules()
{
    $length = 2;
    while (!empty($this->large[$length])) {
        foreach ($this->large[$length] as $itemSet) {
            $this->generateRules($itemSet);
        }
        ++$length;
    }
}
/**
 * Generate confident rules for frequent item set.
 *
 * Every candidate returned by antecedents() is tried as the left-hand side of
 * a rule; the remaining items of the set form the consequent. A rule is
 * appended to $this->rules when its confidence is at least the configured
 * minimum confidence.
 *
 * @param mixed[] $frequent
 */
private function generateRules(array $frequent)
{
    foreach ($this->antecedents($frequent) as $antecedent) {
        $confidence = $this->confidence($frequent, $antecedent);
        if ($confidence < $this->confidence) {
            continue;
        }

        $consequent = array_values(array_diff($frequent, $antecedent));
        $this->rules[] = [
            self::ARRAY_KEY_ANTECEDENT => $antecedent,
            self::ARRAY_KEY_CONSEQUENT => $consequent,
            // Support of a rule X => Y is the support of the whole item set
            // (X and Y together), not of the consequent alone.
            self::ARRAY_KEY_SUPPORT => $this->support($frequent),
            self::ARRAY_KEY_CONFIDENCE => $confidence,
        ];
    }
}
/** /**
@ -135,7 +155,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function powerSet(array $sample) private function powerSet(array $sample) : array
{ {
$results = [[]]; $results = [[]];
foreach ($sample as $item) { foreach ($sample as $item) {
@ -154,7 +174,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function antecedents(array $sample) private function antecedents(array $sample) : array
{ {
$cardinality = count($sample); $cardinality = count($sample);
$antecedents = $this->powerSet($sample); $antecedents = $this->powerSet($sample);
@ -169,7 +189,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function items() private function items() : array
{ {
$items = []; $items = [];
@ -193,7 +213,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function frequent(array $samples) private function frequent(array $samples) : array
{ {
return array_filter($samples, function ($entry) { return array_filter($samples, function ($entry) {
return $this->support($entry) >= $this->support; return $this->support($entry) >= $this->support;
@ -207,7 +227,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function candidates(array $samples) private function candidates(array $samples) : array
{ {
$candidates = []; $candidates = [];
@ -244,7 +264,7 @@ class Apriori implements Associator
* *
* @return float * @return float
*/ */
private function confidence(array $set, array $subset) private function confidence(array $set, array $subset) : float
{ {
return $this->support($set) / $this->support($subset); return $this->support($set) / $this->support($subset);
} }
@ -259,7 +279,7 @@ class Apriori implements Associator
* *
* @return float * @return float
*/ */
private function support(array $sample) private function support(array $sample) : float
{ {
return $this->frequency($sample) / count($this->samples); return $this->frequency($sample) / count($this->samples);
} }
@ -273,7 +293,7 @@ class Apriori implements Associator
* *
* @return int * @return int
*/ */
private function frequency(array $sample) private function frequency(array $sample) : int
{ {
return count(array_filter($this->samples, function ($entry) use ($sample) { return count(array_filter($this->samples, function ($entry) use ($sample) {
return $this->subset($entry, $sample); return $this->subset($entry, $sample);
@ -290,7 +310,7 @@ class Apriori implements Associator
* *
* @return bool * @return bool
*/ */
private function contains(array $system, array $set) private function contains(array $system, array $set) : bool
{ {
return (bool) array_filter($system, function ($entry) use ($set) { return (bool) array_filter($system, function ($entry) use ($set) {
return $this->equals($entry, $set); return $this->equals($entry, $set);
@ -305,7 +325,7 @@ class Apriori implements Associator
* *
* @return bool * @return bool
*/ */
private function subset(array $set, array $subset) private function subset(array $set, array $subset) : bool
{ {
return !array_diff($subset, array_intersect($subset, $set)); return !array_diff($subset, array_intersect($subset, $set));
} }
@ -318,7 +338,7 @@ class Apriori implements Associator
* *
* @return bool * @return bool
*/ */
private function equals(array $set1, array $set2) private function equals(array $set1, array $set2) : bool
{ {
return array_diff($set1, $set2) == array_diff($set2, $set1); return array_diff($set1, $set2) == array_diff($set2, $set1);
} }

View File

@ -71,12 +71,12 @@ class AprioriTest extends \PHPUnit_Framework_TestCase
$this->assertTrue($this->invoke($apriori, 'contains', [$L[2], [3, 4]])); $this->assertTrue($this->invoke($apriori, 'contains', [$L[2], [3, 4]]));
} }
public function testRules() public function testGetRules()
{ {
$apriori = new Apriori(0.4, 0.8); $apriori = new Apriori(0.4, 0.8);
$apriori->train($this->sampleChars, []); $apriori->train($this->sampleChars, []);
$this->assertCount(19, $apriori->rules()); $this->assertCount(19, $apriori->getRules());
} }
public function testAntecedents() public function testAntecedents()