From 90038befa9e3505ca794ed14291dc8dd1d615b21 Mon Sep 17 00:00:00 2001 From: Patrick Florek Date: Fri, 2 Sep 2016 00:18:50 +0200 Subject: [PATCH] Apply comments / coding styles * Remove user-specific gitignore * Add return type hints * Avoid global namespace in docs * Rename rules -> getRules * Split up rule generation Todo: * Move set theory out to math * Extract rule generation --- .gitignore | 1 - docs/machine-learning/association/apriori.md | 12 ++- src/Phpml/Association/Apriori.php | 106 +++++++++++-------- tests/Phpml/Association/AprioriTest.php | 4 +- 4 files changed, 73 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index e85e1fd..8a409f4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -/.idea/ /vendor/ humbuglog.* /bin/phpunit diff --git a/docs/machine-learning/association/apriori.md b/docs/machine-learning/association/apriori.md index c5986f4..544406e 100644 --- a/docs/machine-learning/association/apriori.md +++ b/docs/machine-learning/association/apriori.md @@ -8,7 +8,9 @@ Association rule learning based on [Apriori algorithm](https://en.wikipedia.org/ * $confidence - [confidence](https://en.wikipedia.org/wiki/Association_rule_learning#Confidence), minimum relative amount of item set in frequent item sets ``` -$associator = new \Phpml\Association\Apriori($support = 0.5, $confidence = 0.5); +use Phpml\Association\Apriori; + +$associator = new Apriori($support = 0.5, $confidence = 0.5); ``` ### Train @@ -19,7 +21,9 @@ To train a associator simply provide train samples and labels (as `array`). 
Exam $samples = [['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta'], ['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta']]; $labels = []; -$associator = new \Phpml\Association\Apriori(0.5, 0.5); +use Phpml\Association\Apriori; + +$associator = new Apriori($support = 0.5, $confidence = 0.5); $associator->train($samples, $labels); ``` @@ -37,10 +41,10 @@ $associator->predict([['alpha','epsilon'],['beta','theta']]); ### Associating -Generating association rules simply use `rules` method. +To get the generated association rules, simply use the `getRules` method. ``` -$associator->rules(); +$associator->getRules(); // return [['antecedent' => ['alpha', 'theta'], 'consequent' => ['beta], 'support' => 1.0, 'confidence' => 1.0], ... ] ``` diff --git a/src/Phpml/Association/Apriori.php b/src/Phpml/Association/Apriori.php index bf52c27..4855691 100644 --- a/src/Phpml/Association/Apriori.php +++ b/src/Phpml/Association/Apriori.php @@ -1,6 +1,6 @@ support = $support; + $this->support = $support; $this->confidence = $confidence; } /** - * Generates apriori association rules. + * Get all association rules which are generated for every k-length frequent item set. 
* * @return mixed[][] */ - public function rules() + public function getRules() : array { if (!$this->large) { $this->large = $this->apriori(); @@ -76,33 +76,19 @@ class Apriori implements Associator $this->rules = []; - for ($k = 2; !empty($this->large[$k]); ++$k) { - foreach ($this->large[$k] as $frequent) { - foreach ($this->antecedents($frequent) as $antecedent) { - if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) { - $consequent = array_values(array_diff($frequent, $antecedent)); - $this->rules[] = [ - self::ARRAY_KEY_ANTECEDENT => $antecedent, - self::ARRAY_KEY_CONSEQUENT => $consequent, - self::ARRAY_KEY_SUPPORT => $this->support($consequent), - self::ARRAY_KEY_CONFIDENCE => $confidence, - ]; - } - } - } - } + $this->generateAllRules(); return $this->rules; } /** - * Generates frequent item sets + * Generates frequent item sets. * * @return mixed[][][] */ - public function apriori() + public function apriori() : array { - $L = []; + $L = []; $L[1] = $this->items(); $L[1] = $this->frequent($L[1]); @@ -119,13 +105,47 @@ class Apriori implements Associator * * @return mixed[][] */ - protected function predictSample(array $sample) + protected function predictSample(array $sample) : array { - $predicts = array_values(array_filter($this->rules(), function($rule) use ($sample) { + $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) { return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample); })); - return array_map(function($rule) { return $rule[self::ARRAY_KEY_CONSEQUENT]; }, $predicts); + return array_map(function ($rule) { + return $rule[self::ARRAY_KEY_CONSEQUENT]; + }, $predicts); + } + + /** + * Generate rules for each k-length frequent item set. 
+ */ + private function generateAllRules() + { + for ($k = 2; !empty($this->large[$k]); ++$k) { + foreach ($this->large[$k] as $frequent) { + $this->generateRules($frequent); + } + } + } + + /** + * Generate confident rules for frequent item set. + * + * @param mixed[] $frequent + */ + private function generateRules(array $frequent) + { + foreach ($this->antecedents($frequent) as $antecedent) { + if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) { + $consequent = array_values(array_diff($frequent, $antecedent)); + $this->rules[] = [ + self::ARRAY_KEY_ANTECEDENT => $antecedent, + self::ARRAY_KEY_CONSEQUENT => $consequent, + self::ARRAY_KEY_SUPPORT => $this->support($consequent), + self::ARRAY_KEY_CONFIDENCE => $confidence, + ]; + } + } } /** @@ -135,7 +155,7 @@ class Apriori implements Associator * * @return mixed[][] */ - private function powerSet(array $sample) + private function powerSet(array $sample) : array { $results = [[]]; foreach ($sample as $item) { @@ -154,12 +174,12 @@ class Apriori implements Associator * * @return mixed[][] */ - private function antecedents(array $sample) + private function antecedents(array $sample) : array { $cardinality = count($sample); $antecedents = $this->powerSet($sample); - return array_filter($antecedents, function($antecedent) use ($cardinality) { + return array_filter($antecedents, function ($antecedent) use ($cardinality) { return (count($antecedent) != $cardinality) && ($antecedent != []); }); } @@ -169,7 +189,7 @@ class Apriori implements Associator * * @return mixed[][] */ - private function items() + private function items() : array { $items = []; @@ -181,7 +201,7 @@ class Apriori implements Associator } } - return array_map(function($entry) { + return array_map(function ($entry) { return [$entry]; }, $items); } @@ -193,9 +213,9 @@ class Apriori implements Associator * * @return mixed[][] */ - private function frequent(array $samples) + private function frequent(array $samples) : 
array { - return array_filter($samples, function($entry) { + return array_filter($samples, function ($entry) { return $this->support($entry) >= $this->support; }); } @@ -207,7 +227,7 @@ class Apriori implements Associator * * @return mixed[][] */ - private function candidates(array $samples) + private function candidates(array $samples) : array { $candidates = []; @@ -223,7 +243,7 @@ class Apriori implements Associator continue; } - foreach ((array)$this->samples as $sample) { + foreach ((array) $this->samples as $sample) { if ($this->subset($sample, $candidate)) { $candidates[] = $candidate; continue 2; @@ -244,7 +264,7 @@ class Apriori implements Associator * * @return float */ - private function confidence(array $set, array $subset) + private function confidence(array $set, array $subset) : float { return $this->support($set) / $this->support($subset); } @@ -259,7 +279,7 @@ class Apriori implements Associator * * @return float */ - private function support(array $sample) + private function support(array $sample) : float { return $this->frequency($sample) / count($this->samples); } @@ -273,9 +293,9 @@ class Apriori implements Associator * * @return int */ - private function frequency(array $sample) + private function frequency(array $sample) : int { - return count(array_filter($this->samples, function($entry) use ($sample) { + return count(array_filter($this->samples, function ($entry) use ($sample) { return $this->subset($entry, $sample); })); } @@ -290,9 +310,9 @@ class Apriori implements Associator * * @return bool */ - private function contains(array $system, array $set) + private function contains(array $system, array $set) : bool { - return (bool)array_filter($system, function($entry) use ($set) { + return (bool) array_filter($system, function ($entry) use ($set) { return $this->equals($entry, $set); }); } @@ -305,7 +325,7 @@ class Apriori implements Associator * * @return bool */ - private function subset(array $set, array $subset) + private function 
subset(array $set, array $subset) : bool { return !array_diff($subset, array_intersect($subset, $set)); } @@ -318,7 +338,7 @@ class Apriori implements Associator * * @return bool */ - private function equals(array $set1, array $set2) + private function equals(array $set1, array $set2) : bool { return array_diff($set1, $set2) == array_diff($set2, $set1); } diff --git a/tests/Phpml/Association/AprioriTest.php b/tests/Phpml/Association/AprioriTest.php index 9cc595d..b249ff6 100644 --- a/tests/Phpml/Association/AprioriTest.php +++ b/tests/Phpml/Association/AprioriTest.php @@ -71,12 +71,12 @@ class AprioriTest extends \PHPUnit_Framework_TestCase $this->assertTrue($this->invoke($apriori, 'contains', [$L[2], [3, 4]])); } - public function testRules() + public function testGetRules() { $apriori = new Apriori(0.4, 0.8); $apriori->train($this->sampleChars, []); - $this->assertCount(19, $apriori->rules()); + $this->assertCount(19, $apriori->getRules()); } public function testAntecedents()