2016-08-23 13:44:53 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-08-23 13:44:53 +00:00
|
|
|
|
|
|
|
namespace Phpml\Association;
|
|
|
|
|
|
|
|
use Phpml\Helper\Predictable;
|
|
|
|
use Phpml\Helper\Trainable;
|
|
|
|
|
|
|
|
/**
 * Apriori association-rule learner.
 *
 * Frequent item sets are built level by level (1-item sets, 2-item sets, ...) from the
 * training samples in $this->samples, and rules "antecedent => consequent" are derived
 * from every frequent set of length >= 2 whose confidence reaches the configured minimum.
 *
 * NOTE(review): $this->samples is not declared in this class; it is presumably provided by
 * the Trainable trait — confirm. Frequent sets and rules are cached on first use and are
 * never reset by this class, so results are not refreshed if the samples change afterwards.
 */
class Apriori implements Associator
{
    use Trainable, Predictable;

    const ARRAY_KEY_ANTECEDENT = 'antecedent';

    const ARRAY_KEY_CONFIDENCE = 'confidence';

    const ARRAY_KEY_CONSEQUENT = 'consequent';

    const ARRAY_KEY_SUPPORT = 'support';

    /**
     * Minimum confidence a rule must reach to be kept.
     *
     * @var float
     */
    private $confidence;

    /**
     * Cached frequent item sets, indexed by item-set length k.
     *
     * @var mixed[][][]
     */
    private $large;

    /**
     * Minimum support (relative frequency among the samples) an item set must reach to be frequent.
     *
     * @var float
     */
    private $support;

    /**
     * Cached association rules; null until they have been generated.
     *
     * @var mixed[][]
     */
    private $rules;

    /**
     * Apriori constructor.
     *
     * @param float $support    Minimum support of frequent item sets
     * @param float $confidence Minimum confidence of generated rules
     */
    public function __construct(float $support = 0.0, float $confidence = 0.0)
    {
        $this->support = $support;
        $this->confidence = $confidence;
    }

    /**
     * Get all association rules which are generated for every k-length frequent item set.
     *
     * Each rule is an array keyed by the ARRAY_KEY_* constants. The result is computed once
     * and then served from cache, including when no rule was found.
     *
     * @return mixed[][]
     */
    public function getRules() : array
    {
        if (!$this->large) {
            $this->large = $this->apriori();
        }

        // Compare against null so that an empty rule list is treated as "already computed".
        if (null === $this->rules) {
            $this->rules = [];
            $this->generateAllRules();
        }

        return $this->rules;
    }

    /**
     * Generates frequent item sets.
     *
     * The result is indexed by item-set length, starting at 1. The loop stops after the first
     * level that yields no frequent set, so the last entry is always an empty array.
     *
     * @return mixed[][][]
     */
    public function apriori() : array
    {
        $L = [];
        $L[1] = $this->frequent($this->items());

        for ($k = 2; !empty($L[$k - 1]); ++$k) {
            $L[$k] = $this->frequent($this->candidates($L[$k - 1]));
        }

        return $L;
    }

    /**
     * Returns the consequents of every rule whose antecedent equals $sample as a set.
     *
     * @param mixed[] $sample
     *
     * @return mixed[][]
     */
    protected function predictSample(array $sample) : array
    {
        $consequents = [];

        foreach ($this->getRules() as $rule) {
            if ($this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample)) {
                $consequents[] = $rule[self::ARRAY_KEY_CONSEQUENT];
            }
        }

        return $consequents;
    }

    /**
     * Generate rules for each k-length frequent item set with k >= 2.
     */
    private function generateAllRules()
    {
        for ($k = 2; !empty($this->large[$k]); ++$k) {
            foreach ($this->large[$k] as $frequent) {
                $this->generateRules($frequent);
            }
        }
    }

    /**
     * Generate confident rules for frequent item set.
     *
     * Every non-empty proper subset of $frequent is tried as antecedent; the remaining items
     * form the consequent. A rule is stored when its confidence is at least $this->confidence.
     *
     * @param mixed[] $frequent
     */
    private function generateRules(array $frequent)
    {
        // The support of a rule is the support of the complete item set (antecedent and
        // consequent together), so it is the same for every split of $frequent.
        $support = $this->support($frequent);

        foreach ($this->antecedents($frequent) as $antecedent) {
            $confidence = $this->confidence($frequent, $antecedent);

            if ($confidence < $this->confidence) {
                continue;
            }

            $this->rules[] = [
                self::ARRAY_KEY_ANTECEDENT => $antecedent,
                self::ARRAY_KEY_CONSEQUENT => array_values(array_diff($frequent, $antecedent)),
                self::ARRAY_KEY_SUPPORT => $support,
                self::ARRAY_KEY_CONFIDENCE => $confidence,
            ];
        }
    }

    /**
     * Generates the power set for given item set $sample.
     *
     * @param mixed[] $sample
     *
     * @return mixed[][]
     */
    private function powerSet(array $sample) : array
    {
        $results = [[]];

        foreach ($sample as $item) {
            // foreach iterates over a copy of $results, so appending inside the loop is safe:
            // each existing combination is duplicated once with $item prepended.
            foreach ($results as $combination) {
                $results[] = array_merge([$item], $combination);
            }
        }

        return $results;
    }

    /**
     * Generates all proper subsets for given set $sample without the empty set.
     *
     * Keys of the underlying power set are preserved, so the result is not a sequential list.
     *
     * @param mixed[] $sample
     *
     * @return mixed[][]
     */
    private function antecedents(array $sample) : array
    {
        $cardinality = count($sample);

        return array_filter($this->powerSet($sample), static function ($antecedent) use ($cardinality) {
            return $antecedent !== [] && count($antecedent) !== $cardinality;
        });
    }

    /**
     * Collects every distinct item of the samples as a 1-item set (not yet filtered by support).
     *
     * @return mixed[][]
     */
    private function items() : array
    {
        $items = [];

        // Cast mirrors candidates(): tolerates samples that have not been set yet.
        foreach ((array) $this->samples as $sample) {
            foreach ($sample as $item) {
                if (!in_array($item, $items, true)) {
                    $items[] = $item;
                }
            }
        }

        return array_map(static function ($entry) {
            return [$entry];
        }, $items);
    }

    /**
     * Returns frequent item sets only, i.e. those whose support is at least $this->support.
     *
     * Keys of $samples are preserved.
     *
     * @param mixed[][] $samples
     *
     * @return mixed[][]
     */
    private function frequent(array $samples) : array
    {
        return array_filter($samples, function ($entry) {
            return $this->support($entry) >= $this->support;
        });
    }

    /**
     * Builds candidate k-item sets from the given (k - 1)-item sets.
     *
     * Two sets are joined when they differ in exactly one item each (their symmetric
     * difference has two elements). A candidate is kept once, and only if it is contained
     * in at least one training sample.
     *
     * @param mixed[][] $samples
     *
     * @return mixed[][]
     */
    private function candidates(array $samples) : array
    {
        $candidates = [];

        foreach ($samples as $p) {
            foreach ($samples as $q) {
                if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) !== 2) {
                    continue;
                }

                $candidate = array_unique(array_merge($p, $q));

                if ($this->contains($candidates, $candidate)) {
                    continue;
                }

                foreach ((array) $this->samples as $sample) {
                    if ($this->subset($sample, $candidate)) {
                        $candidates[] = $candidate;
                        // One containing sample is enough; move on to the next $q.
                        continue 2;
                    }
                }
            }
        }

        return $candidates;
    }

    /**
     * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
     * $set.
     *
     * @param mixed[] $set
     * @param mixed[] $subset
     *
     * @return float
     */
    private function confidence(array $set, array $subset) : float
    {
        return $this->support($set) / $this->support($subset);
    }

    /**
     * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
     * pool.
     *
     * @see \Phpml\Association\Apriori::samples
     *
     * @param mixed[] $sample
     *
     * @return float
     */
    private function support(array $sample) : float
    {
        return $this->frequency($sample) / count($this->samples);
    }

    /**
     * Counts occurrences of $sample as subset in data pool.
     *
     * @see \Phpml\Association\Apriori::samples
     *
     * @param mixed[] $sample
     *
     * @return int
     */
    private function frequency(array $sample) : int
    {
        return count(array_filter($this->samples, function ($entry) use ($sample) {
            return $this->subset($entry, $sample);
        }));
    }

    /**
     * Returns true if set is an element of system.
     *
     * @see \Phpml\Association\Apriori::equals()
     *
     * @param mixed[][] $system
     * @param mixed[]   $set
     *
     * @return bool
     */
    private function contains(array $system, array $set) : bool
    {
        foreach ($system as $entry) {
            if ($this->equals($entry, $set)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns true if every item of subset also occurs in set (equal sets included), comparing items by their
     * string representation.
     *
     * @param mixed[] $set
     * @param mixed[] $subset
     *
     * @return bool
     */
    private function subset(array $set, array $subset) : bool
    {
        return !array_diff($subset, $set);
    }

    /**
     * Returns true if both sets hold the same items, comparing items by their string representation.
     *
     * Both one-sided differences must be empty. Comparing the two differences with == instead would
     * treat distinct leftovers such as '1e3' and '1000' as equal.
     *
     * @param mixed[] $set1
     * @param mixed[] $set2
     *
     * @return bool
     */
    private function equals(array $set1, array $set2) : bool
    {
        return !array_diff($set1, $set2) && !array_diff($set2, $set1);
    }
}
|