php-ml/src/Association/Apriori.php

333 lines
8.2 KiB
PHP

<?php
declare(strict_types=1);
namespace Phpml\Association;
use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
class Apriori implements Associator
{
use Trainable;
use Predictable;
public const ARRAY_KEY_ANTECEDENT = 'antecedent';
public const ARRAY_KEY_CONFIDENCE = 'confidence';
public const ARRAY_KEY_CONSEQUENT = 'consequent';
public const ARRAY_KEY_SUPPORT = 'support';
/**
* Minimum relative probability of frequent transactions.
*
* @var float
*/
private $confidence;
/**
* The large set contains frequent k-length item sets.
*
* @var mixed[][][]
*/
private $large = [];
/**
* Minimum relative frequency of transactions.
*
* @var float
*/
private $support;
/**
* The generated Apriori association rules.
*
* @var mixed[][]
*/
private $rules = [];
/**
* Apriori constructor.
*/
public function __construct(float $support = 0.0, float $confidence = 0.0)
{
$this->support = $support;
$this->confidence = $confidence;
}
/**
* Get all association rules which are generated for every k-length frequent item set.
*
* @return mixed[][]
*/
public function getRules(): array
{
if (count($this->large) === 0) {
$this->large = $this->apriori();
}
if (count($this->rules) > 0) {
return $this->rules;
}
$this->rules = [];
$this->generateAllRules();
return $this->rules;
}
/**
* Generates frequent item sets.
*
* @return mixed[][][]
*/
public function apriori(): array
{
$L = [];
$items = $this->frequent($this->items());
for ($k = 1; isset($items[0]); ++$k) {
$L[$k] = $items;
$items = $this->frequent($this->candidates($items));
}
return $L;
}
/**
* @param mixed[] $sample
*
* @return mixed[][]
*/
protected function predictSample(array $sample): array
{
$predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample): bool {
return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
}));
return array_map(static function ($rule) {
return $rule[self::ARRAY_KEY_CONSEQUENT];
}, $predicts);
}
/**
* Generate rules for each k-length frequent item set.
*/
private function generateAllRules(): void
{
for ($k = 2; isset($this->large[$k]); ++$k) {
foreach ($this->large[$k] as $frequent) {
$this->generateRules($frequent);
}
}
}
/**
* Generate confident rules for frequent item set.
*
* @param mixed[] $frequent
*/
private function generateRules(array $frequent): void
{
foreach ($this->antecedents($frequent) as $antecedent) {
$confidence = $this->confidence($frequent, $antecedent);
if ($this->confidence <= $confidence) {
$consequent = array_values(array_diff($frequent, $antecedent));
$this->rules[] = [
self::ARRAY_KEY_ANTECEDENT => $antecedent,
self::ARRAY_KEY_CONSEQUENT => $consequent,
self::ARRAY_KEY_SUPPORT => $this->support($frequent),
self::ARRAY_KEY_CONFIDENCE => $confidence,
];
}
}
}
/**
* Generates the power set for given item set $sample.
*
* @param mixed[] $sample
*
* @return mixed[][]
*/
private function powerSet(array $sample): array
{
$results = [[]];
foreach ($sample as $item) {
foreach ($results as $combination) {
$results[] = array_merge([$item], $combination);
}
}
return $results;
}
/**
* Generates all proper subsets for given set $sample without the empty set.
*
* @param mixed[] $sample
*
* @return mixed[][]
*/
private function antecedents(array $sample): array
{
$cardinality = count($sample);
$antecedents = $this->powerSet($sample);
return array_filter($antecedents, static function ($antecedent) use ($cardinality): bool {
return (count($antecedent) != $cardinality) && ($antecedent != []);
});
}
/**
* Calculates frequent k = 1 item sets.
*
* @return mixed[][]
*/
private function items(): array
{
$items = [];
foreach ($this->samples as $sample) {
foreach ($sample as $item) {
if (!in_array($item, $items, true)) {
$items[] = $item;
}
}
}
return array_map(static function ($entry): array {
return [$entry];
}, $items);
}
/**
* Returns frequent item sets only.
*
* @param mixed[][] $samples
*
* @return mixed[][]
*/
private function frequent(array $samples): array
{
return array_values(array_filter($samples, function ($entry): bool {
return $this->support($entry) >= $this->support;
}));
}
/**
* Calculates frequent k item sets, where count($samples) == $k - 1.
*
* @param mixed[][] $samples
*
* @return mixed[][]
*/
private function candidates(array $samples): array
{
$candidates = [];
foreach ($samples as $p) {
foreach ($samples as $q) {
if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) {
continue;
}
$candidate = array_values(array_unique(array_merge($p, $q)));
if ($this->contains($candidates, $candidate)) {
continue;
}
foreach ($this->samples as $sample) {
if ($this->subset($sample, $candidate)) {
$candidates[] = $candidate;
continue 2;
}
}
}
}
return $candidates;
}
/**
* Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
* $set.
*
* @param mixed[] $set
* @param mixed[] $subset
*/
private function confidence(array $set, array $subset): float
{
return $this->support($set) / $this->support($subset);
}
/**
* Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
* pool.
*
* @see \Phpml\Association\Apriori::samples
*
* @param mixed[] $sample
*/
private function support(array $sample): float
{
return $this->frequency($sample) / count($this->samples);
}
/**
* Counts occurrences of $sample as subset in data pool.
*
* @see \Phpml\Association\Apriori::samples
*
* @param mixed[] $sample
*/
private function frequency(array $sample): int
{
return count(array_filter($this->samples, function ($entry) use ($sample): bool {
return $this->subset($entry, $sample);
}));
}
/**
* Returns true if set is an element of system.
*
* @see \Phpml\Association\Apriori::equals()
*
* @param mixed[][] $system
* @param mixed[] $set
*/
private function contains(array $system, array $set): bool
{
return (bool) array_filter($system, function ($entry) use ($set): bool {
return $this->equals($entry, $set);
});
}
/**
* Returns true if subset is a (proper) subset of set by its items string representation.
*
* @param mixed[] $set
* @param mixed[] $subset
*/
private function subset(array $set, array $subset): bool
{
return count(array_diff($subset, array_intersect($subset, $set))) === 0;
}
/**
* Returns true if string representation of items does not differ.
*
* @param mixed[] $set1
* @param mixed[] $set2
*/
private function equals(array $set1, array $set2): bool
{
return array_diff($set1, $set2) == array_diff($set2, $set1);
}
}