Added EasyCodingStandard + lots of code fixes (#156)

* travis: move coveralls here, decouple from package

* composer: use PSR4

* phpunit: simpler config

* travis: add ecs run

* composer: add ecs dev

* use the standard vendor/bin directory for dependency bins; a custom bin dir gets confused with local bins and requires gitignore handling

* ecs: add PSR2

* [cs] PSR2 spacing fixes

* [cs] PSR2 class name fix

* [cs] PHP7 fixes - return semicolon spaces, old rand functions, typehints

* [cs] fix less strict typehints

* fix typehints to make tests pass

* ecs: ignore typehint-less elements

* [cs] standardize arrays

* [cs] standardize docblock, remove unused comments

* [cs] use self where possible

* [cs] sort class elements, from public to private

* [cs] do not use yoda conditions (found fewer yoda cases than non-yoda ones)

* space

* [cs] do not assign in condition

* [cs] use namespace imports if possible

* [cs] use ::class over strings

* [cs] fix defaults for array properties; single spacing between properties and constants

* cleanup ecs comments

* [cs] use item per line in multi-items array

* missing line

* misc

* rebase
This commit is contained in:
Tomáš Votruba 2017-11-22 22:16:10 +01:00 committed by Arkadiusz Kondas
parent b1d40bfa30
commit 726cf4cddf
139 changed files with 3080 additions and 1514 deletions

4
.gitignore vendored
View File

@ -1,8 +1,4 @@
/vendor/
humbuglog.*
/bin/phpunit
.coverage
.php_cs.cache
/bin/php-cs-fixer
/bin/coveralls
/build

View File

@ -6,7 +6,7 @@ matrix:
include:
- os: linux
php: '7.1'
env: DISABLE_XDEBUG="true"
env: DISABLE_XDEBUG="true" STATIC_ANALYSIS="true"
- os: linux
php: '7.2'
@ -21,7 +21,7 @@ matrix:
before_install:
- if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then /usr/bin/env bash bin/prepare_osx_env.sh ; fi
- if [[ DISABLE_XDEBUG == "true" ]]; then phpenv config-rm xdebug.ini; fi
- if [[ $DISABLE_XDEBUG == "true" ]]; then phpenv config-rm xdebug.ini; fi
install:
- if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then /usr/bin/env bash bin/handle_brew_pkg.sh "${_PHP}" ; fi
@ -29,10 +29,12 @@ install:
- php composer.phar install --dev --no-interaction --ignore-platform-reqs
script:
- bin/phpunit $PHPUNIT_FLAGS
- vendor/bin/phpunit $PHPUNIT_FLAGS
- if [[ $STATIC_ANALYSIS != "" ]]; then vendor/bin/ecs check src tests; fi
after_success:
- |
if [[ $PHPUNIT_FLAGS != "" ]]; then
php bin/coveralls -v
wget https://github.com/satooshi/php-coveralls/releases/download/v1.0.1/coveralls.phar;
php coveralls.phar --verbose;
fi

View File

@ -12,8 +12,8 @@
}
],
"autoload": {
"psr-0": {
"Phpml": "src/"
"psr-4": {
"Phpml\\": "src/Phpml"
}
},
"require": {
@ -22,9 +22,8 @@
"require-dev": {
"phpunit/phpunit": "^6.0",
"friendsofphp/php-cs-fixer": "^2.4",
"php-coveralls/php-coveralls": "^1.0"
},
"config": {
"bin-dir": "bin"
"symplify/easy-coding-standard": "dev-master as 2.5",
"symplify/coding-standard": "dev-master as 2.5",
"symplify/package-builder": "dev-master#3604bea as 2.5"
}
}

1968
composer.lock generated

File diff suppressed because it is too large Load Diff

39
easy-coding-standard.neon Normal file
View File

@ -0,0 +1,39 @@
includes:
- vendor/symplify/easy-coding-standard/config/psr2.neon
- vendor/symplify/easy-coding-standard/config/php70.neon
- vendor/symplify/easy-coding-standard/config/clean-code.neon
- vendor/symplify/easy-coding-standard/config/common/array.neon
- vendor/symplify/easy-coding-standard/config/common/docblock.neon
- vendor/symplify/easy-coding-standard/config/common/namespaces.neon
- vendor/symplify/easy-coding-standard/config/common/control-structures.neon
# many errors, need help
#- vendor/symplify/easy-coding-standard/config/common/strict.neon
checkers:
- Symplify\CodingStandard\Fixer\Import\ImportNamespacedNameFixer
- Symplify\CodingStandard\Fixer\Php\ClassStringToClassConstantFixer
- Symplify\CodingStandard\Fixer\Property\ArrayPropertyDefaultValueFixer
- Symplify\CodingStandard\Fixer\ClassNotation\PropertyAndConstantSeparationFixer
- Symplify\CodingStandard\Fixer\ArrayNotation\StandaloneLineInMultilineArrayFixer
parameters:
exclude_checkers:
# from strict.neon
- PhpCsFixer\Fixer\PhpUnit\PhpUnitStrictFixer
skip:
PhpCsFixer\Fixer\Alias\RandomApiMigrationFixer:
# random_int() breaks code
- src/Phpml/CrossValidation/RandomSplit.php
SlevomatCodingStandard\Sniffs\Classes\UnusedPrivateElementsSniff:
# magic calls
- src/Phpml/Preprocessing/Normalizer.php
skip_codes:
# missing typehints
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingParameterTypeHint
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversableParameterTypeHintSpecification
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingReturnTypeHint
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversableReturnTypeHintSpecification
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingPropertyTypeHint
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversablePropertyTypeHintSpecification

View File

@ -6,11 +6,9 @@
beStrictAboutTestSize="true"
beStrictAboutChangesToGlobalState="true"
>
<testsuites>
<testsuite name="PHP-ML Test Suite">
<directory>tests/*</directory>
</testsuite>
</testsuites>
<filter>
<whitelist processUncoveredFilesFromWhitelist="true">

View File

@ -31,7 +31,7 @@ class Apriori implements Associator
*
* @var mixed[][][]
*/
private $large;
private $large = [];
/**
* Minimum relative frequency of transactions.
@ -45,7 +45,7 @@ class Apriori implements Associator
*
* @var mixed[][]
*/
private $rules;
private $rules = [];
/**
* Apriori constructor.
@ -61,7 +61,7 @@ class Apriori implements Associator
*
* @return mixed[][]
*/
public function getRules() : array
public function getRules(): array
{
if (!$this->large) {
$this->large = $this->apriori();
@ -83,7 +83,7 @@ class Apriori implements Associator
*
* @return mixed[][][]
*/
public function apriori() : array
public function apriori(): array
{
$L = [];
$L[1] = $this->items();
@ -102,7 +102,7 @@ class Apriori implements Associator
*
* @return mixed[][]
*/
protected function predictSample(array $sample) : array
protected function predictSample(array $sample): array
{
$predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
@ -133,7 +133,8 @@ class Apriori implements Associator
private function generateRules(array $frequent): void
{
foreach ($this->antecedents($frequent) as $antecedent) {
if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) {
$confidence = $this->confidence($frequent, $antecedent);
if ($this->confidence <= $confidence) {
$consequent = array_values(array_diff($frequent, $antecedent));
$this->rules[] = [
self::ARRAY_KEY_ANTECEDENT => $antecedent,
@ -152,7 +153,7 @@ class Apriori implements Associator
*
* @return mixed[][]
*/
private function powerSet(array $sample) : array
private function powerSet(array $sample): array
{
$results = [[]];
foreach ($sample as $item) {
@ -171,7 +172,7 @@ class Apriori implements Associator
*
* @return mixed[][]
*/
private function antecedents(array $sample) : array
private function antecedents(array $sample): array
{
$cardinality = count($sample);
$antecedents = $this->powerSet($sample);
@ -186,7 +187,7 @@ class Apriori implements Associator
*
* @return mixed[][]
*/
private function items() : array
private function items(): array
{
$items = [];
@ -210,7 +211,7 @@ class Apriori implements Associator
*
* @return mixed[][]
*/
private function frequent(array $samples) : array
private function frequent(array $samples): array
{
return array_filter($samples, function ($entry) {
return $this->support($entry) >= $this->support;
@ -224,7 +225,7 @@ class Apriori implements Associator
*
* @return mixed[][]
*/
private function candidates(array $samples) : array
private function candidates(array $samples): array
{
$candidates = [];
@ -259,7 +260,7 @@ class Apriori implements Associator
* @param mixed[] $set
* @param mixed[] $subset
*/
private function confidence(array $set, array $subset) : float
private function confidence(array $set, array $subset): float
{
    // Support of the whole set divided by the support of the given subset.
    $supportOfSet = $this->support($set);
    $supportOfSubset = $this->support($subset);

    return $supportOfSet / $supportOfSubset;
}
@ -272,7 +273,7 @@ class Apriori implements Associator
*
* @param mixed[] $sample
*/
private function support(array $sample) : float
private function support(array $sample): float
{
    // frequency() of the sample relative to the number of stored samples.
    $occurrences = $this->frequency($sample);
    $sampleCount = count($this->samples);

    return $occurrences / $sampleCount;
}
@ -284,7 +285,7 @@ class Apriori implements Associator
*
* @param mixed[] $sample
*/
private function frequency(array $sample) : int
private function frequency(array $sample): int
{
return count(array_filter($this->samples, function ($entry) use ($sample) {
return $this->subset($entry, $sample);
@ -299,7 +300,7 @@ class Apriori implements Associator
* @param mixed[][] $system
* @param mixed[] $set
*/
private function contains(array $system, array $set) : bool
private function contains(array $system, array $set): bool
{
return (bool) array_filter($system, function ($entry) use ($set) {
return $this->equals($entry, $set);
@ -312,7 +313,7 @@ class Apriori implements Associator
* @param mixed[] $set
* @param mixed[] $subset
*/
private function subset(array $set, array $subset) : bool
private function subset(array $set, array $subset): bool
{
    // Elements of $subset that also occur in $set ...
    $shared = array_intersect($subset, $set);
    // ... and whatever is left of $subset once those are removed.
    $missing = array_diff($subset, $shared);

    return count($missing) === 0;
}
@ -323,7 +324,7 @@ class Apriori implements Associator
* @param mixed[] $set1
* @param mixed[] $set2
*/
private function equals(array $set1, array $set2) : bool
private function equals(array $set1, array $set2): bool
{
    // Two sets are equal exactly when neither contains an element the other
    // lacks, i.e. both differences are empty. Comparing the two differences
    // with a loose `==` (as done previously) also reported equality for
    // non-empty differences whose leftovers merely compare loosely equal,
    // e.g. ['1e3'] versus ['1000'].
    return !array_diff($set1, $set2) && !array_diff($set2, $set1);
}

View File

@ -15,22 +15,18 @@ class DecisionTree implements Classifier
use Trainable, Predictable;
public const CONTINUOUS = 1;
public const NOMINAL = 2;
/**
* @var array
*/
protected $columnTypes;
/**
* @var array
*/
private $labels = [];
/**
* @var int
*/
private $featureCount = 0;
public $actualDepth = 0;
/**
* @var array
*/
protected $columnTypes = [];
/**
* @var DecisionTreeLeaf
@ -42,10 +38,15 @@ class DecisionTree implements Classifier
*/
protected $maxDepth;
/**
* @var array
*/
private $labels = [];
/**
* @var int
*/
public $actualDepth = 0;
private $featureCount = 0;
/**
* @var int
@ -55,7 +56,7 @@ class DecisionTree implements Classifier
/**
* @var array
*/
private $selectedFeatures;
private $selectedFeatures = [];
/**
* @var array
@ -100,7 +101,7 @@ class DecisionTree implements Classifier
}
}
public static function getColumnTypes(array $samples) : array
public static function getColumnTypes(array $samples): array
{
$types = [];
$featureCount = count($samples[0]);
@ -113,7 +114,122 @@ class DecisionTree implements Classifier
return $types;
}
protected function getSplitLeaf(array $records, int $depth = 0) : DecisionTreeLeaf
/**
 * Splits the column values into two groups (equal to $baseValue / everything
 * else), computes "1 - sum of squared label proportions" for each group,
 * weights each result by the group size and divides the total by the number
 * of column values.
 *
 * @param mixed $baseValue
 */
public function getGiniIndex($baseValue, array $colValues, array $targets): float
{
    // One [matching, non-matching] counter pair per known label.
    $counts = [];
    foreach ($this->labels as $label) {
        $counts[$label] = [0, 0];
    }

    foreach ($colValues as $row => $value) {
        $label = $targets[$row];
        $side = ($value === $baseValue) ? 0 : 1;
        $counts[$label][$side]++;
    }

    $weighted = [0, 0];
    foreach ([0, 1] as $side) {
        $squares = 0;
        $groupSize = array_sum(array_column($counts, $side));
        if ($groupSize > 0) {
            foreach ($this->labels as $label) {
                $squares += ($counts[$label][$side] / (float) $groupSize) ** 2;
            }
        }

        $weighted[$side] = (1 - $squares) * $groupSize;
    }

    return array_sum($weighted) / count($colValues);
}
/**
 * Sets the number of columns to be used when deciding a split at an
 * internal node of the tree.
 * If 0 is given, all features are used (default behaviour); otherwise the
 * given value is used as the maximum number of columns randomly selected
 * for each split operation.
 *
 * @return $this
 *
 * @throws InvalidArgumentException when $numFeatures is negative
 */
public function setNumFeatures(int $numFeatures)
{
    $isNegative = $numFeatures < 0;
    if ($isNegative) {
        throw new InvalidArgumentException('Selected column count should be greater or equal to zero');
    }

    $this->numUsableFeatures = $numFeatures;

    return $this;
}
/**
 * Stores a string array naming the columns. Useful when the HTML output or
 * the column importances are to be inspected.
 *
 * @return $this
 *
 * @throws InvalidArgumentException when the feature count is already known
 *                                  (non-zero) and differs from the number of names
 */
public function setColumnNames(array $names)
{
    $expected = $this->featureCount;
    $lengthMismatch = $expected !== 0 && count($names) !== $expected;
    if ($lengthMismatch) {
        throw new InvalidArgumentException(sprintf('Length of the given array should be equal to feature count %s', $expected));
    }

    $this->columnNames = $names;

    return $this;
}
/**
 * Returns the HTML produced by the tree's getHTML(), passing along the
 * stored column names.
 */
public function getHtml(): string
{
    $columnNames = $this->columnNames;

    return $this->tree->getHTML($columnNames);
}
/**
 * Returns an array with an importance value for each column name.
 * Each value is the sum of getNodeImpurityDecrease() over the nodes that
 * split on that column. When the grand total is positive, the values are
 * normalized so that they sum to 1 and sorted in descending order.
 * The result is cached in $this->featureImportances and reused on later calls.
 */
public function getFeatureImportances(): array
{
    if ($this->featureImportances !== null) {
        return $this->featureImportances;
    }

    $sampleCount = count($this->samples);
    $this->featureImportances = [];
    foreach ($this->columnNames as $column => $columnName) {
        $nodes = $this->getSplitNodesByColumn($column, $this->tree);

        $importance = 0;
        foreach ($nodes as $node) {
            $importance += $node->getNodeImpurityDecrease($sampleCount);
        }

        $this->featureImportances[$columnName] = $importance;
    }

    // Normalize & sort the importances
    $total = array_sum($this->featureImportances);
    if ($total > 0) {
        // Assign by key rather than iterating by reference: a by-reference
        // loop variable that is never unset keeps pointing at the last array
        // element while the array is sorted and returned.
        foreach ($this->featureImportances as $columnName => $importance) {
            $this->featureImportances[$columnName] = $importance / $total;
        }

        arsort($this->featureImportances);
    }

    return $this->featureImportances;
}
protected function getSplitLeaf(array $records, int $depth = 0): DecisionTreeLeaf
{
$split = $this->getBestSplit($records);
$split->level = $depth;
@ -136,6 +252,7 @@ class DecisionTree implements Classifier
if ($prevRecord && $prevRecord != $record) {
$allSame = false;
}
$prevRecord = $record;
// According to the split criterion, this record will
@ -163,6 +280,7 @@ class DecisionTree implements Classifier
if ($leftRecords) {
$split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1);
}
if ($rightRecords) {
$split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1);
}
@ -171,7 +289,7 @@ class DecisionTree implements Classifier
return $split;
}
protected function getBestSplit(array $records) : DecisionTreeLeaf
protected function getBestSplit(array $records): DecisionTreeLeaf
{
$targets = array_intersect_key($this->targets, array_flip($records));
$samples = array_intersect_key($this->samples, array_flip($records));
@ -184,6 +302,7 @@ class DecisionTree implements Classifier
foreach ($samples as $index => $row) {
$colValues[$index] = $row[$i];
}
$counts = array_count_values($colValues);
arsort($counts);
$baseValue = key($counts);
@ -227,7 +346,7 @@ class DecisionTree implements Classifier
* If any of above methods were not called beforehand, then all features
* are returned by default.
*/
protected function getSelectedFeatures() : array
protected function getSelectedFeatures(): array
{
$allFeatures = range(0, $this->featureCount - 1);
if ($this->numUsableFeatures === 0 && !$this->selectedFeatures) {
@ -242,6 +361,7 @@ class DecisionTree implements Classifier
if ($numFeatures > $this->featureCount) {
$numFeatures = $this->featureCount;
}
shuffle($allFeatures);
$selectedFeatures = array_slice($allFeatures, 0, $numFeatures, false);
sort($selectedFeatures);
@ -249,39 +369,7 @@ class DecisionTree implements Classifier
return $selectedFeatures;
}
/**
* @param mixed $baseValue
*/
public function getGiniIndex($baseValue, array $colValues, array $targets) : float
{
$countMatrix = [];
foreach ($this->labels as $label) {
$countMatrix[$label] = [0, 0];
}
foreach ($colValues as $index => $value) {
$label = $targets[$index];
$rowIndex = $value === $baseValue ? 0 : 1;
++$countMatrix[$label][$rowIndex];
}
$giniParts = [0, 0];
for ($i = 0; $i <= 1; ++$i) {
$part = 0;
$sum = array_sum(array_column($countMatrix, $i));
if ($sum > 0) {
foreach ($this->labels as $label) {
$part += pow($countMatrix[$label][$i] / (float) $sum, 2);
}
}
$giniParts[$i] = (1 - $part) * $sum;
}
return array_sum($giniParts) / count($colValues);
}
protected function preprocess(array $samples) : array
protected function preprocess(array $samples): array
{
// Detect and convert continuous data column values into
// discrete values by using the median as a threshold value
@ -298,14 +386,16 @@ class DecisionTree implements Classifier
}
}
}
$columns[] = $values;
}
// Below method is a strange yet very simple & efficient method
// to get the transpose of a 2D array
return array_map(null, ...$columns);
}
protected static function isCategoricalColumn(array $columnValues) : bool
protected static function isCategoricalColumn(array $columnValues): bool
{
$count = count($columnValues);
@ -329,28 +419,6 @@ class DecisionTree implements Classifier
return count($distinctValues) <= $count / 5;
}
/**
* This method is used to set number of columns to be used
* when deciding a split at an internal node of the tree. <br>
* If the value is given 0, then all features are used (default behaviour),
* otherwise the given value will be used as a maximum for number of columns
* randomly selected for each split operation.
*
* @return $this
*
* @throws InvalidArgumentException
*/
public function setNumFeatures(int $numFeatures)
{
if ($numFeatures < 0) {
throw new InvalidArgumentException('Selected column count should be greater or equal to zero');
}
$this->numUsableFeatures = $numFeatures;
return $this;
}
/**
* Used to set predefined features to consider while deciding which column to use for a split
*/
@ -359,71 +427,11 @@ class DecisionTree implements Classifier
$this->selectedFeatures = $selectedFeatures;
}
/**
* A string array to represent columns. Useful when HTML output or
* column importances are desired to be inspected.
*
* @return $this
*
* @throws InvalidArgumentException
*/
public function setColumnNames(array $names)
{
if ($this->featureCount !== 0 && count($names) !== $this->featureCount) {
throw new InvalidArgumentException(sprintf('Length of the given array should be equal to feature count %s', $this->featureCount));
}
$this->columnNames = $names;
return $this;
}
public function getHtml() : string
{
return $this->tree->getHTML($this->columnNames);
}
/**
* This will return an array including an importance value for
* each column in the given dataset. The importance values are
* normalized and their total makes 1.<br/>
*/
public function getFeatureImportances() : array
{
if ($this->featureImportances !== null) {
return $this->featureImportances;
}
$sampleCount = count($this->samples);
$this->featureImportances = [];
foreach ($this->columnNames as $column => $columnName) {
$nodes = $this->getSplitNodesByColumn($column, $this->tree);
$importance = 0;
foreach ($nodes as $node) {
$importance += $node->getNodeImpurityDecrease($sampleCount);
}
$this->featureImportances[$columnName] = $importance;
}
// Normalize & sort the importances
$total = array_sum($this->featureImportances);
if ($total > 0) {
foreach ($this->featureImportances as &$importance) {
$importance /= $total;
}
arsort($this->featureImportances);
}
return $this->featureImportances;
}
/**
* Collects and returns an array of internal nodes that use the given
* column as a split criterion
*/
protected function getSplitNodesByColumn(int $column, DecisionTreeLeaf $node) : array
protected function getSplitNodesByColumn(int $column, DecisionTreeLeaf $node): array
{
if (!$node || $node->isTerminal) {
return [];

View File

@ -71,7 +71,15 @@ class DecisionTreeLeaf
*/
public $level = 0;
public function evaluate(array $record) : bool
/**
 * String form of the node: the output of getHTML() called without
 * column names.
 */
public function __toString(): string
{
    $html = $this->getHTML();

    return $html;
}
public function evaluate(array $record): bool
{
$recordField = $record[$this->columnIndex];
@ -86,7 +94,7 @@ class DecisionTreeLeaf
* Returns Mean Decrease Impurity (MDI) in the node.
* For terminal nodes, this value is equal to 0
*/
public function getNodeImpurityDecrease(int $parentRecordCount) : float
public function getNodeImpurityDecrease(int $parentRecordCount): float
{
if ($this->isTerminal) {
return 0.0;
@ -111,7 +119,7 @@ class DecisionTreeLeaf
/**
* Returns HTML representation of the node including children nodes
*/
public function getHTML($columnNames = null) : string
public function getHTML($columnNames = null): string
{
if ($this->isTerminal) {
$value = "<b>$this->classValue</b>";
@ -154,12 +162,4 @@ class DecisionTreeLeaf
return $str;
}
/**
* HTML representation of the tree without column names
*/
public function __toString() : string
{
return $this->getHTML();
}
}

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Classification\Ensemble;
use Exception;
use Phpml\Classification\Classifier;
use Phpml\Classification\Linear\DecisionStump;
use Phpml\Classification\WeightedClassifier;
@ -11,6 +12,7 @@ use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
use Phpml\Math\Statistic\Mean;
use Phpml\Math\Statistic\StandardDeviation;
use ReflectionClass;
class AdaBoost implements Classifier
{
@ -98,11 +100,14 @@ class AdaBoost implements Classifier
// Initialize usual variables
$this->labels = array_keys(array_count_values($targets));
if (count($this->labels) != 2) {
throw new \Exception('AdaBoost is a binary classifier and can classify between two classes only');
throw new Exception('AdaBoost is a binary classifier and can classify between two classes only');
}
// Set all target values to either -1 or 1
$this->labels = [1 => $this->labels[0], -1 => $this->labels[1]];
$this->labels = [
1 => $this->labels[0],
-1 => $this->labels[1],
];
foreach ($targets as $target) {
$this->targets[] = $target == $this->labels[1] ? 1 : -1;
}
@ -132,13 +137,27 @@ class AdaBoost implements Classifier
}
}
/**
 * Sums the prediction of every stored classifier multiplied by its alpha
 * and maps the sign of that sum back to one of the two original labels.
 *
 * @return mixed
 */
public function predictSample(array $sample)
{
    $score = 0;
    foreach ($this->alpha as $index => $weight) {
        $vote = $this->classifiers[$index]->predict($sample);
        $score += $vote * $weight;
    }

    // A strictly positive score selects label 1, anything else label -1.
    if ($score > 0) {
        return $this->labels[1];
    }

    return $this->labels[-1];
}
/**
* Returns the classifier with the lowest error rate with the
* consideration of current sample weights
*/
protected function getBestClassifier() : Classifier
protected function getBestClassifier(): Classifier
{
$ref = new \ReflectionClass($this->baseClassifier);
$ref = new ReflectionClass($this->baseClassifier);
if ($this->classifierOptions) {
$classifier = $ref->newInstanceArgs($this->classifierOptions);
} else {
@ -160,7 +179,7 @@ class AdaBoost implements Classifier
* Resamples the dataset in accordance with the weights and
* returns the new dataset
*/
protected function resample() : array
protected function resample(): array
{
$weights = $this->weights;
$std = StandardDeviation::population($weights);
@ -173,9 +192,10 @@ class AdaBoost implements Classifier
foreach ($weights as $index => $weight) {
$z = (int) round(($weight - $mean) / $std) - $minZ + 1;
for ($i = 0; $i < $z; ++$i) {
if (rand(0, 1) == 0) {
if (random_int(0, 1) == 0) {
continue;
}
$samples[] = $this->samples[$index];
$targets[] = $this->targets[$index];
}
@ -187,7 +207,7 @@ class AdaBoost implements Classifier
/**
* Evaluates the classifier and returns the classification error rate
*/
protected function evaluateClassifier(Classifier $classifier) : float
protected function evaluateClassifier(Classifier $classifier): float
{
$total = (float) array_sum($this->weights);
$wrong = 0;
@ -204,7 +224,7 @@ class AdaBoost implements Classifier
/**
* Calculates alpha of a classifier
*/
protected function calculateAlpha(float $errorRate) : float
protected function calculateAlpha(float $errorRate): float
{
if ($errorRate == 0) {
$errorRate = 1e-10;
@ -231,18 +251,4 @@ class AdaBoost implements Classifier
$this->weights = $weightsT1;
}
/**
* @return mixed
*/
public function predictSample(array $sample)
{
$sum = 0;
foreach ($this->alpha as $index => $alpha) {
$h = $this->classifiers[$index]->predict($sample);
$sum += $h * $alpha;
}
return $this->labels[$sum > 0 ? 1 : -1];
}
}

View File

@ -4,10 +4,12 @@ declare(strict_types=1);
namespace Phpml\Classification\Ensemble;
use Exception;
use Phpml\Classification\Classifier;
use Phpml\Classification\DecisionTree;
use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable;
use ReflectionClass;
class Bagging implements Classifier
{
@ -18,11 +20,6 @@ class Bagging implements Classifier
*/
protected $numSamples;
/**
* @var array
*/
private $targets = [];
/**
* @var int
*/
@ -46,13 +43,18 @@ class Bagging implements Classifier
/**
* @var array
*/
protected $classifiers;
protected $classifiers = [];
/**
* @var float
*/
protected $subsetRatio = 0.7;
/**
* @var array
*/
private $targets = [];
/**
* @var array
*/
@ -80,7 +82,7 @@ class Bagging implements Classifier
public function setSubsetRatio(float $ratio)
{
if ($ratio < 0.1 || $ratio > 1.0) {
throw new \Exception('Subset ratio should be between 0.1 and 1.0');
throw new Exception('Subset ratio should be between 0.1 and 1.0');
}
$this->subsetRatio = $ratio;
@ -123,14 +125,14 @@ class Bagging implements Classifier
}
}
protected function getRandomSubset(int $index) : array
protected function getRandomSubset(int $index): array
{
$samples = [];
$targets = [];
srand($index);
$bootstrapSize = $this->subsetRatio * $this->numSamples;
for ($i = 0; $i < $bootstrapSize; ++$i) {
$rand = rand(0, $this->numSamples - 1);
$rand = random_int(0, $this->numSamples - 1);
$samples[] = $this->samples[$rand];
$targets[] = $this->targets[$rand];
}
@ -138,11 +140,11 @@ class Bagging implements Classifier
return [$samples, $targets];
}
protected function initClassifiers() : array
protected function initClassifiers(): array
{
$classifiers = [];
for ($i = 0; $i < $this->numClassifier; ++$i) {
$ref = new \ReflectionClass($this->classifier);
$ref = new ReflectionClass($this->classifier);
if ($this->classifierOptions) {
$obj = $ref->newInstanceArgs($this->classifierOptions);
} else {
@ -155,12 +157,7 @@ class Bagging implements Classifier
return $classifiers;
}
/**
* @param Classifier $classifier
*
* @return Classifier
*/
protected function initSingleClassifier($classifier)
/**
* Returns the given classifier unchanged.
*
* NOTE(review): presumably an extension hook for per-classifier setup;
* RandomForest declares a method with the same signature. Confirm against the callers.
*/
protected function initSingleClassifier(Classifier $classifier): Classifier
{
return $classifier;
}

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Classification\Ensemble;
use Exception;
use Phpml\Classification\Classifier;
use Phpml\Classification\DecisionTree;
class RandomForest extends Bagging
@ -48,11 +50,11 @@ class RandomForest extends Bagging
public function setFeatureSubsetRatio($ratio)
{
if (is_float($ratio) && ($ratio < 0.1 || $ratio > 1.0)) {
throw new \Exception('When a float given, feature subset ratio should be between 0.1 and 1.0');
throw new Exception('When a float given, feature subset ratio should be between 0.1 and 1.0');
}
if (is_string($ratio) && $ratio != 'sqrt' && $ratio != 'log') {
throw new \Exception("When a string given, feature subset ratio can only be 'sqrt' or 'log' ");
throw new Exception("When a string given, feature subset ratio can only be 'sqrt' or 'log' ");
}
$this->featureSubsetRatio = $ratio;
@ -70,7 +72,7 @@ class RandomForest extends Bagging
public function setClassifer(string $classifier, array $classifierOptions = [])
{
if ($classifier != DecisionTree::class) {
throw new \Exception('RandomForest can only use DecisionTree as base classifier');
throw new Exception('RandomForest can only use DecisionTree as base classifier');
}
return parent::setClassifer($classifier, $classifierOptions);
@ -81,7 +83,7 @@ class RandomForest extends Bagging
* each column in the given dataset. Importance values for a column
* is the average importance of that column in all trees in the forest
*/
public function getFeatureImportances() : array
public function getFeatureImportances(): array
{
// Traverse each tree and sum importance of the columns
$sum = [];
@ -127,7 +129,7 @@ class RandomForest extends Bagging
*
* @return DecisionTree
*/
protected function initSingleClassifier($classifier)
protected function initSingleClassifier(Classifier $classifier): Classifier
{
if (is_float($this->featureSubsetRatio)) {
$featureCount = (int) ($this->featureSubsetRatio * $this->featureCount);

View File

@ -28,7 +28,7 @@ class KNearestNeighbors implements Classifier
*/
public function __construct(int $k = 3, ?Distance $distanceMetric = null)
{
if (null === $distanceMetric) {
if ($distanceMetric === null) {
$distanceMetric = new Euclidean();
}
@ -60,7 +60,7 @@ class KNearestNeighbors implements Classifier
/**
* @throws \Phpml\Exception\InvalidArgumentException
*/
private function kNeighborsDistances(array $sample) : array
private function kNeighborsDistances(array $sample): array
{
$distances = [];

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Exception;
class Adaline extends Perceptron
{
/**
@ -41,7 +43,7 @@ class Adaline extends Perceptron
int $trainingType = self::BATCH_TRAINING
) {
if (!in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING])) {
throw new \Exception('Adaline can only be trained with batch and online/stochastic gradient descent algorithm');
throw new Exception('Adaline can only be trained with batch and online/stochastic gradient descent algorithm');
}
$this->trainingType = $trainingType;

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Exception;
use Phpml\Classification\DecisionTree;
use Phpml\Classification\WeightedClassifier;
use Phpml\Helper\OneVsRest;
@ -24,7 +25,7 @@ class DecisionStump extends WeightedClassifier
/**
* @var array
*/
protected $binaryLabels;
protected $binaryLabels = [];
/**
* Lowest error rate obtained while training/optimizing the model
@ -51,7 +52,7 @@ class DecisionStump extends WeightedClassifier
/**
* @var array
*/
protected $columnTypes;
protected $columnTypes = [];
/**
* @var int
@ -68,7 +69,7 @@ class DecisionStump extends WeightedClassifier
*
* @var array
*/
protected $prob;
protected $prob = [];
/**
* A DecisionStump classifier is a one-level deep DecisionTree. It is generally
@ -83,6 +84,25 @@ class DecisionStump extends WeightedClassifier
$this->givenColumnIndex = $columnIndex;
}
/**
 * Renders the stump as an "IF <column> <operator> <value> THEN <label> ELSE <label>" string.
 */
public function __toString(): string
{
    $condition = "IF {$this->column} {$this->operator} {$this->value} ";
    $outcome = 'THEN '.$this->binaryLabels[0].' '.'ELSE '.$this->binaryLabels[1];

    return $condition.$outcome;
}
/**
* Sets the number of split points to probe for numerical columns.
*
* While finding the best split point for a numerical valued column,
* DecisionStump looks for equally distanced values between the minimum and
* maximum values in the column. The given $count determines how many split
* points are probed. The more split points, the better the performance but
* the worse the processing time (default value is 10.0).
*
* The value is stored in $this->numSplitCount as given; no validation is done here.
*/
public function setNumericalSplitCount(float $count): void
{
$this->numSplitCount = $count;
}
/**
* @throws \Exception
*/
@ -101,7 +121,7 @@ class DecisionStump extends WeightedClassifier
if ($this->weights) {
$numWeights = count($this->weights);
if ($numWeights != count($samples)) {
throw new \Exception('Number of sample weights does not match with number of samples');
throw new Exception('Number of sample weights does not match with number of samples');
}
} else {
$this->weights = array_fill(0, count($samples), 1);
@ -118,9 +138,12 @@ class DecisionStump extends WeightedClassifier
}
$bestSplit = [
'value' => 0, 'operator' => '',
'prob' => [], 'column' => 0,
'trainingErrorRate' => 1.0];
'value' => 0,
'operator' => '',
'prob' => [],
'column' => 0,
'trainingErrorRate' => 1.0,
];
foreach ($columns as $col) {
if ($this->columnTypes[$col] == DecisionTree::CONTINUOUS) {
$split = $this->getBestNumericalSplit($samples, $targets, $col);
@ -139,22 +162,10 @@ class DecisionStump extends WeightedClassifier
}
}
/**
* While finding best split point for a numerical valued column,
* DecisionStump looks for equally distanced values between minimum and maximum
* values in the column. Given <i>$count</i> value determines how many split
* points to be probed. The more split counts, the better performance but
* worse processing time (Default value is 10.0)
*/
public function setNumericalSplitCount(float $count): void
{
$this->numSplitCount = $count;
}
/**
* Determines best split point for the given column
*/
protected function getBestNumericalSplit(array $samples, array $targets, int $col) : array
protected function getBestNumericalSplit(array $samples, array $targets, int $col): array
{
$values = array_column($samples, $col);
// Trying all possible points may be accomplished in two general ways:
@ -173,9 +184,13 @@ class DecisionStump extends WeightedClassifier
$threshold = array_sum($values) / (float) count($values);
[$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values);
if ($split == null || $errorRate < $split['trainingErrorRate']) {
$split = ['value' => $threshold, 'operator' => $operator,
'prob' => $prob, 'column' => $col,
'trainingErrorRate' => $errorRate];
$split = [
'value' => $threshold,
'operator' => $operator,
'prob' => $prob,
'column' => $col,
'trainingErrorRate' => $errorRate,
];
}
// Try other possible points one by one
@ -183,9 +198,13 @@ class DecisionStump extends WeightedClassifier
$threshold = (float) $step;
[$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values);
if ($errorRate < $split['trainingErrorRate']) {
$split = ['value' => $threshold, 'operator' => $operator,
'prob' => $prob, 'column' => $col,
'trainingErrorRate' => $errorRate];
$split = [
'value' => $threshold,
'operator' => $operator,
'prob' => $prob,
'column' => $col,
'trainingErrorRate' => $errorRate,
];
}
}// for
}
@ -193,7 +212,7 @@ class DecisionStump extends WeightedClassifier
return $split;
}
protected function getBestNominalSplit(array $samples, array $targets, int $col) : array
protected function getBestNominalSplit(array $samples, array $targets, int $col): array
{
$values = array_column($samples, $col);
$valueCounts = array_count_values($values);
@ -206,9 +225,13 @@ class DecisionStump extends WeightedClassifier
[$errorRate, $prob] = $this->calculateErrorRate($targets, $val, $operator, $values);
if ($split == null || $split['trainingErrorRate'] < $errorRate) {
$split = ['value' => $val, 'operator' => $operator,
'prob' => $prob, 'column' => $col,
'trainingErrorRate' => $errorRate];
$split = [
'value' => $val,
'operator' => $operator,
'prob' => $prob,
'column' => $col,
'trainingErrorRate' => $errorRate,
];
}
}
}
@ -220,7 +243,7 @@ class DecisionStump extends WeightedClassifier
* Calculates the ratio of wrong predictions based on the new threshold
* value given as the parameter
*/
protected function calculateErrorRate(array $targets, float $threshold, string $operator, array $values) : array
protected function calculateErrorRate(array $targets, float $threshold, string $operator, array $values): array
{
$wrong = 0.0;
$prob = [];
@ -242,6 +265,7 @@ class DecisionStump extends WeightedClassifier
if (!isset($prob[$predicted][$target])) {
$prob[$predicted][$target] = 0;
}
++$prob[$predicted][$target];
}
@ -267,7 +291,7 @@ class DecisionStump extends WeightedClassifier
*
* @param mixed $label
*/
protected function predictProbability(array $sample, $label) : float
protected function predictProbability(array $sample, $label): float
{
$predicted = $this->predictSampleBinary($sample);
if ((string) $predicted == (string) $label) {
@ -292,11 +316,4 @@ class DecisionStump extends WeightedClassifier
/**
 * No-op in this class: the body is empty, so calling it changes nothing.
 */
protected function resetBinary(): void
{
}
public function __toString() : string
{
return "IF $this->column $this->operator $this->value ".
'THEN '.$this->binaryLabels[0].' '.
'ELSE '.$this->binaryLabels[1];
}
}

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Closure;
use Exception;
use Phpml\Helper\Optimizer\ConjugateGradient;
class LogisticRegression extends Adaline
@ -70,18 +72,18 @@ class LogisticRegression extends Adaline
) {
$trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING);
if (!in_array($trainingType, $trainingTypes)) {
throw new \Exception('Logistic regression can only be trained with '.
throw new Exception('Logistic regression can only be trained with '.
'batch (gradient descent), online (stochastic gradient descent) '.
'or conjugate batch (conjugate gradients) algorithms');
}
if (!in_array($cost, ['log', 'sse'])) {
throw new \Exception("Logistic regression cost function can be one of the following: \n".
throw new Exception("Logistic regression cost function can be one of the following: \n".
"'log' for log-likelihood and 'sse' for sum of squared errors");
}
if ($penalty != '' && strtoupper($penalty) !== 'L2') {
throw new \Exception("Logistic regression supports only 'L2' regularization");
throw new Exception("Logistic regression supports only 'L2' regularization");
}
$this->learningRate = 0.001;
@ -132,14 +134,14 @@ class LogisticRegression extends Adaline
return $this->runConjugateGradient($samples, $targets, $callback);
default:
throw new \Exception('Logistic regression has invalid training type: %s.', $this->trainingType);
throw new Exception('Logistic regression has invalid training type: %s.', $this->trainingType);
}
}
/**
* Executes Conjugate Gradient method to optimize the weights of the LogReg model
*/
protected function runConjugateGradient(array $samples, array $targets, \Closure $gradientFunc): void
protected function runConjugateGradient(array $samples, array $targets, Closure $gradientFunc): void
{
if (empty($this->optimizer)) {
$this->optimizer = (new ConjugateGradient($this->featureCount))
@ -155,7 +157,7 @@ class LogisticRegression extends Adaline
*
* @throws \Exception
*/
protected function getCostFunction() : \Closure
protected function getCostFunction(): Closure
{
$penalty = 0;
if ($this->penalty == 'L2') {
@ -183,9 +185,11 @@ class LogisticRegression extends Adaline
if ($hX == 1) {
$hX = 1 - 1e-10;
}
if ($hX == 0) {
$hX = 1e-10;
}
$error = -$y * log($hX) - (1 - $y) * log(1 - $hX);
$gradient = $hX - $y;
@ -218,16 +222,14 @@ class LogisticRegression extends Adaline
return $callback;
default:
throw new \Exception(sprintf('Logistic regression has invalid cost function: %s.', $this->costFunction));
throw new Exception(sprintf('Logistic regression has invalid cost function: %s.', $this->costFunction));
}
}
/**
* Returns the output of the network, a float value between 0.0 and 1.0
*
* @return float
*/
protected function output(array $sample)
protected function output(array $sample): float
{
$sum = parent::output($sample);
@ -237,7 +239,7 @@ class LogisticRegression extends Adaline
/**
* Returns the class value (either -1 or 1) for the given input
*/
protected function outputClass(array $sample) : int
protected function outputClass(array $sample): int
{
$output = $this->output($sample);
@ -253,10 +255,10 @@ class LogisticRegression extends Adaline
*
* The probability is simply taken as the distance of the sample
* to the decision plane.
*
* @param mixed $label
*/
protected function predictProbability(array $sample, $label) : float
protected function predictProbability(array $sample, $label): float
{
$predicted = $this->predictSampleBinary($sample);

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Closure;
use Exception;
use Phpml\Classification\Classifier;
use Phpml\Helper\OneVsRest;
use Phpml\Helper\Optimizer\GD;
@ -34,7 +36,7 @@ class Perceptron implements Classifier, IncrementalEstimator
/**
* @var array
*/
protected $weights;
protected $weights = [];
/**
* @var float
@ -73,11 +75,11 @@ class Perceptron implements Classifier, IncrementalEstimator
public function __construct(float $learningRate = 0.001, int $maxIterations = 1000, bool $normalizeInputs = true)
{
if ($learningRate <= 0.0 || $learningRate > 1.0) {
throw new \Exception('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)');
throw new Exception('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)');
}
if ($maxIterations <= 0) {
throw new \Exception('Maximum number of iterations must be an integer greater than 0');
throw new Exception('Maximum number of iterations must be an integer greater than 0');
}
if ($normalizeInputs) {
@ -100,7 +102,10 @@ class Perceptron implements Classifier, IncrementalEstimator
}
// Set all target values to either -1 or 1
$this->labels = [1 => $labels[0], -1 => $labels[1]];
$this->labels = [
1 => $labels[0],
-1 => $labels[1],
];
foreach ($targets as $key => $target) {
$targets[$key] = (string) $target == (string) $this->labels[1] ? 1 : -1;
}
@ -111,15 +116,6 @@ class Perceptron implements Classifier, IncrementalEstimator
$this->runTraining($samples, $targets);
}
protected function resetBinary(): void
{
$this->labels = [];
$this->optimizer = null;
$this->featureCount = 0;
$this->weights = null;
$this->costValues = [];
}
/**
* Normally enabling early stopping for the optimization procedure may
* help saving processing time while in some cases it may result in
@ -140,16 +136,23 @@ class Perceptron implements Classifier, IncrementalEstimator
/**
* Returns the cost values obtained during the training.
*/
public function getCostValues() : array
public function getCostValues(): array
{
return $this->costValues;
}
/**
 * Clears the labels, optimizer, feature count, weights and cost values
 * held by this instance, returning them to their empty state.
 */
protected function resetBinary(): void
{
    $this->labels = [];
    $this->optimizer = null;
    $this->featureCount = 0;
    // Reset to an empty array rather than null: the property is documented
    // as an array and defaults to [], so null would break that contract.
    $this->weights = [];
    $this->costValues = [];
}
/**
* Trains the perceptron model with Stochastic Gradient Descent optimization
* to get the correct set of weights
*
* @return void|mixed
*/
protected function runTraining(array $samples, array $targets)
{
@ -171,7 +174,7 @@ class Perceptron implements Classifier, IncrementalEstimator
* Executes a Gradient Descent algorithm for
* the given cost function
*/
protected function runGradientDescent(array $samples, array $targets, \Closure $gradientFunc, bool $isBatch = false): void
protected function runGradientDescent(array $samples, array $targets, Closure $gradientFunc, bool $isBatch = false): void
{
$class = $isBatch ? GD::class : StochasticGD::class;
@ -191,7 +194,7 @@ class Perceptron implements Classifier, IncrementalEstimator
* Checks if the sample should be normalized and if so, returns the
* normalized sample
*/
protected function checkNormalizedSample(array $sample) : array
protected function checkNormalizedSample(array $sample): array
{
if ($this->normalizer) {
$samples = [$sample];
@ -205,7 +208,7 @@ class Perceptron implements Classifier, IncrementalEstimator
/**
* Calculates net output of the network as a float value for the given input
*
* @return int
* @return int|float
*/
protected function output(array $sample)
{
@ -224,7 +227,7 @@ class Perceptron implements Classifier, IncrementalEstimator
/**
* Returns the class value (either -1 or 1) for the given input
*/
protected function outputClass(array $sample) : int
protected function outputClass(array $sample): int
{
return $this->output($sample) > 0 ? 1 : -1;
}
@ -237,7 +240,7 @@ class Perceptron implements Classifier, IncrementalEstimator
*
* @param mixed $label
*/
protected function predictProbability(array $sample, $label) : float
protected function predictProbability(array $sample, $label): float
{
$predicted = $this->predictSampleBinary($sample);

View File

@ -14,7 +14,7 @@ class MLPClassifier extends MultilayerPerceptron implements Classifier
*
* @throws InvalidArgumentException
*/
public function getTargetClass($target) : int
public function getTargetClass($target): int
{
if (!in_array($target, $this->classes)) {
throw InvalidArgumentException::invalidTarget($target);

View File

@ -14,7 +14,9 @@ class NaiveBayes implements Classifier
use Trainable, Predictable;
public const CONTINUOS = 1;
public const NOMINAL = 2;
public const EPSILON = 1e-10;
/**
@ -73,6 +75,31 @@ class NaiveBayes implements Classifier
}
}
/**
 * Scores every known label for the given sample and returns the label
 * with the highest score.
 *
 * @return mixed the key of the best-scoring entry in the per-label score map
 */
protected function predictSample(array $sample)
{
    // Use NaiveBayes assumption for each label using:
    // P(label|features) = P(label) * P(feature0|label) * P(feature1|label) .... P(featureN|label)
    // Each label's score starts at $this->p[$label]; the per-feature terms
    // returned by sampleProbability() are then added on top of it.
    $scores = [];
    foreach ($this->labels as $label) {
        $score = $this->p[$label];
        $feature = 0;
        while ($feature < $this->featureCount) {
            $score += $this->sampleProbability($sample, $feature, $label);
            ++$feature;
        }

        $scores[$label] = $score;
    }

    // Sort descending by score (keys are preserved) and read off the first key
    arsort($scores, SORT_NUMERIC);
    reset($scores);

    return key($scores);
}
/**
* Calculates vital statistics for each label & feature. Stores these
* values in private array in order to avoid repeated calculation
@ -108,7 +135,7 @@ class NaiveBayes implements Classifier
/**
* Calculates the probability P(label|sample_n)
*/
private function sampleProbability(array $sample, int $feature, string $label) : float
private function sampleProbability(array $sample, int $feature, string $label): float
{
$value = $sample[$feature];
if ($this->dataType[$label][$feature] == self::NOMINAL) {
@ -119,6 +146,7 @@ class NaiveBayes implements Classifier
return $this->discreteProb[$label][$feature][$value];
}
$std = $this->std[$label][$feature] ;
$mean = $this->mean[$label][$feature];
// Calculate the probability density by use of normal/Gaussian distribution
@ -137,7 +165,7 @@ class NaiveBayes implements Classifier
/**
* Return samples belonging to specific label
*/
private function getSamplesByLabel(string $label) : array
private function getSamplesByLabel(string $label): array
{
$samples = [];
for ($i = 0; $i < $this->sampleCount; ++$i) {
@ -148,28 +176,4 @@ class NaiveBayes implements Classifier
return $samples;
}
/**
* @return mixed
*/
protected function predictSample(array $sample)
{
// Use NaiveBayes assumption for each label using:
// P(label|features) = P(label) * P(feature0|label) * P(feature1|label) .... P(featureN|label)
// Then compare probability for each class to determine which label is most likely
$predictions = [];
foreach ($this->labels as $label) {
$p = $this->p[$label];
for ($i = 0; $i < $this->featureCount; ++$i) {
$Plf = $this->sampleProbability($sample, $i, $label);
$p += $Plf;
}
$predictions[$label] = $p;
}
arsort($predictions, SORT_NUMERIC);
reset($predictions);
return key($predictions);
}
}

View File

@ -9,7 +9,7 @@ abstract class WeightedClassifier implements Classifier
/**
* @var array
*/
protected $weights;
protected $weights = [];
/**
* Sets the array including a weight for each sample

View File

@ -6,5 +6,5 @@ namespace Phpml\Clustering;
interface Clusterer
{
public function cluster(array $samples) : array;
public function cluster(array $samples): array;
}

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Clustering;
use array_merge;
use Phpml\Math\Distance;
use Phpml\Math\Distance\Euclidean;
@ -26,7 +27,7 @@ class DBSCAN implements Clusterer
public function __construct(float $epsilon = 0.5, int $minSamples = 3, ?Distance $distanceMetric = null)
{
if (null === $distanceMetric) {
if ($distanceMetric === null) {
$distanceMetric = new Euclidean();
}
@ -35,7 +36,7 @@ class DBSCAN implements Clusterer
$this->distanceMetric = $distanceMetric;
}
public function cluster(array $samples) : array
public function cluster(array $samples): array
{
$clusters = [];
$visited = [];
@ -44,6 +45,7 @@ class DBSCAN implements Clusterer
if (isset($visited[$index])) {
continue;
}
$visited[$index] = true;
$regionSamples = $this->getSamplesInRegion($sample, $samples);
@ -55,7 +57,7 @@ class DBSCAN implements Clusterer
return $clusters;
}
private function getSamplesInRegion(array $localSample, array $samples) : array
private function getSamplesInRegion(array $localSample, array $samples): array
{
$region = [];
@ -68,7 +70,7 @@ class DBSCAN implements Clusterer
return $region;
}
private function expandCluster(array $samples, array &$visited) : array
private function expandCluster(array $samples, array &$visited): array
{
$cluster = [];
@ -84,7 +86,8 @@ class DBSCAN implements Clusterer
$cluster[$index] = $sample;
}
$cluster = \array_merge($cluster, ...$clusterMerge);
$cluster = array_merge($cluster, ...$clusterMerge);
return $cluster;
}

View File

@ -30,7 +30,7 @@ class FuzzyCMeans implements Clusterer
/**
* @var array|float[][]
*/
private $membership;
private $membership = [];
/**
* @var float
@ -55,7 +55,7 @@ class FuzzyCMeans implements Clusterer
/**
* @var array
*/
private $samples;
private $samples = [];
/**
* @throws InvalidArgumentException
@ -65,12 +65,63 @@ class FuzzyCMeans implements Clusterer
if ($clustersNumber <= 0) {
throw InvalidArgumentException::invalidClustersNumber();
}
$this->clustersNumber = $clustersNumber;
$this->fuzziness = $fuzziness;
$this->epsilon = $epsilon;
$this->maxIterations = $maxIterations;
}
/**
 * Returns the membership matrix currently held by this instance.
 *
 * @return array the value of $this->membership, unchanged
 */
public function getMembershipMatrix(): array
{
return $this->membership;
}
/**
 * Runs the fuzzy clustering loop on the given samples and returns them
 * grouped by their strongest (hard) cluster assignment.
 *
 * The membership matrix and cluster centers are updated repeatedly until
 * the objective value changes by no more than epsilon between two rounds,
 * or until maxIterations rounds have been executed (at least one round
 * always runs).
 *
 * @param array|Point[] $samples
 *
 * @return array one list of points per cluster, in cluster order
 */
public function cluster(array $samples): array
{
    // Initialize variables, clusters and membership matrix
    $this->sampleCount = count($samples);
    // $samples is received by value, so a plain assignment is sufficient;
    // binding it by reference gained nothing.
    $this->samples = $samples;
    $this->space = new Space(count($samples[0]));
    $this->initClusters();

    // Our goal is minimizing the objective value while
    // executing the clustering steps at a maximum number of iterations
    $lastObjective = 0.0;
    $iterations = 0;
    do {
        // Update the membership matrix and cluster centers, respectively
        $this->updateMembershipMatrix();
        $this->updateClusters();

        // Calculate the new value of the objective function
        $objectiveVal = $this->getObjective();
        $difference = abs($lastObjective - $objectiveVal);
        $lastObjective = $objectiveVal;

        // Pre-increment and strict comparison cap the loop at exactly
        // maxIterations rounds; the previous post-increment with "<="
        // allowed two extra rounds.
    } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

    // Attach (hard cluster) each data point to the nearest cluster
    for ($k = 0; $k < $this->sampleCount; ++$k) {
        // Column $k of the membership matrix holds sample $k's membership
        // value for every cluster; the highest one decides the assignment.
        $column = array_column($this->membership, $k);
        arsort($column);
        reset($column);
        $i = key($column);
        $cluster = $this->clusters[$i];
        $cluster->attach(new Point($this->samples[$k]));
    }

    // Return grouped samples
    $grouped = [];
    foreach ($this->clusters as $cluster) {
        $grouped[] = $cluster->getPoints();
    }

    return $grouped;
}
protected function initClusters(): void
{
// Membership array is a matrix of cluster number by sample counts
@ -87,7 +138,7 @@ class FuzzyCMeans implements Clusterer
$row = [];
$total = 0.0;
for ($k = 0; $k < $cols; ++$k) {
$val = rand(1, 5) / 10.0;
$val = random_int(1, 5) / 10.0;
$row[] = $val;
$total += $val;
}
@ -146,7 +197,7 @@ class FuzzyCMeans implements Clusterer
}
}
protected function getDistanceCalc(int $row, int $col) : float
protected function getDistanceCalc(int $row, int $col): float
{
$sum = 0.0;
$distance = new Euclidean();
@ -187,54 +238,4 @@ class FuzzyCMeans implements Clusterer
return $sum;
}
public function getMembershipMatrix() : array
{
return $this->membership;
}
/**
* @param array|Point[] $samples
*/
public function cluster(array $samples) : array
{
// Initialize variables, clusters and membership matrix
$this->sampleCount = count($samples);
$this->samples = &$samples;
$this->space = new Space(count($samples[0]));
$this->initClusters();
// Our goal is minimizing the objective value while
// executing the clustering steps at a maximum number of iterations
$lastObjective = 0.0;
$iterations = 0;
do {
// Update the membership matrix and cluster centers, respectively
$this->updateMembershipMatrix();
$this->updateClusters();
// Calculate the new value of the objective function
$objectiveVal = $this->getObjective();
$difference = abs($lastObjective - $objectiveVal);
$lastObjective = $objectiveVal;
} while ($difference > $this->epsilon && $iterations++ <= $this->maxIterations);
// Attach (hard cluster) each data point to the nearest cluster
for ($k = 0; $k < $this->sampleCount; ++$k) {
$column = array_column($this->membership, $k);
arsort($column);
reset($column);
$i = key($column);
$cluster = $this->clusters[$i];
$cluster->attach(new Point($this->samples[$k]));
}
// Return grouped samples
$grouped = [];
foreach ($this->clusters as $cluster) {
$grouped[] = $cluster->getPoints();
}
return $grouped;
}
}

View File

@ -10,6 +10,7 @@ use Phpml\Exception\InvalidArgumentException;
class KMeans implements Clusterer
{
public const INIT_RANDOM = 1;
public const INIT_KMEANS_PLUS_PLUS = 2;
/**
@ -32,7 +33,7 @@ class KMeans implements Clusterer
$this->initialization = $initialization;
}
public function cluster(array $samples) : array
public function cluster(array $samples): array
{
$space = new Space(count($samples[0]));
foreach ($samples as $sample) {

View File

@ -28,7 +28,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
$this->points = new SplObjectStorage();
}
public function getPoints() : array
public function getPoints(): array
{
$points = [];
foreach ($this->points as $point) {
@ -38,7 +38,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
return $points;
}
public function toArray() : array
public function toArray(): array
{
return [
'centroid' => parent::toArray(),
@ -46,7 +46,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
];
}
public function attach(Point $point) : Point
public function attach(Point $point): Point
{
if ($point instanceof self) {
throw new LogicException('cannot attach a cluster to another');
@ -57,7 +57,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
return $point;
}
public function detach(Point $point) : Point
public function detach(Point $point): Point
{
$this->points->detach($point);
@ -76,7 +76,8 @@ class Cluster extends Point implements IteratorAggregate, Countable
public function updateCentroid(): void
{
if (!$count = count($this->points)) {
$count = count($this->points);
if (!$count) {
return;
}

View File

@ -16,7 +16,7 @@ class Point implements ArrayAccess
/**
* @var array
*/
protected $coordinates;
protected $coordinates = [];
public function __construct(array $coordinates)
{
@ -24,7 +24,7 @@ class Point implements ArrayAccess
$this->coordinates = $coordinates;
}
public function toArray() : array
public function toArray(): array
{
return $this->coordinates;
}
@ -66,7 +66,7 @@ class Point implements ArrayAccess
return $minPoint;
}
public function getCoordinates() : array
public function getCoordinates(): array
{
return $this->coordinates;
}

View File

@ -25,7 +25,7 @@ class Space extends SplObjectStorage
$this->dimension = $dimension;
}
public function toArray() : array
public function toArray(): array
{
$points = [];
foreach ($this as $point) {
@ -35,7 +35,7 @@ class Space extends SplObjectStorage
return ['points' => $points];
}
public function newPoint(array $coordinates) : Point
public function newPoint(array $coordinates): Point
{
if (count($coordinates) != $this->dimension) {
throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
@ -65,7 +65,7 @@ class Space extends SplObjectStorage
parent::attach($point, $data);
}
public function getDimension() : int
public function getDimension(): int
{
return $this->dimension;
}
@ -92,7 +92,7 @@ class Space extends SplObjectStorage
return [$min, $max];
}
public function getRandomPoint(Point $min, Point $max) : Point
public function getRandomPoint(Point $min, Point $max): Point
{
$point = $this->newPoint(array_fill(0, $this->dimension, null));
@ -106,7 +106,7 @@ class Space extends SplObjectStorage
/**
* @return array|Cluster[]
*/
public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM) : array
public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
{
$clusters = $this->initializeClusters($clustersNumber, $initMethod);
@ -119,7 +119,7 @@ class Space extends SplObjectStorage
/**
* @return array|Cluster[]
*/
protected function initializeClusters(int $clustersNumber, int $initMethod) : array
protected function initializeClusters(int $clustersNumber, int $initMethod): array
{
switch ($initMethod) {
case KMeans::INIT_RANDOM:
@ -139,7 +139,7 @@ class Space extends SplObjectStorage
return $clusters;
}
protected function iterate($clusters) : bool
protected function iterate($clusters): bool
{
$convergence = true;
@ -177,19 +177,7 @@ class Space extends SplObjectStorage
return $convergence;
}
private function initializeRandomClusters(int $clustersNumber) : array
{
$clusters = [];
[$min, $max] = $this->getBoundaries();
for ($n = 0; $n < $clustersNumber; ++$n) {
$clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
}
return $clusters;
}
protected function initializeKMPPClusters(int $clustersNumber) : array
protected function initializeKMPPClusters(int $clustersNumber): array
{
$clusters = [];
$this->rewind();
@ -218,4 +206,16 @@ class Space extends SplObjectStorage
return $clusters;
}
/**
 * Builds the requested number of clusters, each constructed from the
 * coordinates of a random point drawn between the boundaries reported
 * by getBoundaries().
 */
private function initializeRandomClusters(int $clustersNumber): array
{
    [$min, $max] = $this->getBoundaries();

    $clusters = [];
    $remaining = $clustersNumber;
    while ($remaining > 0) {
        $randomPoint = $this->getRandomPoint($min, $max);
        $clusters[] = new Cluster($this, $randomPoint->getCoordinates());
        --$remaining;
    }

    return $clusters;
}
}

View File

@ -31,39 +31,40 @@ abstract class Split
public function __construct(Dataset $dataset, float $testSize = 0.3, ?int $seed = null)
{
if (0 >= $testSize || 1 <= $testSize) {
if ($testSize <= 0 || $testSize >= 1) {
throw InvalidArgumentException::percentNotInRange('testSize');
}
$this->seedGenerator($seed);
$this->splitDataset($dataset, $testSize);
}
abstract protected function splitDataset(Dataset $dataset, float $testSize);
public function getTrainSamples() : array
public function getTrainSamples(): array
{
return $this->trainSamples;
}
public function getTestSamples() : array
public function getTestSamples(): array
{
return $this->testSamples;
}
public function getTrainLabels() : array
public function getTrainLabels(): array
{
return $this->trainLabels;
}
public function getTestLabels() : array
public function getTestLabels(): array
{
return $this->testLabels;
}
abstract protected function splitDataset(Dataset $dataset, float $testSize);
protected function seedGenerator(?int $seed = null): void
{
if (null === $seed) {
if ($seed === null) {
mt_srand();
} else {
mt_srand($seed);

View File

@ -21,7 +21,7 @@ class StratifiedRandomSplit extends RandomSplit
/**
* @return Dataset[]|array
*/
private function splitByTarget(Dataset $dataset) : array
private function splitByTarget(Dataset $dataset): array
{
$targets = $dataset->getTargets();
$samples = $dataset->getSamples();
@ -38,7 +38,7 @@ class StratifiedRandomSplit extends RandomSplit
return $datasets;
}
private function createDatasets(array $uniqueTargets, array $split) : array
private function createDatasets(array $uniqueTargets, array $split): array
{
$datasets = [];
foreach ($uniqueTargets as $target) {

View File

@ -31,12 +31,12 @@ class ArrayDataset implements Dataset
$this->targets = $targets;
}
public function getSamples() : array
public function getSamples(): array
{
return $this->samples;
}
public function getTargets() : array
public function getTargets(): array
{
return $this->targets;
}

View File

@ -11,7 +11,7 @@ class CsvDataset extends ArrayDataset
/**
* @var array
*/
protected $columnNames;
protected $columnNames = [];
/**
* @throws FileException
@ -22,7 +22,8 @@ class CsvDataset extends ArrayDataset
throw FileException::missingFile(basename($filepath));
}
if (false === $handle = fopen($filepath, 'rb')) {
$handle = fopen($filepath, 'rb');
if ($handle === false) {
throw FileException::cantOpenFile(basename($filepath));
}
@ -44,7 +45,7 @@ class CsvDataset extends ArrayDataset
parent::__construct($samples, $targets);
}
public function getColumnNames() : array
public function getColumnNames(): array
{
return $this->columnNames;
}

View File

@ -9,10 +9,10 @@ interface Dataset
/**
* @return array
*/
public function getSamples() : array;
public function getSamples(): array;
/**
* @return array
*/
public function getTargets() : array;
public function getTargets(): array;
}

View File

@ -84,7 +84,7 @@ abstract class EigenTransformerBase
/**
* Returns the reduced data
*/
protected function reduce(array $data) : array
protected function reduce(array $data): array
{
$m1 = new Matrix($data);
$m2 = new Matrix($this->eigVectors);

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\DimensionReduction;
use Closure;
use Exception;
use Phpml\Math\Distance\Euclidean;
use Phpml\Math\Distance\Manhattan;
use Phpml\Math\Matrix;
@ -11,8 +13,11 @@ use Phpml\Math\Matrix;
class KernelPCA extends PCA
{
public const KERNEL_RBF = 1;
public const KERNEL_SIGMOID = 2;
public const KERNEL_LAPLACIAN = 3;
public const KERNEL_LINEAR = 4;
/**
@ -34,7 +39,7 @@ class KernelPCA extends PCA
*
* @var array
*/
protected $data;
protected $data = [];
/**
* Kernel principal component analysis (KernelPCA) is an extension of PCA using
@ -54,7 +59,7 @@ class KernelPCA extends PCA
{
$availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
if (!in_array($kernel, $availableKernels)) {
throw new \Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
throw new Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
}
parent::__construct($totalVariance, $numFeatures);
@ -69,7 +74,7 @@ class KernelPCA extends PCA
* $data is an n-by-m matrix and returned array is
* n-by-k matrix where k <= m
*/
public function fit(array $data) : array
public function fit(array $data): array
{
$numRows = count($data);
$this->data = $data;
@ -88,11 +93,32 @@ class KernelPCA extends PCA
return Matrix::transposeArray($this->eigVectors);
}
/**
 * Projects a single sample into the lower dimensional space, using the
 * state left behind by the last run of <code>fit</code>.
 *
 * @throws \Exception if the instance is not marked as fitted, or if the
 *                    first element of $sample is itself an array
 */
public function transform(array $sample): array
{
    if (!$this->fit) {
        throw new Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
    }

    // Only the first element is inspected to detect a nested (non 1-D) input
    $firstValue = $sample[0];
    if (is_array($firstValue)) {
        throw new Exception('KernelPCA::transform() accepts only one-dimensional arrays');
    }

    return $this->projectSample($this->getDistancePairs($sample));
}
/**
* Calculates similarity matrix by use of selected kernel function<br>
* An n-by-m matrix is given and an n-by-n matrix is returned
*/
protected function calculateKernelMatrix(array $data, int $numRows) : array
protected function calculateKernelMatrix(array $data, int $numRows): array
{
$kernelFunc = $this->getKernel();
@ -116,7 +142,7 @@ class KernelPCA extends PCA
*
* K' = K - N.K - K.N + N.K.N where N is n-by-n matrix filled with 1/n
*/
protected function centerMatrix(array $matrix, int $n) : array
protected function centerMatrix(array $matrix, int $n): array
{
$N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
$N = new Matrix($N, false);
@ -140,7 +166,7 @@ class KernelPCA extends PCA
*
* @throws \Exception
*/
protected function getKernel(): \Closure
protected function getKernel(): Closure
{
switch ($this->kernel) {
case self::KERNEL_LINEAR:
@ -173,11 +199,11 @@ class KernelPCA extends PCA
};
default:
throw new \Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
throw new Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
}
}
protected function getDistancePairs(array $sample) : array
protected function getDistancePairs(array $sample): array
{
$kernel = $this->getKernel();
@ -189,7 +215,7 @@ class KernelPCA extends PCA
return $pairs;
}
protected function projectSample(array $pairs) : array
protected function projectSample(array $pairs): array
{
// Normalize eigenvectors by eig = eigVectors / eigValues
$func = function ($eigVal, $eigVect) {
@ -203,25 +229,4 @@ class KernelPCA extends PCA
// return k.dot(eig)
return Matrix::dot($pairs, $eig);
}
/**
* Transforms the given sample to a lower dimensional vector by using
* the variables obtained during the last run of <code>fit</code>.
*
* @throws \Exception
*/
public function transform(array $sample) : array
{
if (!$this->fit) {
throw new \Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
}
if (is_array($sample[0])) {
throw new \Exception('KernelPCA::transform() accepts only one-dimensional arrays');
}
$pairs = $this->getDistancePairs($sample);
return $this->projectSample($pairs);
}
}

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\DimensionReduction;
use Exception;
use Phpml\Math\Matrix;
class LDA extends EigenTransformerBase
@ -16,22 +17,22 @@ class LDA extends EigenTransformerBase
/**
* @var array
*/
public $labels;
public $labels = [];
/**
* @var array
*/
public $means;
public $means = [];
/**
* @var array
*/
public $counts;
public $counts = [];
/**
* @var float[]
*/
public $overallMean;
public $overallMean = [];
/**
* Linear Discriminant Analysis (LDA) is used to reduce the dimensionality
@ -50,18 +51,21 @@ class LDA extends EigenTransformerBase
public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
{
if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
throw new \Exception('Total variance can be a value between 0.1 and 0.99');
throw new Exception('Total variance can be a value between 0.1 and 0.99');
}
if ($numFeatures !== null && $numFeatures <= 0) {
throw new \Exception('Number of features to be preserved should be greater than 0');
throw new Exception('Number of features to be preserved should be greater than 0');
}
if ($totalVariance !== null && $numFeatures !== null) {
throw new \Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
throw new Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
}
if ($numFeatures !== null) {
$this->numFeatures = $numFeatures;
}
if ($totalVariance !== null) {
$this->totalVariance = $totalVariance;
}
@ -70,7 +74,7 @@ class LDA extends EigenTransformerBase
/**
* Trains the algorithm to transform the given data to a lower dimensional space.
*/
public function fit(array $data, array $classes) : array
public function fit(array $data, array $classes): array
{
$this->labels = $this->getLabels($classes);
$this->means = $this->calculateMeans($data, $classes);
@ -86,10 +90,29 @@ class LDA extends EigenTransformerBase
return $this->reduce($data);
}
/**
 * Projects the given sample(s) onto the lower dimensional space using
 * the eigenVectors obtained in the last run of <code>fit</code>.
 *
 * A single one-dimensional sample is wrapped into a one-row matrix
 * before it is handed to reduce().
 *
 * @throws Exception when called before fit()
 */
public function transform(array $sample): array
{
    if (!$this->fit) {
        throw new Exception('LDA has not been fitted with respect to original dataset, please run LDA::fit() first');
    }

    // Accept both a single sample and a list of samples
    $rows = is_array($sample[0]) ? $sample : [$sample];

    return $this->reduce($rows);
}
/**
* Returns unique labels in the dataset
*/
protected function getLabels(array $classes) : array
protected function getLabels(array $classes): array
{
$counts = array_count_values($classes);
@ -100,7 +123,7 @@ class LDA extends EigenTransformerBase
* Calculates mean of each column for each class and returns
* n by m matrix where n is number of labels and m is number of columns
*/
protected function calculateMeans(array $data, array $classes) : array
protected function calculateMeans(array $data, array $classes): array
{
$means = [];
$counts = [];
@ -113,6 +136,7 @@ class LDA extends EigenTransformerBase
if (!isset($means[$label][$col])) {
$means[$label][$col] = 0.0;
}
$means[$label][$col] += $val;
$overallMean[$col] += $val;
}
@ -146,7 +170,7 @@ class LDA extends EigenTransformerBase
* is a n by m matrix where n is number of classes and
* m is number of columns
*/
protected function calculateClassVar(array $data, array $classes) : Matrix
protected function calculateClassVar(array $data, array $classes): Matrix
{
// s is an m by m matrix where m is the number of columns
$s = array_fill(0, count($data[0]), array_fill(0, count($data[0]), 0));
@ -169,7 +193,7 @@ class LDA extends EigenTransformerBase
* is an n by m matrix where n is number of classes and
* m is number of columns
*/
protected function calculateClassCov() : Matrix
protected function calculateClassCov(): Matrix
{
// s is an m by m matrix where m is the number of columns
$s = array_fill(0, count($this->overallMean), array_fill(0, count($this->overallMean), 0));
@ -187,7 +211,7 @@ class LDA extends EigenTransformerBase
/**
* Returns the result of the calculation (x - m)T.(x - m)
*/
protected function calculateVar(array $row, array $means) : Matrix
protected function calculateVar(array $row, array $means): Matrix
{
$x = new Matrix($row, false);
$m = new Matrix($means, false);
@ -195,23 +219,4 @@ class LDA extends EigenTransformerBase
return $diff->transpose()->multiply($diff);
}
/**
* Transforms the given sample to a lower dimensional vector by using
* the eigenVectors obtained in the last run of <code>fit</code>.
*
* @throws \Exception
*/
public function transform(array $sample) : array
{
if (!$this->fit) {
throw new \Exception('LDA has not been fitted with respect to original dataset, please run LDA::fit() first');
}
if (!is_array($sample[0])) {
$sample = [$sample];
}
return $this->reduce($sample);
}
}

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\DimensionReduction;
use Exception;
use Phpml\Math\Statistic\Covariance;
use Phpml\Math\Statistic\Mean;
@ -35,18 +36,21 @@ class PCA extends EigenTransformerBase
public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
{
if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
throw new \Exception('Total variance can be a value between 0.1 and 0.99');
throw new Exception('Total variance can be a value between 0.1 and 0.99');
}
if ($numFeatures !== null && $numFeatures <= 0) {
throw new \Exception('Number of features to be preserved should be greater than 0');
throw new Exception('Number of features to be preserved should be greater than 0');
}
if ($totalVariance !== null && $numFeatures !== null) {
throw new \Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
throw new Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
}
if ($numFeatures !== null) {
$this->numFeatures = $numFeatures;
}
if ($totalVariance !== null) {
$this->totalVariance = $totalVariance;
}
@ -58,7 +62,7 @@ class PCA extends EigenTransformerBase
* $data is an n-by-m matrix and returned array is
* n-by-k matrix where k <= m
*/
public function fit(array $data) : array
public function fit(array $data): array
{
$n = count($data[0]);
@ -73,6 +77,27 @@ class PCA extends EigenTransformerBase
return $this->reduce($data);
}
/**
 * Projects the given sample(s) onto the lower dimensional space using
 * the eigenVectors obtained in the last run of <code>fit</code>.
 *
 * A single one-dimensional sample is wrapped into a one-row matrix,
 * passed through normalize() and then handed to reduce().
 *
 * @throws Exception when called before fit()
 */
public function transform(array $sample): array
{
    if (!$this->fit) {
        throw new Exception('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
    }

    // Accept both a single sample and a list of samples
    $rows = is_array($sample[0]) ? $sample : [$sample];

    $normalized = $this->normalize($rows, count($rows[0]));

    return $this->reduce($normalized);
}
protected function calculateMeans(array $data, int $n): void
{
// Calculate means for each dimension
@ -87,7 +112,7 @@ class PCA extends EigenTransformerBase
* Normalization of the data includes subtracting mean from
* each dimension therefore dimensions will be centered to zero
*/
protected function normalize(array $data, int $n) : array
protected function normalize(array $data, int $n): array
{
if (empty($this->means)) {
$this->calculateMeans($data, $n);
@ -102,25 +127,4 @@ class PCA extends EigenTransformerBase
return $data;
}
/**
* Transforms the given sample to a lower dimensional vector by using
* the eigenVectors obtained in the last run of <code>fit</code>.
*
* @throws \Exception
*/
public function transform(array $sample) : array
{
if (!$this->fit) {
throw new \Exception('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
}
if (!is_array($sample[0])) {
$sample = [$sample];
}
$sample = $this->normalize($sample, count($sample[0]));
return $this->reduce($sample);
}
}

View File

@ -4,9 +4,11 @@ declare(strict_types=1);
namespace Phpml\Exception;
class DatasetException extends \Exception
use Exception;
class DatasetException extends Exception
{
public static function missingFolder(string $path) : DatasetException
public static function missingFolder(string $path): self
{
return new self(sprintf('Dataset root folder "%s" missing.', $path));
}

View File

@ -4,19 +4,21 @@ declare(strict_types=1);
namespace Phpml\Exception;
class FileException extends \Exception
use Exception;
class FileException extends Exception
{
public static function missingFile(string $filepath) : FileException
public static function missingFile(string $filepath): self
{
return new self(sprintf('File "%s" missing.', $filepath));
}
public static function cantOpenFile(string $filepath) : FileException
public static function cantOpenFile(string $filepath): self
{
return new self(sprintf('File "%s" can\'t be open.', $filepath));
}
public static function cantSaveFile(string $filepath) : FileException
public static function cantSaveFile(string $filepath): self
{
return new self(sprintf('File "%s" can\'t be saved.', $filepath));
}

View File

@ -4,39 +4,41 @@ declare(strict_types=1);
namespace Phpml\Exception;
class InvalidArgumentException extends \Exception
use Exception;
class InvalidArgumentException extends Exception
{
public static function arraySizeNotMatch() : InvalidArgumentException
public static function arraySizeNotMatch(): self
{
return new self('Size of given arrays does not match');
}
public static function percentNotInRange($name) : InvalidArgumentException
public static function percentNotInRange($name): self
{
return new self(sprintf('%s must be between 0.0 and 1.0', $name));
}
public static function arrayCantBeEmpty() : InvalidArgumentException
public static function arrayCantBeEmpty(): self
{
return new self('The array has zero elements');
}
public static function arraySizeToSmall(int $minimumSize = 2) : InvalidArgumentException
public static function arraySizeToSmall(int $minimumSize = 2): self
{
return new self(sprintf('The array must have at least %d elements', $minimumSize));
}
public static function matrixDimensionsDidNotMatch() : InvalidArgumentException
public static function matrixDimensionsDidNotMatch(): self
{
return new self('Matrix dimensions did not match');
}
public static function inconsistentMatrixSupplied() : InvalidArgumentException
public static function inconsistentMatrixSupplied(): self
{
return new self('Inconsistent matrix supplied');
}
public static function invalidClustersNumber() : InvalidArgumentException
public static function invalidClustersNumber(): self
{
return new self('Invalid clusters number');
}
@ -44,57 +46,57 @@ class InvalidArgumentException extends \Exception
/**
* @param mixed $target
*/
public static function invalidTarget($target) : InvalidArgumentException
public static function invalidTarget($target): self
{
return new self(sprintf('Target with value "%s" is not part of the accepted classes', $target));
}
public static function invalidStopWordsLanguage(string $language) : InvalidArgumentException
public static function invalidStopWordsLanguage(string $language): self
{
return new self(sprintf('Can\'t find "%s" language for StopWords', $language));
}
public static function invalidLayerNodeClass() : InvalidArgumentException
public static function invalidLayerNodeClass(): self
{
return new self('Layer node class must implement Node interface');
}
public static function invalidLayersNumber() : InvalidArgumentException
public static function invalidLayersNumber(): self
{
return new self('Provide at least 1 hidden layer');
}
public static function invalidClassesNumber() : InvalidArgumentException
public static function invalidClassesNumber(): self
{
return new self('Provide at least 2 different classes');
}
public static function inconsistentClasses() : InvalidArgumentException
public static function inconsistentClasses(): self
{
return new self('The provided classes don\'t match the classes provided in the constructor');
}
public static function fileNotFound(string $file) : InvalidArgumentException
public static function fileNotFound(string $file): self
{
return new self(sprintf('File "%s" not found', $file));
}
public static function fileNotExecutable(string $file) : InvalidArgumentException
public static function fileNotExecutable(string $file): self
{
return new self(sprintf('File "%s" is not executable', $file));
}
public static function pathNotFound(string $path) : InvalidArgumentException
public static function pathNotFound(string $path): self
{
return new self(sprintf('The specified path "%s" does not exist', $path));
}
public static function pathNotWritable(string $path) : InvalidArgumentException
public static function pathNotWritable(string $path): self
{
return new self(sprintf('The specified path "%s" is not writable', $path));
}
public static function invalidOperator(string $operator) : InvalidArgumentException
public static function invalidOperator(string $operator): self
{
return new self(sprintf('Invalid operator "%s" provided', $operator));
}

View File

@ -4,19 +4,21 @@ declare(strict_types=1);
namespace Phpml\Exception;
class MatrixException extends \Exception
use Exception;
class MatrixException extends Exception
{
public static function notSquareMatrix() : MatrixException
public static function notSquareMatrix(): self
{
return new self('Matrix is not square matrix');
}
public static function columnOutOfRange() : MatrixException
public static function columnOutOfRange(): self
{
return new self('Column out of range');
}
public static function singularMatrix() : MatrixException
public static function singularMatrix(): self
{
return new self('Matrix is singular');
}

View File

@ -4,9 +4,11 @@ declare(strict_types=1);
namespace Phpml\Exception;
class NormalizerException extends \Exception
use Exception;
class NormalizerException extends Exception
{
public static function unknownNorm() : NormalizerException
public static function unknownNorm(): self
{
return new self('Unknown norm supplied.');
}

View File

@ -4,14 +4,16 @@ declare(strict_types=1);
namespace Phpml\Exception;
class SerializeException extends \Exception
use Exception;
class SerializeException extends Exception
{
public static function cantUnserialize(string $filepath) : SerializeException
public static function cantUnserialize(string $filepath): self
{
return new self(sprintf('"%s" can not be unserialized.', $filepath));
}
public static function cantSerialize(string $classname) : SerializeException
public static function cantSerialize(string $classname): self
{
return new self(sprintf('Class "%s" can not be serialized.', $classname));
}

View File

@ -11,19 +11,19 @@ class StopWords
/**
* @var array
*/
protected $stopWords;
protected $stopWords = [];
public function __construct(array $stopWords)
{
$this->stopWords = array_fill_keys($stopWords, true);
}
public function isStopWord(string $token) : bool
public function isStopWord(string $token): bool
{
return isset($this->stopWords[$token]);
}
public static function factory(string $language = 'English') : StopWords
public static function factory(string $language = 'English'): self
{
$className = __NAMESPACE__."\\StopWords\\$language";

View File

@ -11,7 +11,7 @@ class TfIdfTransformer implements Transformer
/**
* @var array
*/
private $idf;
private $idf = [];
public function __construct(?array $samples = null)
{

View File

@ -27,21 +27,18 @@ class TokenCountVectorizer implements Transformer
/**
* @var array
*/
private $vocabulary;
private $vocabulary = [];
/**
* @var array
*/
private $frequencies;
private $frequencies = [];
public function __construct(Tokenizer $tokenizer, ?StopWords $stopWords = null, float $minDF = 0.0)
{
$this->tokenizer = $tokenizer;
$this->stopWords = $stopWords;
$this->minDF = $minDF;
$this->vocabulary = [];
$this->frequencies = [];
}
public function fit(array $samples): void
@ -58,7 +55,7 @@ class TokenCountVectorizer implements Transformer
$this->checkDocumentFrequency($samples);
}
public function getVocabulary() : array
public function getVocabulary(): array
{
return array_flip($this->vocabulary);
}
@ -80,7 +77,7 @@ class TokenCountVectorizer implements Transformer
foreach ($tokens as $token) {
$index = $this->getTokenIndex($token);
if (false !== $index) {
if ($index !== false) {
$this->updateFrequency($token);
if (!isset($counts[$index])) {
$counts[$index] = 0;
@ -155,7 +152,7 @@ class TokenCountVectorizer implements Transformer
}
}
private function getBeyondMinimumIndexes(int $samplesCount) : array
private function getBeyondMinimumIndexes(int $samplesCount): array
{
$indexes = [];
foreach ($this->frequencies as $token => $frequency) {

View File

@ -36,6 +36,18 @@ trait OneVsRest
$this->trainBylabel($samples, $targets);
}
/**
 * Clears the per-label classifiers, the known labels and the recorded cost
 * values kept by OneVsRest, then lets the concrete classifier reset its own
 * state through resetBinary().
 */
public function reset(): void
{
    $this->classifiers = $this->allLabels = $this->costValues = [];

    $this->resetBinary();
}
protected function trainByLabel(array $samples, array $targets, array $allLabels = []): void
{
// Overwrites the current value if it exist. $allLabels must be provided for each partialTrain run.
@ -44,6 +56,7 @@ trait OneVsRest
} else {
$this->allLabels = array_keys(array_count_values($targets));
}
sort($this->allLabels, SORT_STRING);
// If there are only two targets, then there is no need to perform OvR
@ -77,18 +90,6 @@ trait OneVsRest
}
}
/**
* Resets the classifier and the vars internally used by OneVsRest to create multiple classifiers.
*/
public function reset(): void
{
$this->classifiers = [];
$this->allLabels = [];
$this->costValues = [];
$this->resetBinary();
}
/**
* Returns an instance of the current class after cleaning up OneVsRest stuff.
*
@ -105,29 +106,6 @@ trait OneVsRest
return $classifier;
}
/**
* Groups all targets into two groups: Targets equal to
* the given label and the others
*
* $targets is not passed by reference nor contains objects so this method
* changes will not affect the caller $targets array.
*
* @param mixed $label
*
* @return array Binarized targets and target's labels
*/
private function binarizeTargets(array $targets, $label) : array
{
$notLabel = "not_$label";
foreach ($targets as $key => $target) {
$targets[$key] = $target == $label ? $label : $notLabel;
}
$labels = [$label, $notLabel];
return [$targets, $labels];
}
/**
* @return mixed
*/
@ -155,8 +133,6 @@ trait OneVsRest
/**
* To be overwritten by OneVsRest classifiers.
*
* @return void
*/
abstract protected function resetBinary(): void;
@ -174,4 +150,27 @@ trait OneVsRest
* @return mixed
*/
abstract protected function predictSampleBinary(array $sample);
/**
 * Splits the targets into two groups: those equal to the given label,
 * and everything else (renamed to "not_<label>").
 *
 * $targets is received by value, so the caller's array is left untouched.
 * Array keys of $targets are preserved in the result.
 *
 * @param mixed $label
 *
 * @return array Two elements: the binarized targets and the pair of labels in use
 */
private function binarizeTargets(array $targets, $label): array
{
    $negative = "not_$label";

    // Loose comparison is kept as-is (presumably so that int-cast labels
    // still match their string targets; verify against trainByLabel)
    $binarized = array_map(static function ($target) use ($label, $negative) {
        return $target == $label ? $label : $negative;
    }, $targets);

    return [$binarized, [$label, $negative]];
}
}

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
use Closure;
/**
* Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x
* See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method
@ -17,7 +19,7 @@ namespace Phpml\Helper\Optimizer;
*/
class ConjugateGradient extends GD
{
public function runOptimization(array $samples, array $targets, \Closure $gradientCb) : array
public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
{
$this->samples = $samples;
$this->targets = $targets;
@ -25,7 +27,7 @@ class ConjugateGradient extends GD
$this->sampleCount = count($samples);
$this->costValues = [];
$d = mp::muls($this->gradient($this->theta), -1);
$d = MP::muls($this->gradient($this->theta), -1);
for ($i = 0; $i < $this->maxIterations; ++$i) {
// Obtain α that minimizes f(θ + α.d)
@ -59,7 +61,7 @@ class ConjugateGradient extends GD
* Executes the callback function for the problem and returns
* sum of the gradient for all samples & targets.
*/
protected function gradient(array $theta) : array
protected function gradient(array $theta): array
{
[, $gradient] = parent::gradient($theta);
@ -69,7 +71,7 @@ class ConjugateGradient extends GD
/**
* Returns the value of f(x) for given solution
*/
protected function cost(array $theta) : float
protected function cost(array $theta): float
{
[$cost] = parent::gradient($theta);
@ -90,14 +92,14 @@ class ConjugateGradient extends GD
* b-1) If cost function decreases, continue enlarging alpha
* b-2) If cost function increases, take the midpoint and try again
*/
protected function getAlpha(float $d) : float
protected function getAlpha(float $d): float
{
$small = 0.0001 * $d;
$large = 0.01 * $d;
// Obtain θ + α.d for two initial values, x0 and x1
$x0 = mp::adds($this->theta, $small);
$x1 = mp::adds($this->theta, $large);
$x0 = MP::adds($this->theta, $small);
$x1 = MP::adds($this->theta, $large);
$epsilon = 0.0001;
$iteration = 0;
@ -113,9 +115,9 @@ class ConjugateGradient extends GD
if ($fx1 < $fx0) {
$x0 = $x1;
$x1 = mp::adds($x1, 0.01); // Enlarge second
$x1 = MP::adds($x1, 0.01); // Enlarge second
} else {
$x1 = mp::divs(mp::add($x1, $x0), 2.0);
$x1 = MP::divs(MP::add($x1, $x0), 2.0);
} // Get to the midpoint
$error = $fx1 / $this->dimensions;
@ -135,7 +137,7 @@ class ConjugateGradient extends GD
*
* θ(k+1) = θ(k) + α.d
*/
protected function getNewTheta(float $alpha, array $d) : array
protected function getNewTheta(float $alpha, array $d): array
{
$theta = $this->theta;
@ -164,7 +166,7 @@ class ConjugateGradient extends GD
* See:
* R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149-154.
*/
protected function getBeta(array $newTheta) : float
protected function getBeta(array $newTheta): float
{
$dNew = array_sum($this->gradient($newTheta));
$dOld = array_sum($this->gradient($this->theta)) + 1e-100;
@ -177,11 +179,11 @@ class ConjugateGradient extends GD
*
* d(k+1) = -∇f(x(k+1)) + β(k).d(k)
*/
protected function getNewDirection(array $theta, float $beta, array $d) : array
protected function getNewDirection(array $theta, float $beta, array $d): array
{
$grad = $this->gradient($theta);
return mp::add(mp::muls($grad, -1), mp::muls($d, $beta));
return MP::add(MP::muls($grad, -1), MP::muls($d, $beta));
}
}
@ -189,12 +191,12 @@ class ConjugateGradient extends GD
* Handles element-wise vector operations between vector-vector
* and vector-scalar variables
*/
class mp
class MP
{
/**
* Element-wise <b>multiplication</b> of two vectors of the same size
*/
public static function mul(array $m1, array $m2) : array
public static function mul(array $m1, array $m2): array
{
$res = [];
foreach ($m1 as $i => $val) {
@ -207,7 +209,7 @@ class mp
/**
* Element-wise <b>division</b> of two vectors of the same size
*/
public static function div(array $m1, array $m2) : array
public static function div(array $m1, array $m2): array
{
$res = [];
foreach ($m1 as $i => $val) {
@ -220,7 +222,7 @@ class mp
/**
* Element-wise <b>addition</b> of two vectors of the same size
*/
public static function add(array $m1, array $m2, int $mag = 1) : array
public static function add(array $m1, array $m2, int $mag = 1): array
{
$res = [];
foreach ($m1 as $i => $val) {
@ -233,7 +235,7 @@ class mp
/**
* Element-wise <b>subtraction</b> of two vectors of the same size
*/
public static function sub(array $m1, array $m2) : array
public static function sub(array $m1, array $m2): array
{
return self::add($m1, $m2, -1);
}
@ -241,7 +243,7 @@ class mp
/**
* Element-wise <b>multiplication</b> of a vector with a scalar
*/
public static function muls(array $m1, float $m2) : array
public static function muls(array $m1, float $m2): array
{
$res = [];
foreach ($m1 as $val) {
@ -254,7 +256,7 @@ class mp
/**
* Element-wise <b>division</b> of a vector with a scalar
*/
public static function divs(array $m1, float $m2) : array
public static function divs(array $m1, float $m2): array
{
$res = [];
foreach ($m1 as $val) {
@ -267,7 +269,7 @@ class mp
/**
* Element-wise <b>addition</b> of a vector with a scalar
*/
public static function adds(array $m1, float $m2, int $mag = 1) : array
public static function adds(array $m1, float $m2, int $mag = 1): array
{
$res = [];
foreach ($m1 as $val) {
@ -280,7 +282,7 @@ class mp
/**
* Element-wise <b>subtraction</b> of a vector with a scalar
*/
public static function subs(array $m1, float $m2) : array
public static function subs(array $m1, float $m2): array
{
return self::adds($m1, $m2, -1);
}

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
use Closure;
/**
* Batch version of Gradient Descent to optimize the weights
* of a classifier given samples, targets and the objective function to minimize
@ -17,7 +19,7 @@ class GD extends StochasticGD
*/
protected $sampleCount = null;
public function runOptimization(array $samples, array $targets, \Closure $gradientCb) : array
public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
{
$this->samples = $samples;
$this->targets = $targets;
@ -51,7 +53,7 @@ class GD extends StochasticGD
* Calculates gradient, cost function and penalty term for each sample
* then returns them as an array of values
*/
protected function gradient(array $theta) : array
protected function gradient(array $theta): array
{
$costs = [];
$gradient = [];

View File

@ -4,6 +4,9 @@ declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
use Closure;
use Exception;
abstract class Optimizer
{
/**
@ -11,7 +14,7 @@ abstract class Optimizer
*
* @var array
*/
protected $theta;
protected $theta = [];
/**
* Number of dimensions
@ -30,7 +33,7 @@ abstract class Optimizer
// Inits the weights randomly
$this->theta = [];
for ($i = 0; $i < $this->dimensions; ++$i) {
$this->theta[] = rand() / (float) getrandmax();
$this->theta[] = random_int(0, getrandmax()) / (float) getrandmax();
}
}
@ -44,7 +47,7 @@ abstract class Optimizer
public function setInitialTheta(array $theta)
{
if (count($theta) != $this->dimensions) {
throw new \Exception("Number of values in the weights array should be $this->dimensions");
throw new Exception("Number of values in the weights array should be $this->dimensions");
}
$this->theta = $theta;
@ -56,5 +59,5 @@ abstract class Optimizer
* Executes the optimization with the given samples & targets
* and returns the weights
*/
abstract public function runOptimization(array $samples, array $targets, \Closure $gradientCb);
abstract public function runOptimization(array $samples, array $targets, Closure $gradientCb);
}

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
use Closure;
/**
* Stochastic Gradient Descent optimization method
* to find a solution for the equation A.ϴ = y where
@ -66,6 +68,7 @@ class StochasticGD extends Optimizer
* @var bool
*/
protected $enableEarlyStop = true;
/**
* List of values obtained by evaluating the cost function at each iteration
* of the algorithm
@ -141,7 +144,7 @@ class StochasticGD extends Optimizer
* The cost function to minimize and the gradient of the function are to be
* handled by the callback function provided as the third parameter of the method.
*/
public function runOptimization(array $samples, array $targets, \Closure $gradientCb) : array
public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
{
$this->samples = $samples;
$this->targets = $targets;
@ -181,7 +184,16 @@ class StochasticGD extends Optimizer
return $this->theta = $bestTheta;
}
protected function updateTheta() : float
/**
 * Gives access to the cost values collected, one per executed iteration,
 * during the most recent optimization run.
 */
public function getCostValues(): array
{
    return $this->costValues;
}
protected function updateTheta(): float
{
$jValue = 0.0;
$theta = $this->theta;
@ -237,15 +249,6 @@ class StochasticGD extends Optimizer
return false;
}
/**
* Returns the list of cost values for each iteration executed in
* last run of the optimization
*/
public function getCostValues() : array
{
return $this->costValues;
}
/**
* Clears the optimizer internal vars after the optimization process.
*/

View File

@ -10,5 +10,5 @@ interface Distance
* @param array $a
* @param array $b
*/
public function distance(array $a, array $b) : float;
public function distance(array $a, array $b): float;
}

View File

@ -12,7 +12,7 @@ class Chebyshev implements Distance
/**
* @throws InvalidArgumentException
*/
public function distance(array $a, array $b) : float
public function distance(array $a, array $b): float
{
if (count($a) !== count($b)) {
throw InvalidArgumentException::arraySizeNotMatch();

View File

@ -12,7 +12,7 @@ class Euclidean implements Distance
/**
* @throws InvalidArgumentException
*/
public function distance(array $a, array $b) : float
public function distance(array $a, array $b): float
{
if (count($a) !== count($b)) {
throw InvalidArgumentException::arraySizeNotMatch();
@ -30,7 +30,7 @@ class Euclidean implements Distance
/**
* Square of Euclidean distance
*/
public function sqDistance(array $a, array $b) : float
public function sqDistance(array $a, array $b): float
{
return $this->distance($a, $b) ** 2;
}

View File

@ -12,7 +12,7 @@ class Manhattan implements Distance
/**
* @throws InvalidArgumentException
*/
public function distance(array $a, array $b) : float
public function distance(array $a, array $b): float
{
if (count($a) !== count($b)) {
throw InvalidArgumentException::arraySizeNotMatch();

View File

@ -22,7 +22,7 @@ class Minkowski implements Distance
/**
* @throws InvalidArgumentException
*/
public function distance(array $a, array $b) : float
public function distance(array $a, array $b): float
{
if (count($a) !== count($b)) {
throw InvalidArgumentException::arraySizeNotMatch();

View File

@ -7,10 +7,10 @@ namespace Phpml\Math;
interface Kernel
{
/**
* @param float $a
* @param float $b
* @param float|array $a
* @param float|array $b
*
* @return float
* @return float|array
*/
public function compute($a, $b);
}

View File

@ -23,12 +23,11 @@ class RBF implements Kernel
* @param array $a
* @param array $b
*/
public function compute($a, $b)
public function compute($a, $b): float
{
$score = 2 * Product::scalar($a, $b);
$squares = Product::scalar($a, $a) + Product::scalar($b, $b);
$result = exp(-$this->gamma * ($squares - $score));
return $result;
return exp(-$this->gamma * ($squares - $score));
}
}

View File

@ -1,6 +1,7 @@
<?php
declare(strict_types=1);
/**
* Class to obtain eigenvalues and eigenvectors of a real matrix.
*
@ -54,6 +55,7 @@ class EigenvalueDecomposition
* @var array
*/
private $d = [];
private $e = [];
/**
@ -75,7 +77,7 @@ class EigenvalueDecomposition
*
* @var array
*/
private $ort;
private $ort = [];
/**
* Used for complex scalar division.
@ -83,6 +85,7 @@ class EigenvalueDecomposition
* @var float
*/
private $cdivr;
private $cdivi;
/**
@ -116,6 +119,71 @@ class EigenvalueDecomposition
}
}
/**
 * Return the eigenvector matrix.
 *
 * The result holds the rows of the transposed V matrix (i.e. the columns
 * of V), each one scaled to Euclidean length 1.0. A vector whose length is
 * zero cannot be scaled and is returned unchanged instead of triggering a
 * division by zero.
 */
public function getEigenvectors(): array
{
    $vectors = (new Matrix($this->V))->transpose()->toArray();

    // Always return the eigenvectors of length 1.0
    return array_map(static function ($vect) {
        $norm = 0;
        foreach ($vect as $component) {
            $norm += $component ** 2;
        }

        $norm = sqrt($norm);

        // Guard: dividing by a zero norm yields NAN (PHP 7) or throws (PHP 8)
        if ($norm === 0.0) {
            return $vect;
        }

        foreach ($vect as $index => $component) {
            $vect[$index] = $component / $norm;
        }

        return $vect;
    }, $vectors);
}
/**
 * Return the real parts of the eigenvalues, i.e. the internal vector d:
 * d = real(diag(D))
 */
public function getRealEigenvalues(): array
{
    return $this->d;
}
/**
 * Return the imaginary parts of the eigenvalues <br>
 * d = imag(diag(D))
 *
 * The values are handed back exactly as stored in the $e property; no copy
 * or re-ordering is performed in this accessor.
 */
public function getImagEigenvalues(): array
{
return $this->e;
}
/**
 * Return the block diagonal eigenvalue matrix.
 *
 * Row i carries d[i] on the diagonal. When e[i] is non-zero it is written
 * next to the diagonal: one column to the right for a positive value, one
 * column to the left for a negative value.
 */
public function getDiagonalEigenvalues(): array
{
    $blockDiagonal = [];
    for ($row = 0; $row < $this->n; ++$row) {
        $line = array_fill(0, $this->n, 0.0);
        $line[$row] = $this->d[$row];

        $imaginary = $this->e[$row];
        if ($imaginary != 0) {
            $neighbour = $imaginary > 0 ? $row + 1 : $row - 1;
            $line[$neighbour] = $imaginary;
        }

        $blockDiagonal[$row] = $line;
    }

    return $blockDiagonal;
}
/**
* Symmetric Householder reduction to tridiagonal form.
*/
@ -158,6 +226,7 @@ class EigenvalueDecomposition
for ($j = 0; $j < $i; ++$j) {
$this->e[$j] = 0.0;
}
// Apply similarity transformation to remaining columns.
for ($j = 0; $j < $i; ++$j) {
$f = $this->d[$j];
@ -168,6 +237,7 @@ class EigenvalueDecomposition
$g += $this->V[$k][$j] * $this->d[$k];
$this->e[$k] += $this->V[$k][$j] * $f;
}
$this->e[$j] = $g;
}
@ -185,16 +255,19 @@ class EigenvalueDecomposition
for ($j = 0; $j < $i; ++$j) {
$this->e[$j] -= $hh * $this->d[$j];
}
for ($j = 0; $j < $i; ++$j) {
$f = $this->d[$j];
$g = $this->e[$j];
for ($k = $j; $k <= $i_; ++$k) {
$this->V[$k][$j] -= ($f * $this->e[$k] + $g * $this->d[$k]);
}
$this->d[$j] = $this->V[$i - 1][$j];
$this->V[$i][$j] = 0.0;
}
}
$this->d[$i] = $h;
}
@ -207,16 +280,19 @@ class EigenvalueDecomposition
for ($k = 0; $k <= $i; ++$k) {
$this->d[$k] = $this->V[$k][$i + 1] / $h;
}
for ($j = 0; $j <= $i; ++$j) {
$g = 0.0;
for ($k = 0; $k <= $i; ++$k) {
$g += $this->V[$k][$i + 1] * $this->V[$k][$j];
}
for ($k = 0; $k <= $i; ++$k) {
$this->V[$k][$j] -= $g * $this->d[$k];
}
}
}
for ($k = 0; $k <= $i; ++$k) {
$this->V[$k][$i + 1] = 0.0;
}
@ -241,6 +317,7 @@ class EigenvalueDecomposition
for ($i = 1; $i < $this->n; ++$i) {
$this->e[$i - 1] = $this->e[$i];
}
$this->e[$this->n - 1] = 0.0;
$f = 0.0;
$tst1 = 0.0;
@ -254,8 +331,10 @@ class EigenvalueDecomposition
if (abs($this->e[$m]) <= $eps * $tst1) {
break;
}
++$m;
}
// If m == l, $this->d[l] is an eigenvalue,
// otherwise, iterate.
if ($m > $l) {
@ -270,6 +349,7 @@ class EigenvalueDecomposition
if ($p < 0) {
$r *= -1;
}
$this->d[$l] = $this->e[$l] / ($p + $r);
$this->d[$l + 1] = $this->e[$l] * ($p + $r);
$dl1 = $this->d[$l + 1];
@ -277,6 +357,7 @@ class EigenvalueDecomposition
for ($i = $l + 2; $i < $this->n; ++$i) {
$this->d[$i] -= $h;
}
$f += $h;
// Implicit QL transformation.
$p = $this->d[$m];
@ -303,12 +384,14 @@ class EigenvalueDecomposition
$this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h;
}
}
$p = -$s * $s2 * $c3 * $el1 * $this->e[$l] / $dl1;
$this->e[$l] = $s * $p;
$this->d[$l] = $c * $p;
// Check for convergence.
} while (abs($this->e[$l]) > $eps * $tst1);
}
$this->d[$l] = $this->d[$l] + $f;
$this->e[$l] = 0.0;
}
@ -323,6 +406,7 @@ class EigenvalueDecomposition
$p = $this->d[$j];
}
}
if ($k != $i) {
$this->d[$k] = $this->d[$i];
$this->d[$i] = $p;
@ -354,6 +438,7 @@ class EigenvalueDecomposition
for ($i = $m; $i <= $high; ++$i) {
$scale = $scale + abs($this->H[$i][$m - 1]);
}
if ($scale != 0.0) {
// Compute Householder transformation.
$h = 0.0;
@ -361,10 +446,12 @@ class EigenvalueDecomposition
$this->ort[$i] = $this->H[$i][$m - 1] / $scale;
$h += $this->ort[$i] * $this->ort[$i];
}
$g = sqrt($h);
if ($this->ort[$m] > 0) {
$g *= -1;
}
$h -= $this->ort[$m] * $g;
$this->ort[$m] -= $g;
// Apply Householder similarity transformation
@ -374,21 +461,25 @@ class EigenvalueDecomposition
for ($i = $high; $i >= $m; --$i) {
$f += $this->ort[$i] * $this->H[$i][$j];
}
$f /= $h;
for ($i = $m; $i <= $high; ++$i) {
$this->H[$i][$j] -= $f * $this->ort[$i];
}
}
for ($i = 0; $i <= $high; ++$i) {
$f = 0.0;
for ($j = $high; $j >= $m; --$j) {
$f += $this->ort[$j] * $this->H[$i][$j];
}
$f = $f / $h;
for ($j = $m; $j <= $high; ++$j) {
$this->H[$i][$j] -= $f * $this->ort[$j];
}
}
$this->ort[$m] = $scale * $this->ort[$m];
$this->H[$m][$m - 1] = $scale * $g;
}
@ -400,16 +491,19 @@ class EigenvalueDecomposition
$this->V[$i][$j] = ($i == $j ? 1.0 : 0.0);
}
}
for ($m = $high - 1; $m >= $low + 1; --$m) {
if ($this->H[$m][$m - 1] != 0.0) {
for ($i = $m + 1; $i <= $high; ++$i) {
$this->ort[$i] = $this->H[$i][$m - 1];
}
for ($j = $m; $j <= $high; ++$j) {
$g = 0.0;
for ($i = $m; $i <= $high; ++$i) {
$g += $this->ort[$i] * $this->V[$i][$j];
}
// Double division avoids possible underflow
$g = ($g / $this->ort[$m]) / $this->H[$m][$m - 1];
for ($i = $m; $i <= $high; ++$i) {
@ -469,6 +563,7 @@ class EigenvalueDecomposition
$this->d[$i] = $this->H[$i][$i];
$this->e[$i] = 0.0;
}
for ($j = max($i - 1, 0); $j < $nn; ++$j) {
$norm = $norm + abs($this->H[$i][$j]);
}
@ -484,11 +579,14 @@ class EigenvalueDecomposition
if ($s == 0.0) {
$s = $norm;
}
if (abs($this->H[$l][$l - 1]) < $eps * $s) {
break;
}
--$l;
}
// Check for convergence
// One root found
if ($l == $n) {
@ -513,11 +611,13 @@ class EigenvalueDecomposition
} else {
$z = $p - $z;
}
$this->d[$n - 1] = $x + $z;
$this->d[$n] = $this->d[$n - 1];
if ($z != 0.0) {
$this->d[$n] = $x - $w / $z;
}
$this->e[$n - 1] = 0.0;
$this->e[$n] = 0.0;
$x = $this->H[$n][$n - 1];
@ -533,18 +633,21 @@ class EigenvalueDecomposition
$this->H[$n - 1][$j] = $q * $z + $p * $this->H[$n][$j];
$this->H[$n][$j] = $q * $this->H[$n][$j] - $p * $z;
}
// Column modification
for ($i = 0; $i <= $n; ++$i) {
$z = $this->H[$i][$n - 1];
$this->H[$i][$n - 1] = $q * $z + $p * $this->H[$i][$n];
$this->H[$i][$n] = $q * $this->H[$i][$n] - $p * $z;
}
// Accumulate transformations
for ($i = $low; $i <= $high; ++$i) {
$z = $this->V[$i][$n - 1];
$this->V[$i][$n - 1] = $q * $z + $p * $this->V[$i][$n];
$this->V[$i][$n] = $q * $this->V[$i][$n] - $p * $z;
}
// Complex pair
} else {
$this->d[$n - 1] = $x + $p;
@ -552,6 +655,7 @@ class EigenvalueDecomposition
$this->e[$n - 1] = $z;
$this->e[$n] = -$z;
}
$n = $n - 2;
$iter = 0;
// No convergence yet
@ -564,16 +668,19 @@ class EigenvalueDecomposition
$y = $this->H[$n - 1][$n - 1];
$w = $this->H[$n][$n - 1] * $this->H[$n - 1][$n];
}
// Wilkinson's original ad hoc shift
if ($iter == 10) {
$exshift += $x;
for ($i = $low; $i <= $n; ++$i) {
$this->H[$i][$i] -= $x;
}
$s = abs($this->H[$n][$n - 1]) + abs($this->H[$n - 1][$n - 2]);
$x = $y = 0.75 * $s;
$w = -0.4375 * $s * $s;
}
// MATLAB's new ad hoc shift
if ($iter == 30) {
$s = ($y - $x) / 2.0;
@ -583,14 +690,17 @@ class EigenvalueDecomposition
if ($y < $x) {
$s = -$s;
}
$s = $x - $w / (($y - $x) / 2.0 + $s);
for ($i = $low; $i <= $n; ++$i) {
$this->H[$i][$i] -= $s;
}
$exshift += $s;
$x = $y = $w = 0.964;
}
}
// Could check iteration count here.
$iter = $iter + 1;
// Look for two consecutive small sub-diagonal elements
@ -609,18 +719,22 @@ class EigenvalueDecomposition
if ($m == $l) {
break;
}
if (abs($this->H[$m][$m - 1]) * (abs($q) + abs($r)) <
$eps * (abs($p) * (abs($this->H[$m - 1][$m - 1]) + abs($z) + abs($this->H[$m + 1][$m + 1])))) {
break;
}
--$m;
}
for ($i = $m + 2; $i <= $n; ++$i) {
$this->H[$i][$i - 2] = 0.0;
if ($i > $m + 2) {
$this->H[$i][$i - 3] = 0.0;
}
}
// Double QR step involving rows l:n and columns m:n
for ($k = $m; $k <= $n - 1; ++$k) {
$notlast = ($k != $n - 1);
@ -635,19 +749,23 @@ class EigenvalueDecomposition
$r = $r / $x;
}
}
if ($x == 0.0) {
break;
}
$s = sqrt($p * $p + $q * $q + $r * $r);
if ($p < 0) {
$s = -$s;
}
if ($s != 0) {
if ($k != $m) {
$this->H[$k][$k - 1] = -$s * $x;
} elseif ($l != $m) {
$this->H[$k][$k - 1] = -$this->H[$k][$k - 1];
}
$p = $p + $s;
$x = $p / $s;
$y = $q / $s;
@ -661,9 +779,11 @@ class EigenvalueDecomposition
$p = $p + $r * $this->H[$k + 2][$j];
$this->H[$k + 2][$j] = $this->H[$k + 2][$j] - $p * $z;
}
$this->H[$k][$j] = $this->H[$k][$j] - $p * $x;
$this->H[$k + 1][$j] = $this->H[$k + 1][$j] - $p * $y;
}
// Column modification
for ($i = 0; $i <= min($n, $k + 3); ++$i) {
$p = $x * $this->H[$i][$k] + $y * $this->H[$i][$k + 1];
@ -671,9 +791,11 @@ class EigenvalueDecomposition
$p = $p + $z * $this->H[$i][$k + 2];
$this->H[$i][$k + 2] = $this->H[$i][$k + 2] - $p * $r;
}
$this->H[$i][$k] = $this->H[$i][$k] - $p;
$this->H[$i][$k + 1] = $this->H[$i][$k + 1] - $p * $q;
}
// Accumulate transformations
for ($i = $low; $i <= $high; ++$i) {
$p = $x * $this->V[$i][$k] + $y * $this->V[$i][$k + 1];
@ -681,6 +803,7 @@ class EigenvalueDecomposition
$p = $p + $z * $this->V[$i][$k + 2];
$this->V[$i][$k + 2] = $this->V[$i][$k + 2] - $p * $r;
}
$this->V[$i][$k] = $this->V[$i][$k] - $p;
$this->V[$i][$k + 1] = $this->V[$i][$k + 1] - $p * $q;
}
@ -719,6 +842,7 @@ class EigenvalueDecomposition
} else {
$this->H[$i][$n] = -$r / ($eps * $norm);
}
// Solve real equations
} else {
$x = $this->H[$i][$i + 1];
@ -732,6 +856,7 @@ class EigenvalueDecomposition
$this->H[$i + 1][$n] = (-$s - $y * $t) / $z;
}
}
// Overflow control
$t = abs($this->H[$i][$n]);
if (($eps * $t) * $t > 1) {
@ -741,6 +866,7 @@ class EigenvalueDecomposition
}
}
}
// Complex vector
} elseif ($q < 0) {
$l = $n - 1;
@ -753,6 +879,7 @@ class EigenvalueDecomposition
$this->H[$n - 1][$n - 1] = $this->cdivr;
$this->H[$n - 1][$n] = $this->cdivi;
}
$this->H[$n][$n - 1] = 0.0;
$this->H[$n][$n] = 1.0;
for ($i = $n - 2; $i >= 0; --$i) {
@ -763,6 +890,7 @@ class EigenvalueDecomposition
$ra = $ra + $this->H[$i][$j] * $this->H[$j][$n - 1];
$sa = $sa + $this->H[$i][$j] * $this->H[$j][$n];
}
$w = $this->H[$i][$i] - $p;
if ($this->e[$i] < 0.0) {
$z = $w;
@ -783,6 +911,7 @@ class EigenvalueDecomposition
if ($vr == 0.0 & $vi == 0.0) {
$vr = $eps * $norm * (abs($w) + abs($q) + abs($x) + abs($y) + abs($z));
}
$this->cdiv($x * $r - $z * $ra + $q * $sa, $x * $s - $z * $sa - $q * $ra, $vr, $vi);
$this->H[$i][$n - 1] = $this->cdivr;
$this->H[$i][$n] = $this->cdivi;
@ -795,6 +924,7 @@ class EigenvalueDecomposition
$this->H[$i + 1][$n] = $this->cdivi;
}
}
// Overflow control
$t = max(abs($this->H[$i][$n - 1]), abs($this->H[$i][$n]));
if (($eps * $t) * $t > 1) {
@ -824,81 +954,9 @@ class EigenvalueDecomposition
for ($k = $low; $k <= min($j, $high); ++$k) {
$z = $z + $this->V[$i][$k] * $this->H[$k][$j];
}
$this->V[$i][$j] = $z;
}
}
}
/**
* Return the eigenvector matrix
*
* @return array
*/
public function getEigenvectors()
{
$vectors = $this->V;
// Always return the eigenvectors of length 1.0
$vectors = new Matrix($vectors);
$vectors = array_map(function ($vect) {
$sum = 0;
for ($i = 0; $i < count($vect); ++$i) {
$sum += $vect[$i] ** 2;
}
$sum = sqrt($sum);
for ($i = 0; $i < count($vect); ++$i) {
$vect[$i] /= $sum;
}
return $vect;
}, $vectors->transpose()->toArray());
return $vectors;
}
/**
* Return the real parts of the eigenvalues<br>
* d = real(diag(D));
*
* @return array
*/
public function getRealEigenvalues()
{
return $this->d;
}
/**
* Return the imaginary parts of the eigenvalues <br>
* d = imag(diag(D))
*
* @return array
*/
public function getImagEigenvalues()
{
return $this->e;
}
/**
* Return the block diagonal eigenvalue matrix
*
* @return array
*/
public function getDiagonalEigenvalues()
{
$D = [];
for ($i = 0; $i < $this->n; ++$i) {
$D[$i] = array_fill(0, $this->n, 0.0);
$D[$i][$i] = $this->d[$i];
if ($this->e[$i] == 0) {
continue;
}
$o = ($this->e[$i] > 0) ? $i + 1 : $i - 1;
$D[$i][$o] = $this->e[$i];
}
return $D;
}
}

View File

@ -1,6 +1,7 @@
<?php
declare(strict_types=1);
/**
* @package JAMA
*
@ -90,6 +91,7 @@ class LUDecomposition
for ($i = 0; $i < $this->m; ++$i) {
$this->piv[$i] = $i;
}
$this->pivsign = 1;
$LUcolj = [];
@ -99,6 +101,7 @@ class LUDecomposition
for ($i = 0; $i < $this->m; ++$i) {
$LUcolj[$i] = &$this->LU[$i][$j];
}
// Apply previous transformations.
for ($i = 0; $i < $this->m; ++$i) {
$LUrowi = $this->LU[$i];
@ -108,8 +111,10 @@ class LUDecomposition
for ($k = 0; $k < $kmax; ++$k) {
$s += $LUrowi[$k] * $LUcolj[$k];
}
$LUrowi[$j] = $LUcolj[$i] -= $s;
}
// Find pivot and exchange if necessary.
$p = $j;
for ($i = $j + 1; $i < $this->m; ++$i) {
@ -117,17 +122,20 @@ class LUDecomposition
$p = $i;
}
}
if ($p != $j) {
for ($k = 0; $k < $this->n; ++$k) {
$t = $this->LU[$p][$k];
$this->LU[$p][$k] = $this->LU[$j][$k];
$this->LU[$j][$k] = $t;
}
$k = $this->piv[$p];
$this->piv[$p] = $this->piv[$j];
$this->piv[$j] = $k;
$this->pivsign = $this->pivsign * -1;
}
// Compute multipliers.
if (($j < $this->m) && ($this->LU[$j][$j] != 0.0)) {
for ($i = $j + 1; $i < $this->m; ++$i) {
@ -142,7 +150,7 @@ class LUDecomposition
*
* @return Matrix Lower triangular factor
*/
public function getL() : Matrix
public function getL(): Matrix
{
$L = [];
for ($i = 0; $i < $this->m; ++$i) {
@ -165,7 +173,7 @@ class LUDecomposition
*
* @return Matrix Upper triangular factor
*/
public function getU() : Matrix
public function getU(): Matrix
{
$U = [];
for ($i = 0; $i < $this->n; ++$i) {
@ -186,7 +194,7 @@ class LUDecomposition
*
* @return array Pivot vector
*/
public function getPivot() : array
public function getPivot(): array
{
return $this->piv;
}
@ -247,7 +255,7 @@ class LUDecomposition
*
* @throws MatrixException
*/
public function solve(Matrix $B) : array
public function solve(Matrix $B): array
{
if ($B->getRows() != $this->m) {
throw MatrixException::notSquareMatrix();
@ -268,11 +276,13 @@ class LUDecomposition
}
}
}
// Solve U*X = Y;
for ($k = $this->n - 1; $k >= 0; --$k) {
for ($j = 0; $j < $nx; ++$j) {
$X[$k][$j] /= $this->LU[$k][$k];
}
for ($i = 0; $i < $k; ++$i) {
for ($j = 0; $j < $nx; ++$j) {
$X[$i][$j] -= $X[$k][$j] * $this->LU[$i][$k];
@ -283,7 +293,7 @@ class LUDecomposition
return $X;
}
protected function getSubMatrix(array $matrix, array $RL, int $j0, int $jF) : array
protected function getSubMatrix(array $matrix, array $RL, int $j0, int $jF): array
{
$m = count($RL);
$n = $jF - $j0;

View File

@ -13,7 +13,7 @@ class Matrix
/**
* @var array
*/
private $matrix;
private $matrix = [];
/**
* @var int
@ -56,7 +56,7 @@ class Matrix
$this->matrix = $matrix;
}
public static function fromFlatArray(array $array) : Matrix
public static function fromFlatArray(array $array): self
{
$matrix = [];
foreach ($array as $value) {
@ -66,12 +66,12 @@ class Matrix
return new self($matrix);
}
public function toArray() : array
public function toArray(): array
{
return $this->matrix;
}
public function toScalar() : float
public function toScalar(): float
{
return $this->matrix[0][0];
}
@ -89,7 +89,7 @@ class Matrix
/**
* @throws MatrixException
*/
public function getColumnValues($column) : array
public function getColumnValues($column): array
{
if ($column >= $this->columns) {
throw MatrixException::columnOutOfRange();
@ -123,7 +123,7 @@ class Matrix
return $this->columns === $this->rows;
}
public function transpose() : Matrix
public function transpose(): self
{
if ($this->rows == 1) {
$matrix = array_map(function ($el) {
@ -136,7 +136,7 @@ class Matrix
return new self($matrix, false);
}
public function multiply(Matrix $matrix) : Matrix
public function multiply(self $matrix): self
{
if ($this->columns != $matrix->getRows()) {
throw InvalidArgumentException::inconsistentMatrixSupplied();
@ -157,7 +157,7 @@ class Matrix
return new self($product, false);
}
public function divideByScalar($value) : Matrix
public function divideByScalar($value): self
{
$newMatrix = [];
for ($i = 0; $i < $this->rows; ++$i) {
@ -169,7 +169,7 @@ class Matrix
return new self($newMatrix, false);
}
public function multiplyByScalar($value) : Matrix
public function multiplyByScalar($value): self
{
$newMatrix = [];
for ($i = 0; $i < $this->rows; ++$i) {
@ -184,7 +184,7 @@ class Matrix
/**
* Element-wise addition of the matrix with another one
*/
public function add(Matrix $other) : Matrix
public function add(self $other): self
{
return $this->_add($other);
}
@ -192,15 +192,74 @@ class Matrix
/**
* Element-wise subtracting of another matrix from this one
*/
public function subtract(Matrix $other) : Matrix
public function subtract(self $other): self
{
return $this->_add($other, -1);
}
/**
 * Returns the inverse of this matrix.
 *
 * The inverse is obtained by passing an identity matrix of the same size to
 * LUDecomposition::solve() for this matrix.
 *
 * @throws MatrixException when the matrix is not square
 */
public function inverse(): self
{
    if ($this->isSquare()) {
        $decomposition = new LUDecomposition($this);

        return new self($decomposition->solve($this->getIdentity()), false);
    }

    throw MatrixException::notSquareMatrix();
}
/**
 * Returns a new matrix without the given row and the given column.
 *
 * The remaining rows and columns keep their relative order and are
 * re-indexed starting from 0.
 *
 * @param int $row    index of the row to leave out
 * @param int $column index of the column to leave out
 */
public function crossOut(int $row, int $column): self
{
    $reduced = [];
    $targetRow = 0;
    for ($i = 0; $i < $this->rows; ++$i) {
        if ($row == $i) {
            continue;
        }

        $targetColumn = 0;
        for ($j = 0; $j < $this->columns; ++$j) {
            if ($column == $j) {
                continue;
            }

            $reduced[$targetRow][$targetColumn] = $this->matrix[$i][$j];
            ++$targetColumn;
        }

        ++$targetRow;
    }

    return new self($reduced, false);
}
/**
 * Tells whether the determinant of this matrix equals zero.
 *
 * NOTE(review): the determinant is compared to 0 with a loose, exact
 * comparison, so a very small non-zero determinant is reported as
 * non-singular.
 */
public function isSingular(): bool
{
    $determinant = $this->getDeterminant();

    return $determinant == 0;
}
/**
 * Returns the transpose of the given array.
 *
 * The array is wrapped in a Matrix (constructed with its second argument
 * set to false), transposed, and converted back to a plain array.
 */
public static function transposeArray(array $array): array
{
    $wrapped = new self($array, false);
    $transposed = $wrapped->transpose();

    return $transposed->toArray();
}
/**
 * Returns the dot product of two arrays<br>
 * Matrix::dot(x, y) ==> x.y'
 *
 * Both arrays are wrapped in matrices, the first is multiplied by the
 * transpose of the second, and the first row of the product is returned.
 */
public static function dot(array $array1, array $array2): array
{
    $left = new self($array1, false);
    $right = new self($array2, false);

    $product = $left->multiply($right->transpose());
    $rows = $product->toArray();

    return $rows[0];
}
/**
* Element-wise addition or subtraction depending on the given sign parameter
*/
protected function _add(Matrix $other, int $sign = 1) : Matrix
protected function _add(self $other, int $sign = 1): self
{
$a1 = $this->toArray();
$a2 = $other->toArray();
@ -215,23 +274,10 @@ class Matrix
return new self($newMatrix, false);
}
public function inverse() : Matrix
{
if (!$this->isSquare()) {
throw MatrixException::notSquareMatrix();
}
$LU = new LUDecomposition($this);
$identity = $this->getIdentity();
$inverse = $LU->solve($identity);
return new self($inverse, false);
}
/**
* Returns diagonal identity matrix of the same size of this matrix
*/
protected function getIdentity() : Matrix
protected function getIdentity(): self
{
$array = array_fill(0, $this->rows, array_fill(0, $this->columns, 0));
for ($i = 0; $i < $this->rows; ++$i) {
@ -240,49 +286,4 @@ class Matrix
return new self($array, false);
}
public function crossOut(int $row, int $column) : Matrix
{
$newMatrix = [];
$r = 0;
for ($i = 0; $i < $this->rows; ++$i) {
$c = 0;
if ($row != $i) {
for ($j = 0; $j < $this->columns; ++$j) {
if ($column != $j) {
$newMatrix[$r][$c] = $this->matrix[$i][$j];
++$c;
}
}
++$r;
}
}
return new self($newMatrix, false);
}
public function isSingular() : bool
{
return 0 == $this->getDeterminant();
}
/**
* Returns the transpose of given array
*/
public static function transposeArray(array $array) : array
{
return (new self($array, false))->transpose()->toArray();
}
/**
* Returns the dot product of two arrays<br>
* Matrix::dot(x, y) ==> x.y'
*/
public static function dot(array $array1, array $array2) : array
{
$m1 = new self($array1, false);
$m2 = new self($array2, false);
return $m1->multiply($m2->transpose())->toArray()[0];
}
}

View File

@ -4,12 +4,15 @@ declare(strict_types=1);
namespace Phpml\Math;
class Set implements \IteratorAggregate
use ArrayIterator;
use IteratorAggregate;
class Set implements IteratorAggregate
{
/**
* @var string[]|int[]|float[]
*/
private $elements;
private $elements = [];
/**
* @param string[]|int[]|float[] $elements
@ -22,7 +25,7 @@ class Set implements \IteratorAggregate
/**
* Creates the union of A and B.
*/
public static function union(Set $a, Set $b) : Set
public static function union(self $a, self $b): self
{
return new self(array_merge($a->toArray(), $b->toArray()));
}
@ -30,7 +33,7 @@ class Set implements \IteratorAggregate
/**
* Creates the intersection of A and B.
*/
public static function intersection(Set $a, Set $b) : Set
public static function intersection(self $a, self $b): self
{
return new self(array_intersect($a->toArray(), $b->toArray()));
}
@ -38,7 +41,7 @@ class Set implements \IteratorAggregate
/**
* Creates the difference of A and B.
*/
public static function difference(Set $a, Set $b) : Set
public static function difference(self $a, self $b): self
{
return new self(array_diff($a->toArray(), $b->toArray()));
}
@ -48,7 +51,7 @@ class Set implements \IteratorAggregate
*
* @return Set[]
*/
public static function cartesian(Set $a, Set $b) : array
public static function cartesian(self $a, self $b): array
{
$cartesian = [];
@ -66,7 +69,7 @@ class Set implements \IteratorAggregate
*
* @return Set[]
*/
public static function power(Set $a) : array
public static function power(self $a): array
{
$power = [new self()];
@ -79,24 +82,10 @@ class Set implements \IteratorAggregate
return $power;
}
/**
* Removes duplicates and rewrites index.
*
* @param string[]|int[]|float[] $elements
*
* @return string[]|int[]|float[]
*/
private static function sanitize(array $elements) : array
{
sort($elements, SORT_ASC);
return array_values(array_unique($elements, SORT_ASC));
}
/**
* @param string|int|float $element
*/
public function add($element) : Set
public function add($element): self
{
return $this->addAll([$element]);
}
@ -104,7 +93,7 @@ class Set implements \IteratorAggregate
/**
* @param string[]|int[]|float[] $elements
*/
public function addAll(array $elements) : Set
public function addAll(array $elements): self
{
$this->elements = self::sanitize(array_merge($this->elements, $elements));
@ -114,7 +103,7 @@ class Set implements \IteratorAggregate
/**
* @param string|int|float $element
*/
public function remove($element) : Set
public function remove($element): self
{
return $this->removeAll([$element]);
}
@ -122,7 +111,7 @@ class Set implements \IteratorAggregate
/**
* @param string[]|int[]|float[] $elements
*/
public function removeAll(array $elements) : Set
public function removeAll(array $elements): self
{
$this->elements = self::sanitize(array_diff($this->elements, $elements));
@ -132,7 +121,7 @@ class Set implements \IteratorAggregate
/**
* @param string|int|float $element
*/
public function contains($element) : bool
public function contains($element): bool
{
return $this->containsAll([$element]);
}
@ -140,7 +129,7 @@ class Set implements \IteratorAggregate
/**
* @param string[]|int[]|float[] $elements
*/
public function containsAll(array $elements) : bool
public function containsAll(array $elements): bool
{
return !array_diff($elements, $this->elements);
}
@ -148,23 +137,37 @@ class Set implements \IteratorAggregate
/**
* @return string[]|int[]|float[]
*/
public function toArray() : array
public function toArray(): array
{
return $this->elements;
}
public function getIterator() : \ArrayIterator
public function getIterator(): ArrayIterator
{
return new \ArrayIterator($this->elements);
return new ArrayIterator($this->elements);
}
public function isEmpty() : bool
public function isEmpty(): bool
{
return $this->cardinality() == 0;
}
public function cardinality() : int
public function cardinality(): int
{
return count($this->elements);
}
/**
 * Removes duplicates and rewrites index.
 *
 * The elements are sorted in ascending order first, then de-duplicated, and
 * finally re-indexed so the result is a list with keys 0..n-1.
 *
 * @param string[]|int[]|float[] $elements
 *
 * @return string[]|int[]|float[]
 */
private static function sanitize(array $elements): array
{
    // sort() and array_unique() expect a sort-type flag (SORT_REGULAR,
    // SORT_NUMERIC, SORT_STRING, ...). SORT_ASC is an ordering constant for
    // array_multisort() and is not one of them, so the regular comparison is
    // requested explicitly here.
    sort($elements, SORT_REGULAR);

    return array_values(array_unique($elements, SORT_REGULAR));
}
}

View File

@ -14,7 +14,7 @@ class Correlation
*
* @throws InvalidArgumentException
*/
public static function pearson(array $x, array $y) : float
public static function pearson(array $x, array $y): float
{
if (count($x) !== count($y)) {
throw InvalidArgumentException::arraySizeNotMatch();

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Math\Statistic;
use Exception;
use Phpml\Exception\InvalidArgumentException;
class Covariance
@ -13,7 +14,7 @@ class Covariance
*
* @throws InvalidArgumentException
*/
public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null) : float
public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
{
if (empty($x) || empty($y)) {
throw InvalidArgumentException::arrayCantBeEmpty();
@ -51,7 +52,7 @@ class Covariance
* @throws InvalidArgumentException
* @throws \Exception
*/
public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null) : float
public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
{
if (empty($data)) {
throw InvalidArgumentException::arrayCantBeEmpty();
@ -63,7 +64,7 @@ class Covariance
}
if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) {
throw new \Exception('Given indices i and k do not match with the dimensionality of data');
throw new Exception('Given indices i and k do not match with the dimensionality of data');
}
if ($meanX === null || $meanY === null) {
@ -92,10 +93,12 @@ class Covariance
if ($index == $i) {
$val[0] = $col - $meanX;
}
if ($index == $k) {
$val[1] = $col - $meanY;
}
}
$sum += $val[0] * $val[1];
}
}
@ -112,7 +115,7 @@ class Covariance
*
* @param array|null $means
*/
public static function covarianceMatrix(array $data, ?array $means = null) : array
public static function covarianceMatrix(array $data, ?array $means = null): array
{
$n = count($data[0]);

View File

@ -41,7 +41,7 @@ class Gaussian
* Returns probability density value of the given <i>$value</i> based on
* given standard deviation and the mean
*/
public static function distributionPdf(float $mean, float $std, float $value) : float
public static function distributionPdf(float $mean, float $std, float $value): float
{
$normal = new self($mean, $std);

View File

@ -11,7 +11,7 @@ class Mean
/**
* @throws InvalidArgumentException
*/
public static function arithmetic(array $numbers) : float
public static function arithmetic(array $numbers): float
{
self::checkArrayLength($numbers);
@ -32,7 +32,7 @@ class Mean
sort($numbers, SORT_NUMERIC);
$median = $numbers[$middleIndex];
if (0 === $count % 2) {
if ($count % 2 === 0) {
$median = ($median + $numbers[$middleIndex - 1]) / 2;
}

View File

@ -13,7 +13,7 @@ class StandardDeviation
*
* @throws InvalidArgumentException
*/
public static function population(array $a, bool $sample = true) : float
public static function population(array $a, bool $sample = true): float
{
if (empty($a)) {
throw InvalidArgumentException::arrayCantBeEmpty();

View File

@ -51,27 +51,27 @@ class ClassificationReport
$this->computeAverage();
}
public function getPrecision() : array
public function getPrecision(): array
{
return $this->precision;
}
public function getRecall() : array
public function getRecall(): array
{
return $this->recall;
}
public function getF1score() : array
public function getF1score(): array
{
return $this->f1score;
}
public function getSupport() : array
public function getSupport(): array
{
return $this->support;
}
public function getAverage() : array
public function getAverage(): array
{
return $this->average;
}
@ -93,6 +93,7 @@ class ClassificationReport
$this->average[$metric] = 0.0;
continue;
}
$this->average[$metric] = array_sum($values) / count($values);
}
}
@ -102,7 +103,8 @@ class ClassificationReport
*/
private function computePrecision(int $truePositive, int $falsePositive)
{
if (0 == ($divider = $truePositive + $falsePositive)) {
$divider = $truePositive + $falsePositive;
if ($divider == 0) {
return 0.0;
}
@ -114,23 +116,25 @@ class ClassificationReport
*/
private function computeRecall(int $truePositive, int $falseNegative)
{
if (0 == ($divider = $truePositive + $falseNegative)) {
$divider = $truePositive + $falseNegative;
if ($divider == 0) {
return 0.0;
}
return $truePositive / $divider;
}
private function computeF1Score(float $precision, float $recall) : float
private function computeF1Score(float $precision, float $recall): float
{
if (0 == ($divider = $precision + $recall)) {
$divider = $precision + $recall;
if ($divider == 0) {
return 0.0;
}
return 2.0 * (($precision * $recall) / $divider);
}
private static function getLabelIndexedArray(array $actualLabels, array $predictedLabels) : array
private static function getLabelIndexedArray(array $actualLabels, array $predictedLabels): array
{
$labels = array_values(array_unique(array_merge($actualLabels, $predictedLabels)));
sort($labels);

View File

@ -6,7 +6,7 @@ namespace Phpml\Metric;
class ConfusionMatrix
{
public static function compute(array $actualLabels, array $predictedLabels, ?array $labels = null) : array
public static function compute(array $actualLabels, array $predictedLabels, ?array $labels = null): array
{
$labels = $labels ? array_flip($labels) : self::getUniqueLabels($actualLabels);
$matrix = self::generateMatrixWithZeros($labels);
@ -31,7 +31,7 @@ class ConfusionMatrix
return $matrix;
}
private static function generateMatrixWithZeros(array $labels) : array
private static function generateMatrixWithZeros(array $labels): array
{
$count = count($labels);
$matrix = [];
@ -43,7 +43,7 @@ class ConfusionMatrix
return $matrix;
}
private static function getUniqueLabels(array $labels) : array
private static function getUniqueLabels(array $labels): array
{
$labels = array_values(array_unique($labels));
sort($labels);

View File

@ -26,7 +26,7 @@ class ModelManager
}
}
public function restoreFromFile(string $filepath) : Estimator
public function restoreFromFile(string $filepath): Estimator
{
if (!file_exists($filepath) || !is_readable($filepath)) {
throw FileException::cantOpenFile(basename($filepath));

View File

@ -9,5 +9,5 @@ interface ActivationFunction
/**
* @param float|int $value
*/
public function compute($value) : float;
public function compute($value): float;
}

View File

@ -11,7 +11,7 @@ class BinaryStep implements ActivationFunction
/**
* @param float|int $value
*/
public function compute($value) : float
public function compute($value): float
{
return $value >= 0 ? 1.0 : 0.0;
}

View File

@ -11,7 +11,7 @@ class Gaussian implements ActivationFunction
/**
* @param float|int $value
*/
public function compute($value) : float
public function compute($value): float
{
return exp(-pow($value, 2));
}

View File

@ -21,7 +21,7 @@ class HyperbolicTangent implements ActivationFunction
/**
* @param float|int $value
*/
public function compute($value) : float
public function compute($value): float
{
return tanh($this->beta * $value);
}

View File

@ -21,7 +21,7 @@ class PReLU implements ActivationFunction
/**
* @param float|int $value
*/
public function compute($value) : float
public function compute($value): float
{
return $value >= 0 ? $value : $this->beta * $value;
}

View File

@ -21,7 +21,7 @@ class Sigmoid implements ActivationFunction
/**
* @param float|int $value
*/
public function compute($value) : float
public function compute($value): float
{
return 1 / (1 + exp(-$this->beta * $value));
}

View File

@ -21,7 +21,7 @@ class ThresholdedReLU implements ActivationFunction
/**
* @param float|int $value
*/
public function compute($value) : float
public function compute($value): float
{
return $value > $this->theta ? $value : 0.0;
}

View File

@ -28,20 +28,6 @@ class Layer
}
}
/**
* @param ActivationFunction|null $activationFunction
*
* @return Neuron
*/
private function createNode(string $nodeClass, ?ActivationFunction $activationFunction = null)
{
if (Neuron::class == $nodeClass) {
return new Neuron($activationFunction);
}
return new $nodeClass();
}
public function addNode(Node $node): void
{
$this->nodes[] = $node;
@ -50,8 +36,20 @@ class Layer
/**
* @return Node[]
*/
public function getNodes() : array
public function getNodes(): array
{
return $this->nodes;
}
/**
 * Instantiates a node of the requested class.
 *
 * Neuron is the only class that receives the activation function; any other
 * class name is instantiated without constructor arguments.
 *
 * @param string                  $nodeClass          class name of the node to create
 * @param ActivationFunction|null $activationFunction handed to the Neuron constructor only
 *
 * @return Node
 */
private function createNode(string $nodeClass, ?ActivationFunction $activationFunction = null): Node
{
    // Class names are plain strings, so compare them strictly.
    if ($nodeClass === Neuron::class) {
        return new Neuron($activationFunction);
    }

    return new $nodeClass();
}
}

View File

@ -8,20 +8,15 @@ interface Network
{
/**
* @param mixed $input
*
* @return self
*/
public function setInput($input);
public function setInput($input): self;
/**
* @return array
*/
public function getOutput() : array;
public function getOutput(): array;
public function addLayer(Layer $layer);
/**
* @return Layer[]
*/
public function getLayers() : array;
public function getLayers(): array;
}

View File

@ -14,7 +14,7 @@ abstract class LayeredNetwork implements Network
/**
* @var Layer[]
*/
protected $layers;
protected $layers = [];
public function addLayer(Layer $layer): void
{
@ -24,7 +24,7 @@ abstract class LayeredNetwork implements Network
/**
* @return Layer[]
*/
public function getLayers() : array
public function getLayers(): array
{
return $this->layers;
}
@ -39,7 +39,7 @@ abstract class LayeredNetwork implements Network
return $this->layers[count($this->layers) - 1];
}
public function getOutput() : array
public function getOutput(): array
{
$result = [];
foreach ($this->getOutputLayer()->getNodes() as $neuron) {
@ -54,7 +54,7 @@ abstract class LayeredNetwork implements Network
*
* @return $this
*/
public function setInput($input)
public function setInput($input): Network
{
$firstLayer = $this->layers[0];

View File

@ -20,41 +20,36 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
{
use Predictable;
/**
* @var int
*/
private $inputLayerFeatures;
/**
* @var array
*/
private $hiddenLayers;
/**
* @var array
*/
protected $classes = [];
/**
* @var int
*/
private $iterations;
/**
* @var ActivationFunction
*/
protected $activationFunction;
/**
* @var float
*/
private $learningRate;
/**
* @var Backpropagation
*/
protected $backpropagation = null;
/**
* @var int
*/
private $inputLayerFeatures;
/**
* @var array
*/
private $hiddenLayers = [];
/**
* @var float
*/
private $learningRate;
/**
* @throws InvalidArgumentException
*/
@ -78,18 +73,6 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
$this->initNetwork();
}
private function initNetwork(): void
{
$this->addInputLayer($this->inputLayerFeatures);
$this->addNeuronLayers($this->hiddenLayers, $this->activationFunction);
$this->addNeuronLayers([count($this->classes)], $this->activationFunction);
$this->addBiasNodes();
$this->generateSynapses();
$this->backpropagation = new Backpropagation($this->learningRate);
}
public function train(array $samples, array $targets): void
{
$this->reset();
@ -127,6 +110,18 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
$this->removeLayers();
}
/**
 * Assembles the network from the configured properties.
 *
 * Adds the input layer, then the hidden neuron layers, then one more neuron
 * layer sized to the number of classes; afterwards calls addBiasNodes() and
 * generateSynapses() and creates a new Backpropagation instance with the
 * configured learning rate.
 */
private function initNetwork(): void
{
$this->addInputLayer($this->inputLayerFeatures);
$this->addNeuronLayers($this->hiddenLayers, $this->activationFunction);
// Final layer: a single layer with one neuron per entry in $this->classes.
$this->addNeuronLayers([count($this->classes)], $this->activationFunction);
$this->addBiasNodes();
$this->generateSynapses();
$this->backpropagation = new Backpropagation($this->learningRate);
}
private function addInputLayer(int $nodes): void
{
$this->addLayer(new Layer($nodes, Input::class));

View File

@ -6,5 +6,5 @@ namespace Phpml\NeuralNetwork;
interface Node
{
public function getOutput() : float;
public function getOutput(): float;
}

View File

@ -8,7 +8,7 @@ use Phpml\NeuralNetwork\Node;
class Bias implements Node
{
public function getOutput() : float
public function getOutput(): float
{
return 1.0;
}

View File

@ -18,7 +18,7 @@ class Input implements Node
$this->input = $input;
}
public function getOutput() : float
public function getOutput(): float
{
return $this->input;
}

View File

@ -5,6 +5,7 @@ declare(strict_types=1);
namespace Phpml\NeuralNetwork\Node;
use Phpml\NeuralNetwork\ActivationFunction;
use Phpml\NeuralNetwork\ActivationFunction\Sigmoid;
use Phpml\NeuralNetwork\Node;
use Phpml\NeuralNetwork\Node\Neuron\Synapse;
@ -13,7 +14,7 @@ class Neuron implements Node
/**
* @var Synapse[]
*/
protected $synapses;
protected $synapses = [];
/**
* @var ActivationFunction
@ -27,7 +28,7 @@ class Neuron implements Node
public function __construct(?ActivationFunction $activationFunction = null)
{
$this->activationFunction = $activationFunction ?: new ActivationFunction\Sigmoid();
$this->activationFunction = $activationFunction ?: new Sigmoid();
$this->synapses = [];
$this->output = 0;
}
@ -45,9 +46,9 @@ class Neuron implements Node
return $this->synapses;
}
public function getOutput() : float
public function getOutput(): float
{
if (0 === $this->output) {
if ($this->output === 0) {
$sum = 0;
foreach ($this->synapses as $synapse) {
$sum += $synapse->getOutput();

View File

@ -27,12 +27,7 @@ class Synapse
$this->weight = $weight ?: $this->generateRandomWeight();
}
protected function generateRandomWeight() : float
{
return 1 / random_int(5, 25) * (random_int(0, 1) ? -1 : 1);
}
public function getOutput() : float
public function getOutput(): float
{
return $this->weight * $this->node->getOutput();
}
@ -42,7 +37,7 @@ class Synapse
$this->weight += $delta;
}
public function getWeight() : float
public function getWeight(): float
{
return $this->weight;
}
@ -51,4 +46,9 @@ class Synapse
{
return $this->node;
}
/**
 * Draws a random initial weight.
 *
 * The magnitude is 1/n for a random integer n in [5, 25] and the sign is
 * chosen at random, so the result lies in [-0.2, -0.04] or [0.04, 0.2].
 */
protected function generateRandomWeight(): float
{
    // Draw the magnitude before the sign, matching the original evaluation order.
    $magnitude = 1 / random_int(5, 25);
    $sign = random_int(0, 1) === 1 ? -1 : 1;

    return $magnitude * $sign;
}
}

View File

@ -47,6 +47,7 @@ class Backpropagation
}
}
}
$this->prevSigmas = $this->sigmas;
}
@ -55,7 +56,7 @@ class Backpropagation
$this->prevSigmas = null;
}
private function getSigma(Neuron $neuron, int $targetClass, int $key, bool $lastLayer) : float
private function getSigma(Neuron $neuron, int $targetClass, int $key, bool $lastLayer): float
{
$neuronOutput = $neuron->getOutput();
$sigma = $neuronOutput * (1 - $neuronOutput);
@ -65,6 +66,7 @@ class Backpropagation
if ($targetClass === $key) {
$value = 1;
}
$sigma *= ($value - $neuronOutput);
} else {
$sigma *= $this->getPrevSigma($neuron);
@ -75,7 +77,7 @@ class Backpropagation
return $sigma;
}
private function getPrevSigma(Neuron $neuron) : float
private function getPrevSigma(Neuron $neuron): float
{
$sigma = 0.0;

View File

@ -29,12 +29,12 @@ class Sigma
return $this->neuron;
}
public function getSigma() : float
public function getSigma(): float
{
return $this->sigma;
}
public function getSigmaForNeuron(Neuron $neuron) : float
public function getSigmaForNeuron(Neuron $neuron): float
{
$sigma = 0.0;

View File

@ -9,7 +9,7 @@ class Pipeline implements Estimator
/**
* @var array|Transformer[]
*/
private $transformers;
private $transformers = [];
/**
* @var Estimator
@ -41,7 +41,7 @@ class Pipeline implements Estimator
/**
* @return array|Transformer[]
*/
public function getTransformers() : array
public function getTransformers(): array
{
return $this->transformers;
}

View File

@ -9,6 +9,7 @@ use Phpml\Preprocessing\Imputer\Strategy;
class Imputer implements Preprocessor
{
public const AXIS_COLUMN = 0;
public const AXIS_ROW = 1;
/**
@ -64,9 +65,9 @@ class Imputer implements Preprocessor
}
}
private function getAxis(int $column, array $currentSample) : array
private function getAxis(int $column, array $currentSample): array
{
if (self::AXIS_ROW === $this->axis) {
if ($this->axis === self::AXIS_ROW) {
return array_diff($currentSample, [$this->missingValue]);
}

View File

@ -9,7 +9,7 @@ use Phpml\Preprocessing\Imputer\Strategy;
class MeanStrategy implements Strategy
{
public function replaceValue(array $currentAxis) : float
public function replaceValue(array $currentAxis): float
{
return Mean::arithmetic($currentAxis);
}

View File

@ -9,7 +9,7 @@ use Phpml\Preprocessing\Imputer\Strategy;
class MedianStrategy implements Strategy
{
public function replaceValue(array $currentAxis) : float
public function replaceValue(array $currentAxis): float
{
return Mean::median($currentAxis);
}

View File

@ -11,7 +11,9 @@ use Phpml\Math\Statistic\StandardDeviation;
class Normalizer implements Preprocessor
{
public const NORM_L1 = 1;
public const NORM_L2 = 2;
public const NORM_STD = 3;
/**
@ -27,12 +29,12 @@ class Normalizer implements Preprocessor
/**
* @var array
*/
private $std;
private $std = [];
/**
* @var array
*/
private $mean;
private $mean = [];
/**
* @throws NormalizerException
@ -69,7 +71,7 @@ class Normalizer implements Preprocessor
$methods = [
self::NORM_L1 => 'normalizeL1',
self::NORM_L2 => 'normalizeL2',
self::NORM_STD => 'normalizeSTD'
self::NORM_STD => 'normalizeSTD',
];
$method = $methods[$this->norm];
@ -87,7 +89,7 @@ class Normalizer implements Preprocessor
$norm1 += abs($feature);
}
if (0 == $norm1) {
if ($norm1 == 0) {
$count = count($sample);
$sample = array_fill(0, $count, 1.0 / $count);
} else {
@ -103,9 +105,10 @@ class Normalizer implements Preprocessor
foreach ($sample as $feature) {
$norm2 += $feature * $feature;
}
$norm2 = sqrt((float) $norm2);
if (0 == $norm2) {
if ($norm2 == 0) {
$sample = array_fill(0, count($sample), 1);
} else {
foreach ($sample as &$feature) {

View File

@ -28,7 +28,7 @@ class LeastSquares implements Regression
/**
* @var array
*/
private $coefficients;
private $coefficients = [];
public function train(array $samples, array $targets): void
{
@ -51,12 +51,12 @@ class LeastSquares implements Regression
return $result;
}
public function getCoefficients() : array
public function getCoefficients(): array
{
return $this->coefficients;
}
public function getIntercept() : float
public function getIntercept(): float
{
return $this->intercept;
}
@ -79,7 +79,7 @@ class LeastSquares implements Regression
/**
* Add one dimension for intercept calculation.
*/
private function getSamplesMatrix() : Matrix
private function getSamplesMatrix(): Matrix
{
$samples = [];
foreach ($this->samples as $sample) {
@ -90,7 +90,7 @@ class LeastSquares implements Regression
return new Matrix($samples);
}
private function getTargetsMatrix() : Matrix
private function getTargetsMatrix(): Matrix
{
if (is_array($this->targets[0])) {
return new Matrix($this->targets);

View File

@ -34,7 +34,7 @@ class DataTransformer
return $set;
}
public static function predictions(string $rawPredictions, array $labels) : array
public static function predictions(string $rawPredictions, array $labels): array
{
$numericLabels = self::numericLabels($labels);
$results = [];
@ -47,7 +47,7 @@ class DataTransformer
return $results;
}
public static function numericLabels(array $labels) : array
public static function numericLabels(array $labels): array
{
$numericLabels = [];
foreach ($labels as $label) {

View File

@ -167,7 +167,7 @@ class SupportVectorMachine
}
/**
* @return array
* @return array|string
*/
public function predict(array $samples)
{

View File

@ -6,5 +6,5 @@ namespace Phpml\Tokenization;
interface Tokenizer
{
public function tokenize(string $text) : array;
public function tokenize(string $text): array;
}

View File

@ -6,7 +6,7 @@ namespace Phpml\Tokenization;
class WhitespaceTokenizer implements Tokenizer
{
public function tokenize(string $text) : array
public function tokenize(string $text): array
{
return preg_split('/[\pZ\pC]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
}

View File

@ -6,7 +6,7 @@ namespace Phpml\Tokenization;
class WordTokenizer implements Tokenizer
{
public function tokenize(string $text) : array
public function tokenize(string $text): array
{
$tokens = [];
preg_match_all('/\w\w+/u', $text, $tokens);

View File

@ -7,6 +7,7 @@ namespace tests\Phpml\Classification;
use Phpml\Association\Apriori;
use Phpml\ModelManager;
use PHPUnit\Framework\TestCase;
use ReflectionClass;
class AprioriTest extends TestCase
{
@ -172,7 +173,6 @@ class AprioriTest extends TestCase
/**
* Invokes objects method. Private/protected will be set accessible.
*
* @param object &$object Instantiated object to be called on
* @param string $method Method name to be called
* @param array $params Array of params to be passed
*
@ -180,7 +180,7 @@ class AprioriTest extends TestCase
*/
public function invoke(&$object, $method, array $params = [])
{
$reflection = new \ReflectionClass(get_class($object));
$reflection = new ReflectionClass(get_class($object));
$method = $reflection->getMethod($method);
$method->setAccessible(true);
@ -195,7 +195,7 @@ class AprioriTest extends TestCase
$testSamples = [['alpha', 'epsilon'], ['beta', 'theta']];
$predicted = $classifier->predict($testSamples);
$filename = 'apriori-test-'.rand(100, 999).'-'.uniqid();
$filename = 'apriori-test-'.random_int(100, 999).'-'.uniqid();
$filepath = tempnam(sys_get_temp_dir(), $filename);
$modelManager = new ModelManager();
$modelManager->saveToFile($classifier, $filepath);

View File

@ -24,7 +24,7 @@ class DecisionTreeTest extends TestCase
['sunny', 75, 70, 'true', 'Play'],
['overcast', 72, 90, 'true', 'Play'],
['overcast', 81, 75, 'false', 'Play'],
['rain', 71, 80, 'true', 'Dont_play']
['rain', 71, 80, 'true', 'Dont_play'],
];
private $extraData = [
@ -32,16 +32,6 @@ class DecisionTreeTest extends TestCase
['scorching', 100, 93, 'true', 'Dont_play'],
];
private function getData($input)
{
$targets = array_column($input, 4);
array_walk($input, function (&$v): void {
array_splice($v, 4, 1);
});
return [$input, $targets];
}
public function testPredictSingleSample()
{
[$data, $targets] = $this->getData($this->data);
@ -68,7 +58,7 @@ class DecisionTreeTest extends TestCase
$testSamples = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false']];
$predicted = $classifier->predict($testSamples);
$filename = 'decision-tree-test-'.rand(100, 999).'-'.uniqid();
$filename = 'decision-tree-test-'.random_int(100, 999).'-'.uniqid();
$filepath = tempnam(sys_get_temp_dir(), $filename);
$modelManager = new ModelManager();
$modelManager->saveToFile($classifier, $filepath);
@ -83,6 +73,16 @@ class DecisionTreeTest extends TestCase
[$data, $targets] = $this->getData($this->data);
$classifier = new DecisionTree(5);
$classifier->train($data, $targets);
$this->assertTrue(5 >= $classifier->actualDepth);
$this->assertTrue($classifier->actualDepth <= 5);
}
/**
 * Splits raw rows into samples and targets.
 *
 * The value at index 4 of every row is collected as the target, and that
 * column is then removed from each row.
 *
 * @return array Two-element list: [samples without column 4, targets]
 */
private function getData($input)
{
    $labels = array_column($input, 4);

    foreach ($input as &$row) {
        array_splice($row, 4, 1);
    }
    // Drop the dangling reference left behind by the by-reference loop.
    unset($row);

    return [$input, $labels];
}
}

Some files were not shown because too many files have changed in this diff Show More