mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-21 20:45:10 +00:00
Added EasyCodingStandard + lots of code fixes (#156)
* travis: move coveralls here, decouple from package
* composer: use PSR4
* phpunit: simpler config
* travis: add ecs run
* composer: add ecs dev
* use standard vendor/bin directory for dependency bins (a custom bin dir gets confused with local bins and requires gitignore handling)
* ecs: add PSR2
* [cs] PSR2 spacing fixes
* [cs] PSR2 class name fix
* [cs] PHP7 fixes - return semicolon spaces, old rand functions, typehints
* [cs] fix less strict typehints
* fix typehints to make tests pass
* ecs: ignore typehint-less elements
* [cs] standardize arrays
* [cs] standardize docblock, remove unused comments
* [cs] use self where possible
* [cs] sort class elements, from public to private
* [cs] do not use yoda (found fewer yoda cases than non-yoda ones)
* space
* [cs] do not assign in condition
* [cs] use namespace imports if possible
* [cs] use ::class over strings
* [cs] fix defaults for array properties, properties and constants single spacing
* cleanup ecs comments
* [cs] use item per line in multi-items array
* missing line
* misc
* rebase
This commit is contained in:
parent
b1d40bfa30
commit
726cf4cddf
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,8 +1,4 @@
|
||||
/vendor/
|
||||
humbuglog.*
|
||||
/bin/phpunit
|
||||
.coverage
|
||||
.php_cs.cache
|
||||
/bin/php-cs-fixer
|
||||
/bin/coveralls
|
||||
/build
|
||||
|
10
.travis.yml
10
.travis.yml
@ -6,7 +6,7 @@ matrix:
|
||||
include:
|
||||
- os: linux
|
||||
php: '7.1'
|
||||
env: DISABLE_XDEBUG="true"
|
||||
env: DISABLE_XDEBUG="true" STATIC_ANALYSIS="true"
|
||||
|
||||
- os: linux
|
||||
php: '7.2'
|
||||
@ -21,7 +21,7 @@ matrix:
|
||||
|
||||
before_install:
|
||||
- if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then /usr/bin/env bash bin/prepare_osx_env.sh ; fi
|
||||
- if [[ DISABLE_XDEBUG == "true" ]]; then phpenv config-rm xdebug.ini; fi
|
||||
- if [[ $DISABLE_XDEBUG == "true" ]]; then phpenv config-rm xdebug.ini; fi
|
||||
|
||||
install:
|
||||
- if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then /usr/bin/env bash bin/handle_brew_pkg.sh "${_PHP}" ; fi
|
||||
@ -29,10 +29,12 @@ install:
|
||||
- php composer.phar install --dev --no-interaction --ignore-platform-reqs
|
||||
|
||||
script:
|
||||
- bin/phpunit $PHPUNIT_FLAGS
|
||||
- vendor/bin/phpunit $PHPUNIT_FLAGS
|
||||
- if [[ $STATIC_ANALYSIS != "" ]]; then vendor/bin/ecs check src tests; fi
|
||||
|
||||
after_success:
|
||||
- |
|
||||
if [[ $PHPUNIT_FLAGS != "" ]]; then
|
||||
php bin/coveralls -v
|
||||
wget https://github.com/satooshi/php-coveralls/releases/download/v1.0.1/coveralls.phar;
|
||||
php coveralls.phar --verbose;
|
||||
fi
|
||||
|
@ -12,8 +12,8 @@
|
||||
}
|
||||
],
|
||||
"autoload": {
|
||||
"psr-0": {
|
||||
"Phpml": "src/"
|
||||
"psr-4": {
|
||||
"Phpml\\": "src/Phpml"
|
||||
}
|
||||
},
|
||||
"require": {
|
||||
@ -22,9 +22,8 @@
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^6.0",
|
||||
"friendsofphp/php-cs-fixer": "^2.4",
|
||||
"php-coveralls/php-coveralls": "^1.0"
|
||||
},
|
||||
"config": {
|
||||
"bin-dir": "bin"
|
||||
"symplify/easy-coding-standard": "dev-master as 2.5",
|
||||
"symplify/coding-standard": "dev-master as 2.5",
|
||||
"symplify/package-builder": "dev-master#3604bea as 2.5"
|
||||
}
|
||||
}
|
||||
|
1964
composer.lock
generated
1964
composer.lock
generated
File diff suppressed because it is too large
Load Diff
39
easy-coding-standard.neon
Normal file
39
easy-coding-standard.neon
Normal file
@ -0,0 +1,39 @@
|
||||
includes:
|
||||
- vendor/symplify/easy-coding-standard/config/psr2.neon
|
||||
- vendor/symplify/easy-coding-standard/config/php70.neon
|
||||
- vendor/symplify/easy-coding-standard/config/clean-code.neon
|
||||
- vendor/symplify/easy-coding-standard/config/common/array.neon
|
||||
- vendor/symplify/easy-coding-standard/config/common/docblock.neon
|
||||
- vendor/symplify/easy-coding-standard/config/common/namespaces.neon
|
||||
- vendor/symplify/easy-coding-standard/config/common/control-structures.neon
|
||||
|
||||
# strict.neon currently reports many errors; left disabled until they are fixed (help wanted)
|
||||
#- vendor/symplify/easy-coding-standard/config/common/strict.neon
|
||||
|
||||
checkers:
|
||||
- Symplify\CodingStandard\Fixer\Import\ImportNamespacedNameFixer
|
||||
- Symplify\CodingStandard\Fixer\Php\ClassStringToClassConstantFixer
|
||||
- Symplify\CodingStandard\Fixer\Property\ArrayPropertyDefaultValueFixer
|
||||
- Symplify\CodingStandard\Fixer\ClassNotation\PropertyAndConstantSeparationFixer
|
||||
- Symplify\CodingStandard\Fixer\ArrayNotation\StandaloneLineInMultilineArrayFixer
|
||||
|
||||
parameters:
|
||||
exclude_checkers:
|
||||
# from strict.neon
|
||||
- PhpCsFixer\Fixer\PhpUnit\PhpUnitStrictFixer
|
||||
skip:
|
||||
PhpCsFixer\Fixer\Alias\RandomApiMigrationFixer:
|
||||
# random_int() breaks code
|
||||
- src/Phpml/CrossValidation/RandomSplit.php
|
||||
SlevomatCodingStandard\Sniffs\Classes\UnusedPrivateElementsSniff:
|
||||
# magic calls
|
||||
- src/Phpml/Preprocessing/Normalizer.php
|
||||
|
||||
skip_codes:
|
||||
# missing typehints
|
||||
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingParameterTypeHint
|
||||
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversableParameterTypeHintSpecification
|
||||
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingReturnTypeHint
|
||||
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversableReturnTypeHintSpecification
|
||||
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingPropertyTypeHint
|
||||
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversablePropertyTypeHintSpecification
|
@ -6,11 +6,9 @@
|
||||
beStrictAboutTestSize="true"
|
||||
beStrictAboutChangesToGlobalState="true"
|
||||
>
|
||||
<testsuites>
|
||||
<testsuite name="PHP-ML Test Suite">
|
||||
<directory>tests/*</directory>
|
||||
</testsuite>
|
||||
</testsuites>
|
||||
<testsuite name="PHP-ML Test Suite">
|
||||
<directory>tests/*</directory>
|
||||
</testsuite>
|
||||
|
||||
<filter>
|
||||
<whitelist processUncoveredFilesFromWhitelist="true">
|
||||
|
@ -31,7 +31,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @var mixed[][][]
|
||||
*/
|
||||
private $large;
|
||||
private $large = [];
|
||||
|
||||
/**
|
||||
* Minimum relative frequency of transactions.
|
||||
@ -45,7 +45,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @var mixed[][]
|
||||
*/
|
||||
private $rules;
|
||||
private $rules = [];
|
||||
|
||||
/**
|
||||
* Apriori constructor.
|
||||
@ -61,7 +61,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @return mixed[][]
|
||||
*/
|
||||
public function getRules() : array
|
||||
public function getRules(): array
|
||||
{
|
||||
if (!$this->large) {
|
||||
$this->large = $this->apriori();
|
||||
@ -83,7 +83,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @return mixed[][][]
|
||||
*/
|
||||
public function apriori() : array
|
||||
public function apriori(): array
|
||||
{
|
||||
$L = [];
|
||||
$L[1] = $this->items();
|
||||
@ -102,7 +102,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @return mixed[][]
|
||||
*/
|
||||
protected function predictSample(array $sample) : array
|
||||
protected function predictSample(array $sample): array
|
||||
{
|
||||
$predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
|
||||
return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
|
||||
@ -133,7 +133,8 @@ class Apriori implements Associator
|
||||
private function generateRules(array $frequent): void
|
||||
{
|
||||
foreach ($this->antecedents($frequent) as $antecedent) {
|
||||
if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) {
|
||||
$confidence = $this->confidence($frequent, $antecedent);
|
||||
if ($this->confidence <= $confidence) {
|
||||
$consequent = array_values(array_diff($frequent, $antecedent));
|
||||
$this->rules[] = [
|
||||
self::ARRAY_KEY_ANTECEDENT => $antecedent,
|
||||
@ -152,7 +153,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @return mixed[][]
|
||||
*/
|
||||
private function powerSet(array $sample) : array
|
||||
private function powerSet(array $sample): array
|
||||
{
|
||||
$results = [[]];
|
||||
foreach ($sample as $item) {
|
||||
@ -171,7 +172,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @return mixed[][]
|
||||
*/
|
||||
private function antecedents(array $sample) : array
|
||||
private function antecedents(array $sample): array
|
||||
{
|
||||
$cardinality = count($sample);
|
||||
$antecedents = $this->powerSet($sample);
|
||||
@ -186,7 +187,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @return mixed[][]
|
||||
*/
|
||||
private function items() : array
|
||||
private function items(): array
|
||||
{
|
||||
$items = [];
|
||||
|
||||
@ -210,7 +211,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @return mixed[][]
|
||||
*/
|
||||
private function frequent(array $samples) : array
|
||||
private function frequent(array $samples): array
|
||||
{
|
||||
return array_filter($samples, function ($entry) {
|
||||
return $this->support($entry) >= $this->support;
|
||||
@ -224,7 +225,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @return mixed[][]
|
||||
*/
|
||||
private function candidates(array $samples) : array
|
||||
private function candidates(array $samples): array
|
||||
{
|
||||
$candidates = [];
|
||||
|
||||
@ -259,7 +260,7 @@ class Apriori implements Associator
|
||||
* @param mixed[] $set
|
||||
* @param mixed[] $subset
|
||||
*/
|
||||
private function confidence(array $set, array $subset) : float
|
||||
private function confidence(array $set, array $subset): float
|
||||
{
|
||||
return $this->support($set) / $this->support($subset);
|
||||
}
|
||||
@ -272,7 +273,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @param mixed[] $sample
|
||||
*/
|
||||
private function support(array $sample) : float
|
||||
private function support(array $sample): float
|
||||
{
|
||||
return $this->frequency($sample) / count($this->samples);
|
||||
}
|
||||
@ -284,7 +285,7 @@ class Apriori implements Associator
|
||||
*
|
||||
* @param mixed[] $sample
|
||||
*/
|
||||
private function frequency(array $sample) : int
|
||||
private function frequency(array $sample): int
|
||||
{
|
||||
return count(array_filter($this->samples, function ($entry) use ($sample) {
|
||||
return $this->subset($entry, $sample);
|
||||
@ -299,7 +300,7 @@ class Apriori implements Associator
|
||||
* @param mixed[][] $system
|
||||
* @param mixed[] $set
|
||||
*/
|
||||
private function contains(array $system, array $set) : bool
|
||||
private function contains(array $system, array $set): bool
|
||||
{
|
||||
return (bool) array_filter($system, function ($entry) use ($set) {
|
||||
return $this->equals($entry, $set);
|
||||
@ -312,7 +313,7 @@ class Apriori implements Associator
|
||||
* @param mixed[] $set
|
||||
* @param mixed[] $subset
|
||||
*/
|
||||
private function subset(array $set, array $subset) : bool
|
||||
private function subset(array $set, array $subset): bool
|
||||
{
|
||||
return !array_diff($subset, array_intersect($subset, $set));
|
||||
}
|
||||
@ -323,7 +324,7 @@ class Apriori implements Associator
|
||||
* @param mixed[] $set1
|
||||
* @param mixed[] $set2
|
||||
*/
|
||||
private function equals(array $set1, array $set2) : bool
|
||||
private function equals(array $set1, array $set2): bool
|
||||
{
|
||||
return array_diff($set1, $set2) == array_diff($set2, $set1);
|
||||
}
|
||||
|
@ -15,22 +15,18 @@ class DecisionTree implements Classifier
|
||||
use Trainable, Predictable;
|
||||
|
||||
public const CONTINUOUS = 1;
|
||||
|
||||
public const NOMINAL = 2;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $columnTypes;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $labels = [];
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
private $featureCount = 0;
|
||||
public $actualDepth = 0;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $columnTypes = [];
|
||||
|
||||
/**
|
||||
* @var DecisionTreeLeaf
|
||||
@ -42,10 +38,15 @@ class DecisionTree implements Classifier
|
||||
*/
|
||||
protected $maxDepth;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $labels = [];
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
public $actualDepth = 0;
|
||||
private $featureCount = 0;
|
||||
|
||||
/**
|
||||
* @var int
|
||||
@ -55,7 +56,7 @@ class DecisionTree implements Classifier
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $selectedFeatures;
|
||||
private $selectedFeatures = [];
|
||||
|
||||
/**
|
||||
* @var array
|
||||
@ -100,7 +101,7 @@ class DecisionTree implements Classifier
|
||||
}
|
||||
}
|
||||
|
||||
public static function getColumnTypes(array $samples) : array
|
||||
public static function getColumnTypes(array $samples): array
|
||||
{
|
||||
$types = [];
|
||||
$featureCount = count($samples[0]);
|
||||
@ -113,7 +114,122 @@ class DecisionTree implements Classifier
|
||||
return $types;
|
||||
}
|
||||
|
||||
protected function getSplitLeaf(array $records, int $depth = 0) : DecisionTreeLeaf
|
||||
/**
|
||||
* @param mixed $baseValue
|
||||
*/
|
||||
public function getGiniIndex($baseValue, array $colValues, array $targets): float
|
||||
{
|
||||
$countMatrix = [];
|
||||
foreach ($this->labels as $label) {
|
||||
$countMatrix[$label] = [0, 0];
|
||||
}
|
||||
|
||||
foreach ($colValues as $index => $value) {
|
||||
$label = $targets[$index];
|
||||
$rowIndex = $value === $baseValue ? 0 : 1;
|
||||
++$countMatrix[$label][$rowIndex];
|
||||
}
|
||||
|
||||
$giniParts = [0, 0];
|
||||
for ($i = 0; $i <= 1; ++$i) {
|
||||
$part = 0;
|
||||
$sum = array_sum(array_column($countMatrix, $i));
|
||||
if ($sum > 0) {
|
||||
foreach ($this->labels as $label) {
|
||||
$part += pow($countMatrix[$label][$i] / (float) $sum, 2);
|
||||
}
|
||||
}
|
||||
|
||||
$giniParts[$i] = (1 - $part) * $sum;
|
||||
}
|
||||
|
||||
return array_sum($giniParts) / count($colValues);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used to set number of columns to be used
|
||||
* when deciding a split at an internal node of the tree. <br>
|
||||
* If the value is given 0, then all features are used (default behaviour),
|
||||
* otherwise the given value will be used as a maximum for number of columns
|
||||
* randomly selected for each split operation.
|
||||
*
|
||||
* @return $this
|
||||
*
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function setNumFeatures(int $numFeatures)
|
||||
{
|
||||
if ($numFeatures < 0) {
|
||||
throw new InvalidArgumentException('Selected column count should be greater or equal to zero');
|
||||
}
|
||||
|
||||
$this->numUsableFeatures = $numFeatures;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* A string array to represent columns. Useful when HTML output or
|
||||
* column importances are desired to be inspected.
|
||||
*
|
||||
* @return $this
|
||||
*
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function setColumnNames(array $names)
|
||||
{
|
||||
if ($this->featureCount !== 0 && count($names) !== $this->featureCount) {
|
||||
throw new InvalidArgumentException(sprintf('Length of the given array should be equal to feature count %s', $this->featureCount));
|
||||
}
|
||||
|
||||
$this->columnNames = $names;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function getHtml(): string
|
||||
{
|
||||
return $this->tree->getHTML($this->columnNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* This will return an array including an importance value for
|
||||
* each column in the given dataset. The importance values are
|
||||
* normalized and their total makes 1.<br/>
|
||||
*/
|
||||
public function getFeatureImportances(): array
|
||||
{
|
||||
if ($this->featureImportances !== null) {
|
||||
return $this->featureImportances;
|
||||
}
|
||||
|
||||
$sampleCount = count($this->samples);
|
||||
$this->featureImportances = [];
|
||||
foreach ($this->columnNames as $column => $columnName) {
|
||||
$nodes = $this->getSplitNodesByColumn($column, $this->tree);
|
||||
|
||||
$importance = 0;
|
||||
foreach ($nodes as $node) {
|
||||
$importance += $node->getNodeImpurityDecrease($sampleCount);
|
||||
}
|
||||
|
||||
$this->featureImportances[$columnName] = $importance;
|
||||
}
|
||||
|
||||
// Normalize & sort the importances
|
||||
$total = array_sum($this->featureImportances);
|
||||
if ($total > 0) {
|
||||
foreach ($this->featureImportances as &$importance) {
|
||||
$importance /= $total;
|
||||
}
|
||||
|
||||
arsort($this->featureImportances);
|
||||
}
|
||||
|
||||
return $this->featureImportances;
|
||||
}
|
||||
|
||||
protected function getSplitLeaf(array $records, int $depth = 0): DecisionTreeLeaf
|
||||
{
|
||||
$split = $this->getBestSplit($records);
|
||||
$split->level = $depth;
|
||||
@ -136,6 +252,7 @@ class DecisionTree implements Classifier
|
||||
if ($prevRecord && $prevRecord != $record) {
|
||||
$allSame = false;
|
||||
}
|
||||
|
||||
$prevRecord = $record;
|
||||
|
||||
// According to the split criterion, this record will
|
||||
@ -163,6 +280,7 @@ class DecisionTree implements Classifier
|
||||
if ($leftRecords) {
|
||||
$split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1);
|
||||
}
|
||||
|
||||
if ($rightRecords) {
|
||||
$split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1);
|
||||
}
|
||||
@ -171,7 +289,7 @@ class DecisionTree implements Classifier
|
||||
return $split;
|
||||
}
|
||||
|
||||
protected function getBestSplit(array $records) : DecisionTreeLeaf
|
||||
protected function getBestSplit(array $records): DecisionTreeLeaf
|
||||
{
|
||||
$targets = array_intersect_key($this->targets, array_flip($records));
|
||||
$samples = array_intersect_key($this->samples, array_flip($records));
|
||||
@ -184,6 +302,7 @@ class DecisionTree implements Classifier
|
||||
foreach ($samples as $index => $row) {
|
||||
$colValues[$index] = $row[$i];
|
||||
}
|
||||
|
||||
$counts = array_count_values($colValues);
|
||||
arsort($counts);
|
||||
$baseValue = key($counts);
|
||||
@ -227,7 +346,7 @@ class DecisionTree implements Classifier
|
||||
* If any of above methods were not called beforehand, then all features
|
||||
* are returned by default.
|
||||
*/
|
||||
protected function getSelectedFeatures() : array
|
||||
protected function getSelectedFeatures(): array
|
||||
{
|
||||
$allFeatures = range(0, $this->featureCount - 1);
|
||||
if ($this->numUsableFeatures === 0 && !$this->selectedFeatures) {
|
||||
@ -242,6 +361,7 @@ class DecisionTree implements Classifier
|
||||
if ($numFeatures > $this->featureCount) {
|
||||
$numFeatures = $this->featureCount;
|
||||
}
|
||||
|
||||
shuffle($allFeatures);
|
||||
$selectedFeatures = array_slice($allFeatures, 0, $numFeatures, false);
|
||||
sort($selectedFeatures);
|
||||
@ -249,39 +369,7 @@ class DecisionTree implements Classifier
|
||||
return $selectedFeatures;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param mixed $baseValue
|
||||
*/
|
||||
public function getGiniIndex($baseValue, array $colValues, array $targets) : float
|
||||
{
|
||||
$countMatrix = [];
|
||||
foreach ($this->labels as $label) {
|
||||
$countMatrix[$label] = [0, 0];
|
||||
}
|
||||
|
||||
foreach ($colValues as $index => $value) {
|
||||
$label = $targets[$index];
|
||||
$rowIndex = $value === $baseValue ? 0 : 1;
|
||||
++$countMatrix[$label][$rowIndex];
|
||||
}
|
||||
|
||||
$giniParts = [0, 0];
|
||||
for ($i = 0; $i <= 1; ++$i) {
|
||||
$part = 0;
|
||||
$sum = array_sum(array_column($countMatrix, $i));
|
||||
if ($sum > 0) {
|
||||
foreach ($this->labels as $label) {
|
||||
$part += pow($countMatrix[$label][$i] / (float) $sum, 2);
|
||||
}
|
||||
}
|
||||
|
||||
$giniParts[$i] = (1 - $part) * $sum;
|
||||
}
|
||||
|
||||
return array_sum($giniParts) / count($colValues);
|
||||
}
|
||||
|
||||
protected function preprocess(array $samples) : array
|
||||
protected function preprocess(array $samples): array
|
||||
{
|
||||
// Detect and convert continuous data column values into
|
||||
// discrete values by using the median as a threshold value
|
||||
@ -298,14 +386,16 @@ class DecisionTree implements Classifier
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$columns[] = $values;
|
||||
}
|
||||
|
||||
// Below method is a strange yet very simple & efficient method
|
||||
// to get the transpose of a 2D array
|
||||
return array_map(null, ...$columns);
|
||||
}
|
||||
|
||||
protected static function isCategoricalColumn(array $columnValues) : bool
|
||||
protected static function isCategoricalColumn(array $columnValues): bool
|
||||
{
|
||||
$count = count($columnValues);
|
||||
|
||||
@ -329,28 +419,6 @@ class DecisionTree implements Classifier
|
||||
return count($distinctValues) <= $count / 5;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used to set number of columns to be used
|
||||
* when deciding a split at an internal node of the tree. <br>
|
||||
* If the value is given 0, then all features are used (default behaviour),
|
||||
* otherwise the given value will be used as a maximum for number of columns
|
||||
* randomly selected for each split operation.
|
||||
*
|
||||
* @return $this
|
||||
*
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function setNumFeatures(int $numFeatures)
|
||||
{
|
||||
if ($numFeatures < 0) {
|
||||
throw new InvalidArgumentException('Selected column count should be greater or equal to zero');
|
||||
}
|
||||
|
||||
$this->numUsableFeatures = $numFeatures;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to set predefined features to consider while deciding which column to use for a split
|
||||
*/
|
||||
@ -359,71 +427,11 @@ class DecisionTree implements Classifier
|
||||
$this->selectedFeatures = $selectedFeatures;
|
||||
}
|
||||
|
||||
/**
|
||||
* A string array to represent columns. Useful when HTML output or
|
||||
* column importances are desired to be inspected.
|
||||
*
|
||||
* @return $this
|
||||
*
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function setColumnNames(array $names)
|
||||
{
|
||||
if ($this->featureCount !== 0 && count($names) !== $this->featureCount) {
|
||||
throw new InvalidArgumentException(sprintf('Length of the given array should be equal to feature count %s', $this->featureCount));
|
||||
}
|
||||
|
||||
$this->columnNames = $names;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function getHtml() : string
|
||||
{
|
||||
return $this->tree->getHTML($this->columnNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* This will return an array including an importance value for
|
||||
* each column in the given dataset. The importance values are
|
||||
* normalized and their total makes 1.<br/>
|
||||
*/
|
||||
public function getFeatureImportances() : array
|
||||
{
|
||||
if ($this->featureImportances !== null) {
|
||||
return $this->featureImportances;
|
||||
}
|
||||
|
||||
$sampleCount = count($this->samples);
|
||||
$this->featureImportances = [];
|
||||
foreach ($this->columnNames as $column => $columnName) {
|
||||
$nodes = $this->getSplitNodesByColumn($column, $this->tree);
|
||||
|
||||
$importance = 0;
|
||||
foreach ($nodes as $node) {
|
||||
$importance += $node->getNodeImpurityDecrease($sampleCount);
|
||||
}
|
||||
|
||||
$this->featureImportances[$columnName] = $importance;
|
||||
}
|
||||
|
||||
// Normalize & sort the importances
|
||||
$total = array_sum($this->featureImportances);
|
||||
if ($total > 0) {
|
||||
foreach ($this->featureImportances as &$importance) {
|
||||
$importance /= $total;
|
||||
}
|
||||
arsort($this->featureImportances);
|
||||
}
|
||||
|
||||
return $this->featureImportances;
|
||||
}
|
||||
|
||||
/**
|
||||
* Collects and returns an array of internal nodes that use the given
|
||||
* column as a split criterion
|
||||
*/
|
||||
protected function getSplitNodesByColumn(int $column, DecisionTreeLeaf $node) : array
|
||||
protected function getSplitNodesByColumn(int $column, DecisionTreeLeaf $node): array
|
||||
{
|
||||
if (!$node || $node->isTerminal) {
|
||||
return [];
|
||||
|
@ -71,7 +71,15 @@ class DecisionTreeLeaf
|
||||
*/
|
||||
public $level = 0;
|
||||
|
||||
public function evaluate(array $record) : bool
|
||||
/**
|
||||
* HTML representation of the tree without column names
|
||||
*/
|
||||
public function __toString(): string
|
||||
{
|
||||
return $this->getHTML();
|
||||
}
|
||||
|
||||
public function evaluate(array $record): bool
|
||||
{
|
||||
$recordField = $record[$this->columnIndex];
|
||||
|
||||
@ -86,7 +94,7 @@ class DecisionTreeLeaf
|
||||
* Returns Mean Decrease Impurity (MDI) in the node.
|
||||
* For terminal nodes, this value is equal to 0
|
||||
*/
|
||||
public function getNodeImpurityDecrease(int $parentRecordCount) : float
|
||||
public function getNodeImpurityDecrease(int $parentRecordCount): float
|
||||
{
|
||||
if ($this->isTerminal) {
|
||||
return 0.0;
|
||||
@ -111,7 +119,7 @@ class DecisionTreeLeaf
|
||||
/**
|
||||
* Returns HTML representation of the node including children nodes
|
||||
*/
|
||||
public function getHTML($columnNames = null) : string
|
||||
public function getHTML($columnNames = null): string
|
||||
{
|
||||
if ($this->isTerminal) {
|
||||
$value = "<b>$this->classValue</b>";
|
||||
@ -154,12 +162,4 @@ class DecisionTreeLeaf
|
||||
|
||||
return $str;
|
||||
}
|
||||
|
||||
/**
|
||||
* HTML representation of the tree without column names
|
||||
*/
|
||||
public function __toString() : string
|
||||
{
|
||||
return $this->getHTML();
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Classification\Ensemble;
|
||||
|
||||
use Exception;
|
||||
use Phpml\Classification\Classifier;
|
||||
use Phpml\Classification\Linear\DecisionStump;
|
||||
use Phpml\Classification\WeightedClassifier;
|
||||
@ -11,6 +12,7 @@ use Phpml\Helper\Predictable;
|
||||
use Phpml\Helper\Trainable;
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
use Phpml\Math\Statistic\StandardDeviation;
|
||||
use ReflectionClass;
|
||||
|
||||
class AdaBoost implements Classifier
|
||||
{
|
||||
@ -98,11 +100,14 @@ class AdaBoost implements Classifier
|
||||
// Initialize usual variables
|
||||
$this->labels = array_keys(array_count_values($targets));
|
||||
if (count($this->labels) != 2) {
|
||||
throw new \Exception('AdaBoost is a binary classifier and can classify between two classes only');
|
||||
throw new Exception('AdaBoost is a binary classifier and can classify between two classes only');
|
||||
}
|
||||
|
||||
// Set all target values to either -1 or 1
|
||||
$this->labels = [1 => $this->labels[0], -1 => $this->labels[1]];
|
||||
$this->labels = [
|
||||
1 => $this->labels[0],
|
||||
-1 => $this->labels[1],
|
||||
];
|
||||
foreach ($targets as $target) {
|
||||
$this->targets[] = $target == $this->labels[1] ? 1 : -1;
|
||||
}
|
||||
@ -132,13 +137,27 @@ class AdaBoost implements Classifier
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return mixed
|
||||
*/
|
||||
public function predictSample(array $sample)
|
||||
{
|
||||
$sum = 0;
|
||||
foreach ($this->alpha as $index => $alpha) {
|
||||
$h = $this->classifiers[$index]->predict($sample);
|
||||
$sum += $h * $alpha;
|
||||
}
|
||||
|
||||
return $this->labels[$sum > 0 ? 1 : -1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the classifier with the lowest error rate with the
|
||||
* consideration of current sample weights
|
||||
*/
|
||||
protected function getBestClassifier() : Classifier
|
||||
protected function getBestClassifier(): Classifier
|
||||
{
|
||||
$ref = new \ReflectionClass($this->baseClassifier);
|
||||
$ref = new ReflectionClass($this->baseClassifier);
|
||||
if ($this->classifierOptions) {
|
||||
$classifier = $ref->newInstanceArgs($this->classifierOptions);
|
||||
} else {
|
||||
@ -160,7 +179,7 @@ class AdaBoost implements Classifier
|
||||
* Resamples the dataset in accordance with the weights and
|
||||
* returns the new dataset
|
||||
*/
|
||||
protected function resample() : array
|
||||
protected function resample(): array
|
||||
{
|
||||
$weights = $this->weights;
|
||||
$std = StandardDeviation::population($weights);
|
||||
@ -173,9 +192,10 @@ class AdaBoost implements Classifier
|
||||
foreach ($weights as $index => $weight) {
|
||||
$z = (int) round(($weight - $mean) / $std) - $minZ + 1;
|
||||
for ($i = 0; $i < $z; ++$i) {
|
||||
if (rand(0, 1) == 0) {
|
||||
if (random_int(0, 1) == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$samples[] = $this->samples[$index];
|
||||
$targets[] = $this->targets[$index];
|
||||
}
|
||||
@ -187,7 +207,7 @@ class AdaBoost implements Classifier
|
||||
/**
|
||||
* Evaluates the classifier and returns the classification error rate
|
||||
*/
|
||||
protected function evaluateClassifier(Classifier $classifier) : float
|
||||
protected function evaluateClassifier(Classifier $classifier): float
|
||||
{
|
||||
$total = (float) array_sum($this->weights);
|
||||
$wrong = 0;
|
||||
@ -204,7 +224,7 @@ class AdaBoost implements Classifier
|
||||
/**
|
||||
* Calculates alpha of a classifier
|
||||
*/
|
||||
protected function calculateAlpha(float $errorRate) : float
|
||||
protected function calculateAlpha(float $errorRate): float
|
||||
{
|
||||
if ($errorRate == 0) {
|
||||
$errorRate = 1e-10;
|
||||
@ -231,18 +251,4 @@ class AdaBoost implements Classifier
|
||||
|
||||
$this->weights = $weightsT1;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return mixed
|
||||
*/
|
||||
public function predictSample(array $sample)
|
||||
{
|
||||
$sum = 0;
|
||||
foreach ($this->alpha as $index => $alpha) {
|
||||
$h = $this->classifiers[$index]->predict($sample);
|
||||
$sum += $h * $alpha;
|
||||
}
|
||||
|
||||
return $this->labels[$sum > 0 ? 1 : -1];
|
||||
}
|
||||
}
|
||||
|
@ -4,10 +4,12 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Classification\Ensemble;
|
||||
|
||||
use Exception;
|
||||
use Phpml\Classification\Classifier;
|
||||
use Phpml\Classification\DecisionTree;
|
||||
use Phpml\Helper\Predictable;
|
||||
use Phpml\Helper\Trainable;
|
||||
use ReflectionClass;
|
||||
|
||||
class Bagging implements Classifier
|
||||
{
|
||||
@ -18,11 +20,6 @@ class Bagging implements Classifier
|
||||
*/
|
||||
protected $numSamples;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $targets = [];
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
@ -46,13 +43,18 @@ class Bagging implements Classifier
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $classifiers;
|
||||
protected $classifiers = [];
|
||||
|
||||
/**
|
||||
* @var float
|
||||
*/
|
||||
protected $subsetRatio = 0.7;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $targets = [];
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
@ -80,7 +82,7 @@ class Bagging implements Classifier
|
||||
public function setSubsetRatio(float $ratio)
|
||||
{
|
||||
if ($ratio < 0.1 || $ratio > 1.0) {
|
||||
throw new \Exception('Subset ratio should be between 0.1 and 1.0');
|
||||
throw new Exception('Subset ratio should be between 0.1 and 1.0');
|
||||
}
|
||||
|
||||
$this->subsetRatio = $ratio;
|
||||
@ -123,14 +125,14 @@ class Bagging implements Classifier
|
||||
}
|
||||
}
|
||||
|
||||
protected function getRandomSubset(int $index) : array
|
||||
protected function getRandomSubset(int $index): array
|
||||
{
|
||||
$samples = [];
|
||||
$targets = [];
|
||||
srand($index);
|
||||
$bootstrapSize = $this->subsetRatio * $this->numSamples;
|
||||
for ($i = 0; $i < $bootstrapSize; ++$i) {
|
||||
$rand = rand(0, $this->numSamples - 1);
|
||||
$rand = random_int(0, $this->numSamples - 1);
|
||||
$samples[] = $this->samples[$rand];
|
||||
$targets[] = $this->targets[$rand];
|
||||
}
|
||||
@ -138,11 +140,11 @@ class Bagging implements Classifier
|
||||
return [$samples, $targets];
|
||||
}
|
||||
|
||||
protected function initClassifiers() : array
|
||||
protected function initClassifiers(): array
|
||||
{
|
||||
$classifiers = [];
|
||||
for ($i = 0; $i < $this->numClassifier; ++$i) {
|
||||
$ref = new \ReflectionClass($this->classifier);
|
||||
$ref = new ReflectionClass($this->classifier);
|
||||
if ($this->classifierOptions) {
|
||||
$obj = $ref->newInstanceArgs($this->classifierOptions);
|
||||
} else {
|
||||
@ -155,12 +157,7 @@ class Bagging implements Classifier
|
||||
return $classifiers;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param Classifier $classifier
|
||||
*
|
||||
* @return Classifier
|
||||
*/
|
||||
protected function initSingleClassifier($classifier)
|
||||
protected function initSingleClassifier(Classifier $classifier): Classifier
|
||||
{
|
||||
return $classifier;
|
||||
}
|
||||
|
@ -4,6 +4,8 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Classification\Ensemble;
|
||||
|
||||
use Exception;
|
||||
use Phpml\Classification\Classifier;
|
||||
use Phpml\Classification\DecisionTree;
|
||||
|
||||
class RandomForest extends Bagging
|
||||
@ -48,11 +50,11 @@ class RandomForest extends Bagging
|
||||
public function setFeatureSubsetRatio($ratio)
|
||||
{
|
||||
if (is_float($ratio) && ($ratio < 0.1 || $ratio > 1.0)) {
|
||||
throw new \Exception('When a float given, feature subset ratio should be between 0.1 and 1.0');
|
||||
throw new Exception('When a float given, feature subset ratio should be between 0.1 and 1.0');
|
||||
}
|
||||
|
||||
if (is_string($ratio) && $ratio != 'sqrt' && $ratio != 'log') {
|
||||
throw new \Exception("When a string given, feature subset ratio can only be 'sqrt' or 'log' ");
|
||||
throw new Exception("When a string given, feature subset ratio can only be 'sqrt' or 'log' ");
|
||||
}
|
||||
|
||||
$this->featureSubsetRatio = $ratio;
|
||||
@ -70,7 +72,7 @@ class RandomForest extends Bagging
|
||||
public function setClassifer(string $classifier, array $classifierOptions = [])
|
||||
{
|
||||
if ($classifier != DecisionTree::class) {
|
||||
throw new \Exception('RandomForest can only use DecisionTree as base classifier');
|
||||
throw new Exception('RandomForest can only use DecisionTree as base classifier');
|
||||
}
|
||||
|
||||
return parent::setClassifer($classifier, $classifierOptions);
|
||||
@ -81,7 +83,7 @@ class RandomForest extends Bagging
|
||||
* each column in the given dataset. Importance values for a column
|
||||
* is the average importance of that column in all trees in the forest
|
||||
*/
|
||||
public function getFeatureImportances() : array
|
||||
public function getFeatureImportances(): array
|
||||
{
|
||||
// Traverse each tree and sum importance of the columns
|
||||
$sum = [];
|
||||
@ -127,7 +129,7 @@ class RandomForest extends Bagging
|
||||
*
|
||||
* @return DecisionTree
|
||||
*/
|
||||
protected function initSingleClassifier($classifier)
|
||||
protected function initSingleClassifier(Classifier $classifier): Classifier
|
||||
{
|
||||
if (is_float($this->featureSubsetRatio)) {
|
||||
$featureCount = (int) ($this->featureSubsetRatio * $this->featureCount);
|
||||
|
@ -28,7 +28,7 @@ class KNearestNeighbors implements Classifier
|
||||
*/
|
||||
public function __construct(int $k = 3, ?Distance $distanceMetric = null)
|
||||
{
|
||||
if (null === $distanceMetric) {
|
||||
if ($distanceMetric === null) {
|
||||
$distanceMetric = new Euclidean();
|
||||
}
|
||||
|
||||
@ -60,7 +60,7 @@ class KNearestNeighbors implements Classifier
|
||||
/**
|
||||
* @throws \Phpml\Exception\InvalidArgumentException
|
||||
*/
|
||||
private function kNeighborsDistances(array $sample) : array
|
||||
private function kNeighborsDistances(array $sample): array
|
||||
{
|
||||
$distances = [];
|
||||
|
||||
|
@ -4,6 +4,8 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Classification\Linear;
|
||||
|
||||
use Exception;
|
||||
|
||||
class Adaline extends Perceptron
|
||||
{
|
||||
/**
|
||||
@ -41,7 +43,7 @@ class Adaline extends Perceptron
|
||||
int $trainingType = self::BATCH_TRAINING
|
||||
) {
|
||||
if (!in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING])) {
|
||||
throw new \Exception('Adaline can only be trained with batch and online/stochastic gradient descent algorithm');
|
||||
throw new Exception('Adaline can only be trained with batch and online/stochastic gradient descent algorithm');
|
||||
}
|
||||
|
||||
$this->trainingType = $trainingType;
|
||||
|
@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Classification\Linear;
|
||||
|
||||
use Exception;
|
||||
use Phpml\Classification\DecisionTree;
|
||||
use Phpml\Classification\WeightedClassifier;
|
||||
use Phpml\Helper\OneVsRest;
|
||||
@ -24,7 +25,7 @@ class DecisionStump extends WeightedClassifier
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $binaryLabels;
|
||||
protected $binaryLabels = [];
|
||||
|
||||
/**
|
||||
* Lowest error rate obtained while training/optimizing the model
|
||||
@ -51,7 +52,7 @@ class DecisionStump extends WeightedClassifier
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $columnTypes;
|
||||
protected $columnTypes = [];
|
||||
|
||||
/**
|
||||
* @var int
|
||||
@ -68,7 +69,7 @@ class DecisionStump extends WeightedClassifier
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $prob;
|
||||
protected $prob = [];
|
||||
|
||||
/**
|
||||
* A DecisionStump classifier is a one-level deep DecisionTree. It is generally
|
||||
@ -83,6 +84,25 @@ class DecisionStump extends WeightedClassifier
|
||||
$this->givenColumnIndex = $columnIndex;
|
||||
}
|
||||
|
||||
/**
 * Renders the stump as a one-line rule of the form
 * "IF <column> <operator> <value> THEN <label 0> ELSE <label 1>".
 */
public function __toString(): string
{
    $condition = "IF {$this->column} {$this->operator} {$this->value} ";
    $thenPart = 'THEN '.$this->binaryLabels[0].' ';
    $elsePart = 'ELSE '.$this->binaryLabels[1];

    return $condition.$thenPart.$elsePart;
}
|
||||
|
||||
/**
 * Sets how many split points are probed for a numerical column.
 *
 * When searching for the best split of a numerical column, DecisionStump
 * tries equally spaced values between the column's minimum and maximum.
 * $count controls how many such points are tried: more points give a finer
 * search at the cost of processing time (default value is 10.0).
 */
public function setNumericalSplitCount(float $count): void
{
    $this->numSplitCount = $count;
}
|
||||
|
||||
/**
|
||||
* @throws \Exception
|
||||
*/
|
||||
@ -101,7 +121,7 @@ class DecisionStump extends WeightedClassifier
|
||||
if ($this->weights) {
|
||||
$numWeights = count($this->weights);
|
||||
if ($numWeights != count($samples)) {
|
||||
throw new \Exception('Number of sample weights does not match with number of samples');
|
||||
throw new Exception('Number of sample weights does not match with number of samples');
|
||||
}
|
||||
} else {
|
||||
$this->weights = array_fill(0, count($samples), 1);
|
||||
@ -118,9 +138,12 @@ class DecisionStump extends WeightedClassifier
|
||||
}
|
||||
|
||||
$bestSplit = [
|
||||
'value' => 0, 'operator' => '',
|
||||
'prob' => [], 'column' => 0,
|
||||
'trainingErrorRate' => 1.0];
|
||||
'value' => 0,
|
||||
'operator' => '',
|
||||
'prob' => [],
|
||||
'column' => 0,
|
||||
'trainingErrorRate' => 1.0,
|
||||
];
|
||||
foreach ($columns as $col) {
|
||||
if ($this->columnTypes[$col] == DecisionTree::CONTINUOUS) {
|
||||
$split = $this->getBestNumericalSplit($samples, $targets, $col);
|
||||
@ -139,22 +162,10 @@ class DecisionStump extends WeightedClassifier
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* While finding best split point for a numerical valued column,
|
||||
* DecisionStump looks for equally distanced values between minimum and maximum
|
||||
* values in the column. Given <i>$count</i> value determines how many split
|
||||
* points to be probed. The more split counts, the better performance but
|
||||
* worse processing time (Default value is 10.0)
|
||||
*/
|
||||
public function setNumericalSplitCount(float $count): void
|
||||
{
|
||||
$this->numSplitCount = $count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines best split point for the given column
|
||||
*/
|
||||
protected function getBestNumericalSplit(array $samples, array $targets, int $col) : array
|
||||
protected function getBestNumericalSplit(array $samples, array $targets, int $col): array
|
||||
{
|
||||
$values = array_column($samples, $col);
|
||||
// Trying all possible points may be accomplished in two general ways:
|
||||
@ -173,9 +184,13 @@ class DecisionStump extends WeightedClassifier
|
||||
$threshold = array_sum($values) / (float) count($values);
|
||||
[$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values);
|
||||
if ($split == null || $errorRate < $split['trainingErrorRate']) {
|
||||
$split = ['value' => $threshold, 'operator' => $operator,
|
||||
'prob' => $prob, 'column' => $col,
|
||||
'trainingErrorRate' => $errorRate];
|
||||
$split = [
|
||||
'value' => $threshold,
|
||||
'operator' => $operator,
|
||||
'prob' => $prob,
|
||||
'column' => $col,
|
||||
'trainingErrorRate' => $errorRate,
|
||||
];
|
||||
}
|
||||
|
||||
// Try other possible points one by one
|
||||
@ -183,9 +198,13 @@ class DecisionStump extends WeightedClassifier
|
||||
$threshold = (float) $step;
|
||||
[$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values);
|
||||
if ($errorRate < $split['trainingErrorRate']) {
|
||||
$split = ['value' => $threshold, 'operator' => $operator,
|
||||
'prob' => $prob, 'column' => $col,
|
||||
'trainingErrorRate' => $errorRate];
|
||||
$split = [
|
||||
'value' => $threshold,
|
||||
'operator' => $operator,
|
||||
'prob' => $prob,
|
||||
'column' => $col,
|
||||
'trainingErrorRate' => $errorRate,
|
||||
];
|
||||
}
|
||||
}// for
|
||||
}
|
||||
@ -193,7 +212,7 @@ class DecisionStump extends WeightedClassifier
|
||||
return $split;
|
||||
}
|
||||
|
||||
protected function getBestNominalSplit(array $samples, array $targets, int $col) : array
|
||||
protected function getBestNominalSplit(array $samples, array $targets, int $col): array
|
||||
{
|
||||
$values = array_column($samples, $col);
|
||||
$valueCounts = array_count_values($values);
|
||||
@ -206,9 +225,13 @@ class DecisionStump extends WeightedClassifier
|
||||
[$errorRate, $prob] = $this->calculateErrorRate($targets, $val, $operator, $values);
|
||||
|
||||
if ($split == null || $split['trainingErrorRate'] < $errorRate) {
|
||||
$split = ['value' => $val, 'operator' => $operator,
|
||||
'prob' => $prob, 'column' => $col,
|
||||
'trainingErrorRate' => $errorRate];
|
||||
$split = [
|
||||
'value' => $val,
|
||||
'operator' => $operator,
|
||||
'prob' => $prob,
|
||||
'column' => $col,
|
||||
'trainingErrorRate' => $errorRate,
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -220,7 +243,7 @@ class DecisionStump extends WeightedClassifier
|
||||
* Calculates the ratio of wrong predictions based on the new threshold
|
||||
* value given as the parameter
|
||||
*/
|
||||
protected function calculateErrorRate(array $targets, float $threshold, string $operator, array $values) : array
|
||||
protected function calculateErrorRate(array $targets, float $threshold, string $operator, array $values): array
|
||||
{
|
||||
$wrong = 0.0;
|
||||
$prob = [];
|
||||
@ -242,6 +265,7 @@ class DecisionStump extends WeightedClassifier
|
||||
if (!isset($prob[$predicted][$target])) {
|
||||
$prob[$predicted][$target] = 0;
|
||||
}
|
||||
|
||||
++$prob[$predicted][$target];
|
||||
}
|
||||
|
||||
@ -267,7 +291,7 @@ class DecisionStump extends WeightedClassifier
|
||||
*
|
||||
* @param mixed $label
|
||||
*/
|
||||
protected function predictProbability(array $sample, $label) : float
|
||||
protected function predictProbability(array $sample, $label): float
|
||||
{
|
||||
$predicted = $this->predictSampleBinary($sample);
|
||||
if ((string) $predicted == (string) $label) {
|
||||
@ -292,11 +316,4 @@ class DecisionStump extends WeightedClassifier
|
||||
protected function resetBinary(): void
|
||||
{
|
||||
}
|
||||
|
||||
public function __toString() : string
|
||||
{
|
||||
return "IF $this->column $this->operator $this->value ".
|
||||
'THEN '.$this->binaryLabels[0].' '.
|
||||
'ELSE '.$this->binaryLabels[1];
|
||||
}
|
||||
}
|
||||
|
@ -4,6 +4,8 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Classification\Linear;
|
||||
|
||||
use Closure;
|
||||
use Exception;
|
||||
use Phpml\Helper\Optimizer\ConjugateGradient;
|
||||
|
||||
class LogisticRegression extends Adaline
|
||||
@ -70,18 +72,18 @@ class LogisticRegression extends Adaline
|
||||
) {
|
||||
$trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING);
|
||||
if (!in_array($trainingType, $trainingTypes)) {
|
||||
throw new \Exception('Logistic regression can only be trained with '.
|
||||
throw new Exception('Logistic regression can only be trained with '.
|
||||
'batch (gradient descent), online (stochastic gradient descent) '.
|
||||
'or conjugate batch (conjugate gradients) algorithms');
|
||||
}
|
||||
|
||||
if (!in_array($cost, ['log', 'sse'])) {
|
||||
throw new \Exception("Logistic regression cost function can be one of the following: \n".
|
||||
throw new Exception("Logistic regression cost function can be one of the following: \n".
|
||||
"'log' for log-likelihood and 'sse' for sum of squared errors");
|
||||
}
|
||||
|
||||
if ($penalty != '' && strtoupper($penalty) !== 'L2') {
|
||||
throw new \Exception("Logistic regression supports only 'L2' regularization");
|
||||
throw new Exception("Logistic regression supports only 'L2' regularization");
|
||||
}
|
||||
|
||||
$this->learningRate = 0.001;
|
||||
@ -132,14 +134,14 @@ class LogisticRegression extends Adaline
|
||||
return $this->runConjugateGradient($samples, $targets, $callback);
|
||||
|
||||
default:
|
||||
throw new \Exception('Logistic regression has invalid training type: %s.', $this->trainingType);
|
||||
throw new Exception('Logistic regression has invalid training type: %s.', $this->trainingType);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes Conjugate Gradient method to optimize the weights of the LogReg model
|
||||
*/
|
||||
protected function runConjugateGradient(array $samples, array $targets, \Closure $gradientFunc): void
|
||||
protected function runConjugateGradient(array $samples, array $targets, Closure $gradientFunc): void
|
||||
{
|
||||
if (empty($this->optimizer)) {
|
||||
$this->optimizer = (new ConjugateGradient($this->featureCount))
|
||||
@ -155,7 +157,7 @@ class LogisticRegression extends Adaline
|
||||
*
|
||||
* @throws \Exception
|
||||
*/
|
||||
protected function getCostFunction() : \Closure
|
||||
protected function getCostFunction(): Closure
|
||||
{
|
||||
$penalty = 0;
|
||||
if ($this->penalty == 'L2') {
|
||||
@ -183,9 +185,11 @@ class LogisticRegression extends Adaline
|
||||
if ($hX == 1) {
|
||||
$hX = 1 - 1e-10;
|
||||
}
|
||||
|
||||
if ($hX == 0) {
|
||||
$hX = 1e-10;
|
||||
}
|
||||
|
||||
$error = -$y * log($hX) - (1 - $y) * log(1 - $hX);
|
||||
$gradient = $hX - $y;
|
||||
|
||||
@ -218,16 +222,14 @@ class LogisticRegression extends Adaline
|
||||
return $callback;
|
||||
|
||||
default:
|
||||
throw new \Exception(sprintf('Logistic regression has invalid cost function: %s.', $this->costFunction));
|
||||
throw new Exception(sprintf('Logistic regression has invalid cost function: %s.', $this->costFunction));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the output of the network, a float value between 0.0 and 1.0
|
||||
*
|
||||
* @return float
|
||||
*/
|
||||
protected function output(array $sample)
|
||||
protected function output(array $sample): float
|
||||
{
|
||||
$sum = parent::output($sample);
|
||||
|
||||
@ -237,7 +239,7 @@ class LogisticRegression extends Adaline
|
||||
/**
|
||||
* Returns the class value (either -1 or 1) for the given input
|
||||
*/
|
||||
protected function outputClass(array $sample) : int
|
||||
protected function outputClass(array $sample): int
|
||||
{
|
||||
$output = $this->output($sample);
|
||||
|
||||
@ -253,10 +255,10 @@ class LogisticRegression extends Adaline
|
||||
*
|
||||
* The probability is simply taken as the distance of the sample
|
||||
* to the decision plane.
|
||||
|
||||
*
|
||||
* @param mixed $label
|
||||
*/
|
||||
protected function predictProbability(array $sample, $label) : float
|
||||
protected function predictProbability(array $sample, $label): float
|
||||
{
|
||||
$predicted = $this->predictSampleBinary($sample);
|
||||
|
||||
|
@ -4,6 +4,8 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Classification\Linear;
|
||||
|
||||
use Closure;
|
||||
use Exception;
|
||||
use Phpml\Classification\Classifier;
|
||||
use Phpml\Helper\OneVsRest;
|
||||
use Phpml\Helper\Optimizer\GD;
|
||||
@ -34,7 +36,7 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $weights;
|
||||
protected $weights = [];
|
||||
|
||||
/**
|
||||
* @var float
|
||||
@ -73,11 +75,11 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
public function __construct(float $learningRate = 0.001, int $maxIterations = 1000, bool $normalizeInputs = true)
|
||||
{
|
||||
if ($learningRate <= 0.0 || $learningRate > 1.0) {
|
||||
throw new \Exception('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)');
|
||||
throw new Exception('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)');
|
||||
}
|
||||
|
||||
if ($maxIterations <= 0) {
|
||||
throw new \Exception('Maximum number of iterations must be an integer greater than 0');
|
||||
throw new Exception('Maximum number of iterations must be an integer greater than 0');
|
||||
}
|
||||
|
||||
if ($normalizeInputs) {
|
||||
@ -100,7 +102,10 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
}
|
||||
|
||||
// Set all target values to either -1 or 1
|
||||
$this->labels = [1 => $labels[0], -1 => $labels[1]];
|
||||
$this->labels = [
|
||||
1 => $labels[0],
|
||||
-1 => $labels[1],
|
||||
];
|
||||
foreach ($targets as $key => $target) {
|
||||
$targets[$key] = (string) $target == (string) $this->labels[1] ? 1 : -1;
|
||||
}
|
||||
@ -111,15 +116,6 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
$this->runTraining($samples, $targets);
|
||||
}
|
||||
|
||||
protected function resetBinary(): void
|
||||
{
|
||||
$this->labels = [];
|
||||
$this->optimizer = null;
|
||||
$this->featureCount = 0;
|
||||
$this->weights = null;
|
||||
$this->costValues = [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Normally enabling early stopping for the optimization procedure may
|
||||
* help saving processing time while in some cases it may result in
|
||||
@ -140,16 +136,23 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
/**
|
||||
* Returns the cost values obtained during the training.
|
||||
*/
|
||||
public function getCostValues() : array
|
||||
public function getCostValues(): array
|
||||
{
|
||||
return $this->costValues;
|
||||
}
|
||||
|
||||
/**
 * Resets labels, optimizer, feature count, weights and recorded cost
 * values to their empty state.
 */
protected function resetBinary(): void
{
    $this->labels = [];
    $this->optimizer = null;
    $this->featureCount = 0;
    // $weights is declared as an array with an empty-array default; reset it
    // to that same empty state rather than null so its type never changes.
    $this->weights = [];
    $this->costValues = [];
}
|
||||
|
||||
/**
|
||||
* Trains the perceptron model with Stochastic Gradient Descent optimization
|
||||
* to get the correct set of weights
|
||||
*
|
||||
* @return void|mixed
|
||||
*/
|
||||
protected function runTraining(array $samples, array $targets)
|
||||
{
|
||||
@ -171,7 +174,7 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
* Executes a Gradient Descent algorithm for
|
||||
* the given cost function
|
||||
*/
|
||||
protected function runGradientDescent(array $samples, array $targets, \Closure $gradientFunc, bool $isBatch = false): void
|
||||
protected function runGradientDescent(array $samples, array $targets, Closure $gradientFunc, bool $isBatch = false): void
|
||||
{
|
||||
$class = $isBatch ? GD::class : StochasticGD::class;
|
||||
|
||||
@ -191,7 +194,7 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
* Checks if the sample should be normalized and if so, returns the
|
||||
* normalized sample
|
||||
*/
|
||||
protected function checkNormalizedSample(array $sample) : array
|
||||
protected function checkNormalizedSample(array $sample): array
|
||||
{
|
||||
if ($this->normalizer) {
|
||||
$samples = [$sample];
|
||||
@ -205,7 +208,7 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
/**
|
||||
* Calculates net output of the network as a float value for the given input
|
||||
*
|
||||
* @return int
|
||||
* @return int|float
|
||||
*/
|
||||
protected function output(array $sample)
|
||||
{
|
||||
@ -224,7 +227,7 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
/**
|
||||
* Returns the class value (either -1 or 1) for the given input
|
||||
*/
|
||||
protected function outputClass(array $sample) : int
|
||||
protected function outputClass(array $sample): int
|
||||
{
|
||||
return $this->output($sample) > 0 ? 1 : -1;
|
||||
}
|
||||
@ -237,7 +240,7 @@ class Perceptron implements Classifier, IncrementalEstimator
|
||||
*
|
||||
* @param mixed $label
|
||||
*/
|
||||
protected function predictProbability(array $sample, $label) : float
|
||||
protected function predictProbability(array $sample, $label): float
|
||||
{
|
||||
$predicted = $this->predictSampleBinary($sample);
|
||||
|
||||
|
@ -14,7 +14,7 @@ class MLPClassifier extends MultilayerPerceptron implements Classifier
|
||||
*
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function getTargetClass($target) : int
|
||||
public function getTargetClass($target): int
|
||||
{
|
||||
if (!in_array($target, $this->classes)) {
|
||||
throw InvalidArgumentException::invalidTarget($target);
|
||||
|
@ -14,7 +14,9 @@ class NaiveBayes implements Classifier
|
||||
use Trainable, Predictable;
|
||||
|
||||
public const CONTINUOS = 1;
|
||||
|
||||
public const NOMINAL = 2;
|
||||
|
||||
public const EPSILON = 1e-10;
|
||||
|
||||
/**
|
||||
@ -73,6 +75,31 @@ class NaiveBayes implements Classifier
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Picks the label with the highest accumulated score for the given sample.
 *
 * For every known label the score starts at $this->p[$label]; the result of
 * sampleProbability() for each feature is then added to it.
 *
 * NOTE(review): Naive Bayes is normally stated as a product,
 * P(label) * P(feature0|label) * ... * P(featureN|label). Summing matches
 * that only if the stored values are log-probabilities — confirm against
 * train() and sampleProbability().
 *
 * @return mixed The key of the best-scoring label
 */
protected function predictSample(array $sample)
{
    $scores = [];
    foreach ($this->labels as $label) {
        $score = $this->p[$label];
        for ($feature = 0; $feature < $this->featureCount; ++$feature) {
            $score += $this->sampleProbability($sample, $feature, $label);
        }

        $scores[$label] = $score;
    }

    // Sort descending by score (keys preserved) and take the first key
    arsort($scores, SORT_NUMERIC);
    reset($scores);

    return key($scores);
}
|
||||
|
||||
/**
|
||||
* Calculates vital statistics for each label & feature. Stores these
|
||||
* values in private array in order to avoid repeated calculation
|
||||
@ -108,7 +135,7 @@ class NaiveBayes implements Classifier
|
||||
/**
|
||||
* Calculates the probability P(label|sample_n)
|
||||
*/
|
||||
private function sampleProbability(array $sample, int $feature, string $label) : float
|
||||
private function sampleProbability(array $sample, int $feature, string $label): float
|
||||
{
|
||||
$value = $sample[$feature];
|
||||
if ($this->dataType[$label][$feature] == self::NOMINAL) {
|
||||
@ -119,6 +146,7 @@ class NaiveBayes implements Classifier
|
||||
|
||||
return $this->discreteProb[$label][$feature][$value];
|
||||
}
|
||||
|
||||
$std = $this->std[$label][$feature] ;
|
||||
$mean = $this->mean[$label][$feature];
|
||||
// Calculate the probability density by use of normal/Gaussian distribution
|
||||
@ -137,7 +165,7 @@ class NaiveBayes implements Classifier
|
||||
/**
|
||||
* Return samples belonging to specific label
|
||||
*/
|
||||
private function getSamplesByLabel(string $label) : array
|
||||
private function getSamplesByLabel(string $label): array
|
||||
{
|
||||
$samples = [];
|
||||
for ($i = 0; $i < $this->sampleCount; ++$i) {
|
||||
@ -148,28 +176,4 @@ class NaiveBayes implements Classifier
|
||||
|
||||
return $samples;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return mixed
|
||||
*/
|
||||
protected function predictSample(array $sample)
|
||||
{
|
||||
// Use NaiveBayes assumption for each label using:
|
||||
// P(label|features) = P(label) * P(feature0|label) * P(feature1|label) .... P(featureN|label)
|
||||
// Then compare probability for each class to determine which label is most likely
|
||||
$predictions = [];
|
||||
foreach ($this->labels as $label) {
|
||||
$p = $this->p[$label];
|
||||
for ($i = 0; $i < $this->featureCount; ++$i) {
|
||||
$Plf = $this->sampleProbability($sample, $i, $label);
|
||||
$p += $Plf;
|
||||
}
|
||||
$predictions[$label] = $p;
|
||||
}
|
||||
|
||||
arsort($predictions, SORT_NUMERIC);
|
||||
reset($predictions);
|
||||
|
||||
return key($predictions);
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ abstract class WeightedClassifier implements Classifier
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $weights;
|
||||
protected $weights = [];
|
||||
|
||||
/**
|
||||
* Sets the array including a weight for each sample
|
||||
|
@ -6,5 +6,5 @@ namespace Phpml\Clustering;
|
||||
|
||||
interface Clusterer
|
||||
{
|
||||
public function cluster(array $samples) : array;
|
||||
public function cluster(array $samples): array;
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Clustering;
|
||||
|
||||
use array_merge;
|
||||
use Phpml\Math\Distance;
|
||||
use Phpml\Math\Distance\Euclidean;
|
||||
|
||||
@ -26,7 +27,7 @@ class DBSCAN implements Clusterer
|
||||
|
||||
public function __construct(float $epsilon = 0.5, int $minSamples = 3, ?Distance $distanceMetric = null)
|
||||
{
|
||||
if (null === $distanceMetric) {
|
||||
if ($distanceMetric === null) {
|
||||
$distanceMetric = new Euclidean();
|
||||
}
|
||||
|
||||
@ -35,7 +36,7 @@ class DBSCAN implements Clusterer
|
||||
$this->distanceMetric = $distanceMetric;
|
||||
}
|
||||
|
||||
public function cluster(array $samples) : array
|
||||
public function cluster(array $samples): array
|
||||
{
|
||||
$clusters = [];
|
||||
$visited = [];
|
||||
@ -44,6 +45,7 @@ class DBSCAN implements Clusterer
|
||||
if (isset($visited[$index])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$visited[$index] = true;
|
||||
|
||||
$regionSamples = $this->getSamplesInRegion($sample, $samples);
|
||||
@ -55,7 +57,7 @@ class DBSCAN implements Clusterer
|
||||
return $clusters;
|
||||
}
|
||||
|
||||
private function getSamplesInRegion(array $localSample, array $samples) : array
|
||||
private function getSamplesInRegion(array $localSample, array $samples): array
|
||||
{
|
||||
$region = [];
|
||||
|
||||
@ -68,7 +70,7 @@ class DBSCAN implements Clusterer
|
||||
return $region;
|
||||
}
|
||||
|
||||
private function expandCluster(array $samples, array &$visited) : array
|
||||
private function expandCluster(array $samples, array &$visited): array
|
||||
{
|
||||
$cluster = [];
|
||||
|
||||
@ -84,7 +86,8 @@ class DBSCAN implements Clusterer
|
||||
|
||||
$cluster[$index] = $sample;
|
||||
}
|
||||
$cluster = \array_merge($cluster, ...$clusterMerge);
|
||||
|
||||
$cluster = array_merge($cluster, ...$clusterMerge);
|
||||
|
||||
return $cluster;
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ class FuzzyCMeans implements Clusterer
|
||||
/**
|
||||
* @var array|float[][]
|
||||
*/
|
||||
private $membership;
|
||||
private $membership = [];
|
||||
|
||||
/**
|
||||
* @var float
|
||||
@ -55,7 +55,7 @@ class FuzzyCMeans implements Clusterer
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $samples;
|
||||
private $samples = [];
|
||||
|
||||
/**
|
||||
* @throws InvalidArgumentException
|
||||
@ -65,12 +65,63 @@ class FuzzyCMeans implements Clusterer
|
||||
if ($clustersNumber <= 0) {
|
||||
throw InvalidArgumentException::invalidClustersNumber();
|
||||
}
|
||||
|
||||
$this->clustersNumber = $clustersNumber;
|
||||
$this->fuzziness = $fuzziness;
|
||||
$this->epsilon = $epsilon;
|
||||
$this->maxIterations = $maxIterations;
|
||||
}
|
||||
|
||||
/**
 * Returns the current membership matrix (one row per cluster, one column
 * per sample); it is an empty array until cluster() has been run.
 */
public function getMembershipMatrix(): array
{
    return $this->membership;
}
|
||||
|
||||
/**
 * Runs fuzzy c-means on the samples and returns them grouped by cluster.
 *
 * The membership matrix and cluster centers are updated alternately until
 * the objective value changes by no more than epsilon between two passes,
 * or maxIterations passes have been made (at least one pass always runs).
 * Each sample is then attached to the cluster in which it has the highest
 * membership.
 *
 * @param array $samples Each sample is passed to count() and to the Point
 *                       constructor — presumably a list of coordinates;
 *                       confirm against callers
 *
 * @return array One entry per cluster, each the result of getPoints()
 */
public function cluster(array $samples): array
{
    // Initialize variables, clusters and membership matrix
    $this->sampleCount = count($samples);
    // Plain assignment: nothing here needs a reference to the local parameter
    $this->samples = $samples;
    $this->space = new Space(count($samples[0]));
    $this->initClusters();

    // Our goal is minimizing the objective value while
    // executing the clustering steps at a maximum number of iterations
    $lastObjective = 0.0;
    $iterations = 0;
    do {
        // Update the membership matrix and cluster centers, respectively
        $this->updateMembershipMatrix();
        $this->updateClusters();

        // Calculate the new value of the objective function
        $objectiveVal = $this->getObjective();
        $difference = abs($lastObjective - $objectiveVal);
        $lastObjective = $objectiveVal;

        // Pre-increment with a strict comparison caps the loop at exactly
        // maxIterations passes; the former `$iterations++ <= max` allowed
        // two extra passes.
    } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

    // Attach (hard cluster) each data point to the cluster with the highest membership
    for ($k = 0; $k < $this->sampleCount; ++$k) {
        $column = array_column($this->membership, $k);
        arsort($column);
        reset($column);
        $i = key($column);
        $cluster = $this->clusters[$i];
        $cluster->attach(new Point($this->samples[$k]));
    }

    // Return grouped samples
    $grouped = [];
    foreach ($this->clusters as $cluster) {
        $grouped[] = $cluster->getPoints();
    }

    return $grouped;
}
|
||||
|
||||
protected function initClusters(): void
|
||||
{
|
||||
// Membership array is a matrix of cluster number by sample counts
|
||||
@ -87,7 +138,7 @@ class FuzzyCMeans implements Clusterer
|
||||
$row = [];
|
||||
$total = 0.0;
|
||||
for ($k = 0; $k < $cols; ++$k) {
|
||||
$val = rand(1, 5) / 10.0;
|
||||
$val = random_int(1, 5) / 10.0;
|
||||
$row[] = $val;
|
||||
$total += $val;
|
||||
}
|
||||
@ -146,13 +197,13 @@ class FuzzyCMeans implements Clusterer
|
||||
}
|
||||
}
|
||||
|
||||
protected function getDistanceCalc(int $row, int $col) : float
|
||||
protected function getDistanceCalc(int $row, int $col): float
|
||||
{
|
||||
$sum = 0.0;
|
||||
$distance = new Euclidean();
|
||||
$dist1 = $distance->distance(
|
||||
$this->clusters[$row]->getCoordinates(),
|
||||
$this->samples[$col]
|
||||
$this->clusters[$row]->getCoordinates(),
|
||||
$this->samples[$col]
|
||||
);
|
||||
|
||||
for ($j = 0; $j < $this->clustersNumber; ++$j) {
|
||||
@ -187,54 +238,4 @@ class FuzzyCMeans implements Clusterer
|
||||
|
||||
return $sum;
|
||||
}
|
||||
|
||||
public function getMembershipMatrix() : array
|
||||
{
|
||||
return $this->membership;
|
||||
}
|
||||
|
||||
/**
 * Runs fuzzy c-means on the given samples and returns them hard-assigned to clusters.
 *
 * The membership matrix and the cluster centres are updated alternately until the
 * objective value changes by no more than $this->epsilon between two passes, or
 * until $this->maxIterations passes have run. Each sample is then attached to the
 * cluster for which it has the largest membership value.
 *
 * @param array|Point[] $samples
 *
 * @return array one entry per cluster, each holding that cluster's getPoints() result
 */
public function cluster(array $samples): array
{
    // Initialize variables, clusters and membership matrix
    $this->sampleCount = count($samples);
    // Plain assignment: the samples are never mutated here, so no reference is needed
    $this->samples = $samples;
    $this->space = new Space(count($samples[0]));
    $this->initClusters();

    // Our goal is minimizing the objective value while
    // executing the clustering steps at a maximum number of iterations
    $lastObjective = 0.0;
    $iterations = 0;
    do {
        // Update the membership matrix and cluster centers, respectively
        $this->updateMembershipMatrix();
        $this->updateClusters();

        // Calculate the new value of the objective function
        $objectiveVal = $this->getObjective();
        $difference = abs($lastObjective - $objectiveVal);
        $lastObjective = $objectiveVal;
        // Pre-increment with a strict bound so that at most maxIterations passes run
        // (the do-while always performs at least one pass)
    } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

    // Attach (hard cluster) each data point to the nearest cluster
    for ($k = 0; $k < $this->sampleCount; ++$k) {
        $column = array_column($this->membership, $k);
        // Sort descending while keeping keys; the first key is the cluster index
        arsort($column);
        reset($column);
        $i = key($column);
        $cluster = $this->clusters[$i];
        $cluster->attach(new Point($this->samples[$k]));
    }

    // Return grouped samples
    $grouped = [];
    foreach ($this->clusters as $cluster) {
        $grouped[] = $cluster->getPoints();
    }

    return $grouped;
}
|
||||
}
|
||||
|
@ -10,6 +10,7 @@ use Phpml\Exception\InvalidArgumentException;
|
||||
class KMeans implements Clusterer
|
||||
{
|
||||
public const INIT_RANDOM = 1;
|
||||
|
||||
public const INIT_KMEANS_PLUS_PLUS = 2;
|
||||
|
||||
/**
|
||||
@ -32,7 +33,7 @@ class KMeans implements Clusterer
|
||||
$this->initialization = $initialization;
|
||||
}
|
||||
|
||||
public function cluster(array $samples) : array
|
||||
public function cluster(array $samples): array
|
||||
{
|
||||
$space = new Space(count($samples[0]));
|
||||
foreach ($samples as $sample) {
|
||||
|
@ -28,7 +28,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
|
||||
$this->points = new SplObjectStorage();
|
||||
}
|
||||
|
||||
public function getPoints() : array
|
||||
public function getPoints(): array
|
||||
{
|
||||
$points = [];
|
||||
foreach ($this->points as $point) {
|
||||
@ -38,7 +38,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
|
||||
return $points;
|
||||
}
|
||||
|
||||
public function toArray() : array
|
||||
public function toArray(): array
|
||||
{
|
||||
return [
|
||||
'centroid' => parent::toArray(),
|
||||
@ -46,7 +46,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
|
||||
];
|
||||
}
|
||||
|
||||
public function attach(Point $point) : Point
|
||||
public function attach(Point $point): Point
|
||||
{
|
||||
if ($point instanceof self) {
|
||||
throw new LogicException('cannot attach a cluster to another');
|
||||
@ -57,7 +57,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
|
||||
return $point;
|
||||
}
|
||||
|
||||
public function detach(Point $point) : Point
|
||||
public function detach(Point $point): Point
|
||||
{
|
||||
$this->points->detach($point);
|
||||
|
||||
@ -76,7 +76,8 @@ class Cluster extends Point implements IteratorAggregate, Countable
|
||||
|
||||
public function updateCentroid(): void
|
||||
{
|
||||
if (!$count = count($this->points)) {
|
||||
$count = count($this->points);
|
||||
if (!$count) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,7 @@ class Point implements ArrayAccess
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $coordinates;
|
||||
protected $coordinates = [];
|
||||
|
||||
public function __construct(array $coordinates)
|
||||
{
|
||||
@ -24,7 +24,7 @@ class Point implements ArrayAccess
|
||||
$this->coordinates = $coordinates;
|
||||
}
|
||||
|
||||
/**
 * Returns the coordinates stored in this point.
 *
 * @return array the value of $this->coordinates
 */
public function toArray(): array
{
    return $this->coordinates;
}
|
||||
@ -66,7 +66,7 @@ class Point implements ArrayAccess
|
||||
return $minPoint;
|
||||
}
|
||||
|
||||
/**
 * Returns the coordinates stored in this point.
 *
 * @return array the value of $this->coordinates
 */
public function getCoordinates(): array
{
    return $this->coordinates;
}
|
||||
|
@ -25,7 +25,7 @@ class Space extends SplObjectStorage
|
||||
$this->dimension = $dimension;
|
||||
}
|
||||
|
||||
public function toArray() : array
|
||||
public function toArray(): array
|
||||
{
|
||||
$points = [];
|
||||
foreach ($this as $point) {
|
||||
@ -35,7 +35,7 @@ class Space extends SplObjectStorage
|
||||
return ['points' => $points];
|
||||
}
|
||||
|
||||
public function newPoint(array $coordinates) : Point
|
||||
public function newPoint(array $coordinates): Point
|
||||
{
|
||||
if (count($coordinates) != $this->dimension) {
|
||||
throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
|
||||
@ -65,7 +65,7 @@ class Space extends SplObjectStorage
|
||||
parent::attach($point, $data);
|
||||
}
|
||||
|
||||
/**
 * Returns the dimension this space was configured with.
 *
 * @return int the value of $this->dimension
 */
public function getDimension(): int
{
    return $this->dimension;
}
|
||||
@ -92,7 +92,7 @@ class Space extends SplObjectStorage
|
||||
return [$min, $max];
|
||||
}
|
||||
|
||||
public function getRandomPoint(Point $min, Point $max) : Point
|
||||
public function getRandomPoint(Point $min, Point $max): Point
|
||||
{
|
||||
$point = $this->newPoint(array_fill(0, $this->dimension, null));
|
||||
|
||||
@ -106,7 +106,7 @@ class Space extends SplObjectStorage
|
||||
/**
|
||||
* @return array|Cluster[]
|
||||
*/
|
||||
public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM) : array
|
||||
public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
|
||||
{
|
||||
$clusters = $this->initializeClusters($clustersNumber, $initMethod);
|
||||
|
||||
@ -119,7 +119,7 @@ class Space extends SplObjectStorage
|
||||
/**
|
||||
* @return array|Cluster[]
|
||||
*/
|
||||
protected function initializeClusters(int $clustersNumber, int $initMethod) : array
|
||||
protected function initializeClusters(int $clustersNumber, int $initMethod): array
|
||||
{
|
||||
switch ($initMethod) {
|
||||
case KMeans::INIT_RANDOM:
|
||||
@ -139,7 +139,7 @@ class Space extends SplObjectStorage
|
||||
return $clusters;
|
||||
}
|
||||
|
||||
protected function iterate($clusters) : bool
|
||||
protected function iterate($clusters): bool
|
||||
{
|
||||
$convergence = true;
|
||||
|
||||
@ -177,19 +177,7 @@ class Space extends SplObjectStorage
|
||||
return $convergence;
|
||||
}
|
||||
|
||||
/**
 * Creates the requested number of clusters, each centred on a random point
 * drawn between the boundaries reported by getBoundaries().
 *
 * @return array list of Cluster instances (empty when $clustersNumber is not positive)
 */
private function initializeRandomClusters(int $clustersNumber): array
{
    $clusters = [];
    [$min, $max] = $this->getBoundaries();

    for ($n = 0; $n < $clustersNumber; ++$n) {
        $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
    }

    return $clusters;
}
|
||||
|
||||
protected function initializeKMPPClusters(int $clustersNumber) : array
|
||||
protected function initializeKMPPClusters(int $clustersNumber): array
|
||||
{
|
||||
$clusters = [];
|
||||
$this->rewind();
|
||||
@ -218,4 +206,16 @@ class Space extends SplObjectStorage
|
||||
|
||||
return $clusters;
|
||||
}
|
||||
|
||||
/**
 * Builds $clustersNumber clusters whose centres are random points taken
 * between the lower and upper boundary returned by getBoundaries().
 *
 * @return array list of Cluster instances (empty when $clustersNumber is not positive)
 */
private function initializeRandomClusters(int $clustersNumber): array
{
    [$lowerBound, $upperBound] = $this->getBoundaries();
    $result = [];

    // Keep adding clusters until the requested amount has been created
    while (count($result) < $clustersNumber) {
        $centre = $this->getRandomPoint($lowerBound, $upperBound);
        $result[] = new Cluster($this, $centre->getCoordinates());
    }

    return $result;
}
|
||||
}
|
||||
|
@ -31,39 +31,40 @@ abstract class Split
|
||||
|
||||
/**
 * Validates the test share, seeds the random generator and splits the dataset.
 *
 * @param Dataset  $dataset  data handed to splitDataset()
 * @param float    $testSize share of the data used for testing; must lie strictly between 0 and 1
 * @param int|null $seed     value forwarded to seedGenerator()
 *
 * @throws InvalidArgumentException when $testSize is not strictly between 0 and 1
 */
public function __construct(Dataset $dataset, float $testSize = 0.3, ?int $seed = null)
{
    // Written as a negated "inside the range" test so that NAN, which fails
    // every comparison, is rejected as well
    if (!($testSize > 0 && $testSize < 1)) {
        throw InvalidArgumentException::percentNotInRange('testSize');
    }

    $this->seedGenerator($seed);

    $this->splitDataset($dataset, $testSize);
}
|
||||
|
||||
/**
 * Splits the dataset using the given test share; called from the constructor
 * after the random generator has been seeded.
 */
abstract protected function splitDataset(Dataset $dataset, float $testSize);
|
||||
|
||||
/**
 * Returns the training samples.
 *
 * @return array the value of $this->trainSamples
 */
public function getTrainSamples(): array
{
    return $this->trainSamples;
}
|
||||
|
||||
/**
 * Returns the test samples.
 *
 * @return array the value of $this->testSamples
 */
public function getTestSamples(): array
{
    return $this->testSamples;
}
|
||||
|
||||
/**
 * Returns the training labels.
 *
 * @return array the value of $this->trainLabels
 */
public function getTrainLabels(): array
{
    return $this->trainLabels;
}
|
||||
|
||||
/**
 * Returns the test labels.
 *
 * @return array the value of $this->testLabels
 */
public function getTestLabels(): array
{
    return $this->testLabels;
}
|
||||
|
||||
/**
 * Splits the dataset using the given test share; called from the constructor
 * after the random generator has been seeded.
 */
abstract protected function splitDataset(Dataset $dataset, float $testSize);
|
||||
|
||||
protected function seedGenerator(?int $seed = null): void
|
||||
{
|
||||
if (null === $seed) {
|
||||
if ($seed === null) {
|
||||
mt_srand();
|
||||
} else {
|
||||
mt_srand($seed);
|
||||
|
@ -21,7 +21,7 @@ class StratifiedRandomSplit extends RandomSplit
|
||||
/**
|
||||
* @return Dataset[]|array
|
||||
*/
|
||||
private function splitByTarget(Dataset $dataset) : array
|
||||
private function splitByTarget(Dataset $dataset): array
|
||||
{
|
||||
$targets = $dataset->getTargets();
|
||||
$samples = $dataset->getSamples();
|
||||
@ -38,7 +38,7 @@ class StratifiedRandomSplit extends RandomSplit
|
||||
return $datasets;
|
||||
}
|
||||
|
||||
private function createDatasets(array $uniqueTargets, array $split) : array
|
||||
private function createDatasets(array $uniqueTargets, array $split): array
|
||||
{
|
||||
$datasets = [];
|
||||
foreach ($uniqueTargets as $target) {
|
||||
|
@ -31,12 +31,12 @@ class ArrayDataset implements Dataset
|
||||
$this->targets = $targets;
|
||||
}
|
||||
|
||||
/**
 * Returns the samples of this dataset.
 *
 * @return array the value of $this->samples
 */
public function getSamples(): array
{
    return $this->samples;
}
|
||||
|
||||
/**
 * Returns the targets of this dataset.
 *
 * @return array the value of $this->targets
 */
public function getTargets(): array
{
    return $this->targets;
}
|
||||
|
@ -11,7 +11,7 @@ class CsvDataset extends ArrayDataset
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $columnNames;
|
||||
protected $columnNames = [];
|
||||
|
||||
/**
|
||||
* @throws FileException
|
||||
@ -22,7 +22,8 @@ class CsvDataset extends ArrayDataset
|
||||
throw FileException::missingFile(basename($filepath));
|
||||
}
|
||||
|
||||
if (false === $handle = fopen($filepath, 'rb')) {
|
||||
$handle = fopen($filepath, 'rb');
|
||||
if ($handle === false) {
|
||||
throw FileException::cantOpenFile(basename($filepath));
|
||||
}
|
||||
|
||||
@ -44,7 +45,7 @@ class CsvDataset extends ArrayDataset
|
||||
parent::__construct($samples, $targets);
|
||||
}
|
||||
|
||||
/**
 * Returns the column names held by this dataset.
 *
 * @return array the value of $this->columnNames
 */
public function getColumnNames(): array
{
    return $this->columnNames;
}
|
||||
|
@ -9,10 +9,10 @@ interface Dataset
|
||||
/**
 * Returns the samples of the dataset.
 *
 * @return array
 */
public function getSamples(): array;
|
||||
|
||||
/**
 * Returns the targets of the dataset.
 *
 * @return array
 */
public function getTargets(): array;
|
||||
}
|
||||
|
@ -84,7 +84,7 @@ abstract class EigenTransformerBase
|
||||
/**
|
||||
* Returns the reduced data
|
||||
*/
|
||||
protected function reduce(array $data) : array
|
||||
protected function reduce(array $data): array
|
||||
{
|
||||
$m1 = new Matrix($data);
|
||||
$m2 = new Matrix($this->eigVectors);
|
||||
|
@ -4,6 +4,8 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\DimensionReduction;
|
||||
|
||||
use Closure;
|
||||
use Exception;
|
||||
use Phpml\Math\Distance\Euclidean;
|
||||
use Phpml\Math\Distance\Manhattan;
|
||||
use Phpml\Math\Matrix;
|
||||
@ -11,8 +13,11 @@ use Phpml\Math\Matrix;
|
||||
class KernelPCA extends PCA
|
||||
{
|
||||
public const KERNEL_RBF = 1;
|
||||
|
||||
public const KERNEL_SIGMOID = 2;
|
||||
|
||||
public const KERNEL_LAPLACIAN = 3;
|
||||
|
||||
public const KERNEL_LINEAR = 4;
|
||||
|
||||
/**
|
||||
@ -34,7 +39,7 @@ class KernelPCA extends PCA
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $data;
|
||||
protected $data = [];
|
||||
|
||||
/**
|
||||
* Kernel principal component analysis (KernelPCA) is an extension of PCA using
|
||||
@ -54,7 +59,7 @@ class KernelPCA extends PCA
|
||||
{
|
||||
$availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
|
||||
if (!in_array($kernel, $availableKernels)) {
|
||||
throw new \Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
|
||||
throw new Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
|
||||
}
|
||||
|
||||
parent::__construct($totalVariance, $numFeatures);
|
||||
@ -69,7 +74,7 @@ class KernelPCA extends PCA
|
||||
* $data is an n-by-m matrix and returned array is
|
||||
* n-by-k matrix where k <= m
|
||||
*/
|
||||
public function fit(array $data) : array
|
||||
public function fit(array $data): array
|
||||
{
|
||||
$numRows = count($data);
|
||||
$this->data = $data;
|
||||
@ -88,11 +93,32 @@ class KernelPCA extends PCA
|
||||
return Matrix::transposeArray($this->eigVectors);
|
||||
}
|
||||
|
||||
/**
 * Projects a single sample onto the lower dimensional space, using the
 * state produced by the most recent call to <code>fit</code>.
 *
 * @param array $sample one-dimensional sample; nested arrays are rejected
 *
 * @throws \Exception when fit() has not been run or $sample[0] is an array
 */
public function transform(array $sample): array
{
    // Guard: the $fit flag must be set before anything can be transformed
    if (!$this->fit) {
        throw new Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
    }

    // Guard: only a flat vector is accepted, not a list of samples
    if (is_array($sample[0])) {
        throw new Exception('KernelPCA::transform() accepts only one-dimensional arrays');
    }

    return $this->projectSample($this->getDistancePairs($sample));
}
|
||||
|
||||
/**
|
||||
* Calculates similarity matrix by use of selected kernel function<br>
|
||||
* An n-by-m matrix is given and an n-by-n matrix is returned
|
||||
*/
|
||||
protected function calculateKernelMatrix(array $data, int $numRows) : array
|
||||
protected function calculateKernelMatrix(array $data, int $numRows): array
|
||||
{
|
||||
$kernelFunc = $this->getKernel();
|
||||
|
||||
@ -116,7 +142,7 @@ class KernelPCA extends PCA
|
||||
*
|
||||
* K′ = K − N.K − K.N + N.K.N where N is n-by-n matrix filled with 1/n
|
||||
*/
|
||||
protected function centerMatrix(array $matrix, int $n) : array
|
||||
protected function centerMatrix(array $matrix, int $n): array
|
||||
{
|
||||
$N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
|
||||
$N = new Matrix($N, false);
|
||||
@ -140,7 +166,7 @@ class KernelPCA extends PCA
|
||||
*
|
||||
* @throws \Exception
|
||||
*/
|
||||
protected function getKernel(): \Closure
|
||||
protected function getKernel(): Closure
|
||||
{
|
||||
switch ($this->kernel) {
|
||||
case self::KERNEL_LINEAR:
|
||||
@ -173,11 +199,11 @@ class KernelPCA extends PCA
|
||||
};
|
||||
|
||||
default:
|
||||
throw new \Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
|
||||
throw new Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
|
||||
}
|
||||
}
|
||||
|
||||
protected function getDistancePairs(array $sample) : array
|
||||
protected function getDistancePairs(array $sample): array
|
||||
{
|
||||
$kernel = $this->getKernel();
|
||||
|
||||
@ -189,7 +215,7 @@ class KernelPCA extends PCA
|
||||
return $pairs;
|
||||
}
|
||||
|
||||
protected function projectSample(array $pairs) : array
|
||||
protected function projectSample(array $pairs): array
|
||||
{
|
||||
// Normalize eigenvectors by eig = eigVectors / eigValues
|
||||
$func = function ($eigVal, $eigVect) {
|
||||
@ -203,25 +229,4 @@ class KernelPCA extends PCA
|
||||
// return k.dot(eig)
|
||||
return Matrix::dot($pairs, $eig);
|
||||
}
|
||||
|
||||
/**
 * Transforms the given sample to a lower dimensional vector by using
 * the variables obtained during the last run of <code>fit</code>.
 *
 * @param array $sample one-dimensional sample; nested arrays are rejected
 *
 * @throws \Exception when fit() has not been run or $sample[0] is an array
 */
public function transform(array $sample): array
{
    if (!$this->fit) {
        throw new \Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
    }

    if (is_array($sample[0])) {
        throw new \Exception('KernelPCA::transform() accepts only one-dimensional arrays');
    }

    $pairs = $this->getDistancePairs($sample);

    return $this->projectSample($pairs);
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\DimensionReduction;
|
||||
|
||||
use Exception;
|
||||
use Phpml\Math\Matrix;
|
||||
|
||||
class LDA extends EigenTransformerBase
|
||||
@ -16,22 +17,22 @@ class LDA extends EigenTransformerBase
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
public $labels;
|
||||
public $labels = [];
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
public $means;
|
||||
public $means = [];
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
public $counts;
|
||||
public $counts = [];
|
||||
|
||||
/**
|
||||
* @var float[]
|
||||
*/
|
||||
public $overallMean;
|
||||
public $overallMean = [];
|
||||
|
||||
/**
|
||||
* Linear Discriminant Analysis (LDA) is used to reduce the dimensionality
|
||||
@ -50,18 +51,21 @@ class LDA extends EigenTransformerBase
|
||||
public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
|
||||
{
|
||||
if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
|
||||
throw new \Exception('Total variance can be a value between 0.1 and 0.99');
|
||||
throw new Exception('Total variance can be a value between 0.1 and 0.99');
|
||||
}
|
||||
|
||||
if ($numFeatures !== null && $numFeatures <= 0) {
|
||||
throw new \Exception('Number of features to be preserved should be greater than 0');
|
||||
throw new Exception('Number of features to be preserved should be greater than 0');
|
||||
}
|
||||
|
||||
if ($totalVariance !== null && $numFeatures !== null) {
|
||||
throw new \Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
|
||||
throw new Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
|
||||
}
|
||||
|
||||
if ($numFeatures !== null) {
|
||||
$this->numFeatures = $numFeatures;
|
||||
}
|
||||
|
||||
if ($totalVariance !== null) {
|
||||
$this->totalVariance = $totalVariance;
|
||||
}
|
||||
@ -70,7 +74,7 @@ class LDA extends EigenTransformerBase
|
||||
/**
|
||||
* Trains the algorithm to transform the given data to a lower dimensional space.
|
||||
*/
|
||||
public function fit(array $data, array $classes) : array
|
||||
public function fit(array $data, array $classes): array
|
||||
{
|
||||
$this->labels = $this->getLabels($classes);
|
||||
$this->means = $this->calculateMeans($data, $classes);
|
||||
@ -86,10 +90,29 @@ class LDA extends EigenTransformerBase
|
||||
return $this->reduce($data);
|
||||
}
|
||||
|
||||
/**
 * Reduces the given sample(s) to the lower dimensional space, using the
 * eigenVectors obtained in the last run of <code>fit</code>.
 *
 * @param array $sample a single flat sample or a list of samples; a flat
 *                      sample is wrapped into a one-row list before reduction
 *
 * @throws \Exception when fit() has not been run
 */
public function transform(array $sample): array
{
    if (!$this->fit) {
        throw new Exception('LDA has not been fitted with respect to original dataset, please run LDA::fit() first');
    }

    // A first element that is not an array means a single flat sample was passed
    $rows = is_array($sample[0]) ? $sample : [$sample];

    return $this->reduce($rows);
}
|
||||
|
||||
/**
|
||||
* Returns unique labels in the dataset
|
||||
*/
|
||||
protected function getLabels(array $classes) : array
|
||||
protected function getLabels(array $classes): array
|
||||
{
|
||||
$counts = array_count_values($classes);
|
||||
|
||||
@ -100,7 +123,7 @@ class LDA extends EigenTransformerBase
|
||||
* Calculates mean of each column for each class and returns
|
||||
* n by m matrix where n is number of labels and m is number of columns
|
||||
*/
|
||||
protected function calculateMeans(array $data, array $classes) : array
|
||||
protected function calculateMeans(array $data, array $classes): array
|
||||
{
|
||||
$means = [];
|
||||
$counts = [];
|
||||
@ -113,6 +136,7 @@ class LDA extends EigenTransformerBase
|
||||
if (!isset($means[$label][$col])) {
|
||||
$means[$label][$col] = 0.0;
|
||||
}
|
||||
|
||||
$means[$label][$col] += $val;
|
||||
$overallMean[$col] += $val;
|
||||
}
|
||||
@ -146,7 +170,7 @@ class LDA extends EigenTransformerBase
|
||||
* is a n by m matrix where n is number of classes and
|
||||
* m is number of columns
|
||||
*/
|
||||
protected function calculateClassVar(array $data, array $classes) : Matrix
|
||||
protected function calculateClassVar(array $data, array $classes): Matrix
|
||||
{
|
||||
// s is an n (number of classes) by m (number of column) matrix
|
||||
$s = array_fill(0, count($data[0]), array_fill(0, count($data[0]), 0));
|
||||
@ -169,7 +193,7 @@ class LDA extends EigenTransformerBase
|
||||
* is an n by m matrix where n is number of classes and
|
||||
* m is number of columns
|
||||
*/
|
||||
protected function calculateClassCov() : Matrix
|
||||
protected function calculateClassCov(): Matrix
|
||||
{
|
||||
// s is an n (number of classes) by m (number of column) matrix
|
||||
$s = array_fill(0, count($this->overallMean), array_fill(0, count($this->overallMean), 0));
|
||||
@ -187,7 +211,7 @@ class LDA extends EigenTransformerBase
|
||||
/**
|
||||
* Returns the result of the calculation (x - m)T.(x - m)
|
||||
*/
|
||||
protected function calculateVar(array $row, array $means) : Matrix
|
||||
protected function calculateVar(array $row, array $means): Matrix
|
||||
{
|
||||
$x = new Matrix($row, false);
|
||||
$m = new Matrix($means, false);
|
||||
@ -195,23 +219,4 @@ class LDA extends EigenTransformerBase
|
||||
|
||||
return $diff->transpose()->multiply($diff);
|
||||
}
|
||||
|
||||
/**
 * Transforms the given sample to a lower dimensional vector by using
 * the eigenVectors obtained in the last run of <code>fit</code>.
 *
 * @param array $sample a single flat sample or a list of samples
 *
 * @throws \Exception when fit() has not been run
 */
public function transform(array $sample): array
{
    if (!$this->fit) {
        throw new \Exception('LDA has not been fitted with respect to original dataset, please run LDA::fit() first');
    }

    // Wrap a single flat sample so reduce() always receives a list of rows
    if (!is_array($sample[0])) {
        $sample = [$sample];
    }

    return $this->reduce($sample);
}
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\DimensionReduction;
|
||||
|
||||
use Exception;
|
||||
use Phpml\Math\Statistic\Covariance;
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
@ -35,18 +36,21 @@ class PCA extends EigenTransformerBase
|
||||
public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
|
||||
{
|
||||
if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
|
||||
throw new \Exception('Total variance can be a value between 0.1 and 0.99');
|
||||
throw new Exception('Total variance can be a value between 0.1 and 0.99');
|
||||
}
|
||||
|
||||
if ($numFeatures !== null && $numFeatures <= 0) {
|
||||
throw new \Exception('Number of features to be preserved should be greater than 0');
|
||||
throw new Exception('Number of features to be preserved should be greater than 0');
|
||||
}
|
||||
|
||||
if ($totalVariance !== null && $numFeatures !== null) {
|
||||
throw new \Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
|
||||
throw new Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
|
||||
}
|
||||
|
||||
if ($numFeatures !== null) {
|
||||
$this->numFeatures = $numFeatures;
|
||||
}
|
||||
|
||||
if ($totalVariance !== null) {
|
||||
$this->totalVariance = $totalVariance;
|
||||
}
|
||||
@ -58,7 +62,7 @@ class PCA extends EigenTransformerBase
|
||||
* $data is an n-by-m matrix and returned array is
|
||||
* n-by-k matrix where k <= m
|
||||
*/
|
||||
public function fit(array $data) : array
|
||||
public function fit(array $data): array
|
||||
{
|
||||
$n = count($data[0]);
|
||||
|
||||
@ -73,6 +77,27 @@ class PCA extends EigenTransformerBase
|
||||
return $this->reduce($data);
|
||||
}
|
||||
|
||||
/**
 * Reduces the given sample(s) to the lower dimensional space, using the
 * eigenVectors obtained in the last run of <code>fit</code>.
 *
 * @param array $sample a single flat sample or a list of samples; a flat
 *                      sample is wrapped into a one-row list first
 *
 * @throws \Exception when fit() has not been run
 */
public function transform(array $sample): array
{
    if (!$this->fit) {
        throw new Exception('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
    }

    // A first element that is not an array means a single flat sample was passed
    $rows = is_array($sample[0]) ? $sample : [$sample];

    // Normalize the rows before projecting them; the column count is taken from the first row
    $normalized = $this->normalize($rows, count($rows[0]));

    return $this->reduce($normalized);
}
|
||||
|
||||
protected function calculateMeans(array $data, int $n): void
|
||||
{
|
||||
// Calculate means for each dimension
|
||||
@ -87,7 +112,7 @@ class PCA extends EigenTransformerBase
|
||||
* Normalization of the data includes subtracting mean from
|
||||
* each dimension therefore dimensions will be centered to zero
|
||||
*/
|
||||
protected function normalize(array $data, int $n) : array
|
||||
protected function normalize(array $data, int $n): array
|
||||
{
|
||||
if (empty($this->means)) {
|
||||
$this->calculateMeans($data, $n);
|
||||
@ -102,25 +127,4 @@ class PCA extends EigenTransformerBase
|
||||
|
||||
return $data;
|
||||
}
|
||||
|
||||
/**
 * Transforms the given sample to a lower dimensional vector by using
 * the eigenVectors obtained in the last run of <code>fit</code>.
 *
 * @param array $sample a single flat sample or a list of samples
 *
 * @throws \Exception when fit() has not been run
 */
public function transform(array $sample): array
{
    if (!$this->fit) {
        throw new \Exception('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
    }

    // Wrap a single flat sample so the following steps always receive a list of rows
    if (!is_array($sample[0])) {
        $sample = [$sample];
    }

    $sample = $this->normalize($sample, count($sample[0]));

    return $this->reduce($sample);
}
|
||||
}
|
||||
|
@ -4,9 +4,11 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class DatasetException extends \Exception
|
||||
use Exception;
|
||||
|
||||
class DatasetException extends Exception
|
||||
{
|
||||
/**
 * Builds the exception for a dataset root folder that is missing.
 *
 * @param string $path folder path inserted into the message
 */
public static function missingFolder(string $path): self
{
    return new self(sprintf('Dataset root folder "%s" missing.', $path));
}
|
||||
|
@ -4,19 +4,21 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class FileException extends \Exception
|
||||
use Exception;
|
||||
|
||||
class FileException extends Exception
|
||||
{
|
||||
/**
 * Builds the exception for a file that is missing.
 *
 * @param string $filepath file path inserted into the message
 */
public static function missingFile(string $filepath): self
{
    return new self(sprintf('File "%s" missing.', $filepath));
}
|
||||
|
||||
/**
 * Builds the exception for a file that cannot be opened.
 *
 * @param string $filepath file path inserted into the message
 */
public static function cantOpenFile(string $filepath): self
{
    return new self(sprintf('File "%s" can\'t be open.', $filepath));
}
|
||||
|
||||
/**
 * Builds the exception for a file that cannot be saved.
 *
 * @param string $filepath file path inserted into the message
 */
public static function cantSaveFile(string $filepath): self
{
    return new self(sprintf('File "%s" can\'t be saved.', $filepath));
}
|
||||
|
@ -4,39 +4,41 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class InvalidArgumentException extends \Exception
|
||||
use Exception;
|
||||
|
||||
class InvalidArgumentException extends Exception
|
||||
{
|
||||
/**
 * Builds the exception for arrays whose sizes do not match.
 */
public static function arraySizeNotMatch(): self
{
    return new self('Size of given arrays does not match');
}
|
||||
|
||||
/**
 * Builds the exception for a value that must lie between 0.0 and 1.0.
 *
 * @param mixed $name label inserted into the message via the %s placeholder
 */
public static function percentNotInRange($name): self
{
    return new self(sprintf('%s must be between 0.0 and 1.0', $name));
}
|
||||
|
||||
/**
 * Builds the exception for an array that has no elements.
 */
public static function arrayCantBeEmpty(): self
{
    return new self('The array has zero elements');
}
|
||||
|
||||
/**
 * Builds the exception for an array with too few elements.
 *
 * @param int $minimumSize required element count inserted into the message
 */
public static function arraySizeToSmall(int $minimumSize = 2): self
{
    return new self(sprintf('The array must have at least %d elements', $minimumSize));
}
|
||||
|
||||
/**
 * Builds the exception for matrices whose dimensions do not match.
 */
public static function matrixDimensionsDidNotMatch(): self
{
    return new self('Matrix dimensions did not match');
}
|
||||
|
||||
/**
 * Builds the exception for an inconsistent matrix.
 */
public static function inconsistentMatrixSupplied(): self
{
    return new self('Inconsistent matrix supplied');
}
|
||||
|
||||
/**
 * Builds the exception for an invalid number of clusters.
 */
public static function invalidClustersNumber(): self
{
    return new self('Invalid clusters number');
}
|
||||
@ -44,57 +46,57 @@ class InvalidArgumentException extends \Exception
|
||||
/**
 * Builds the exception for a target that is not one of the accepted classes.
 *
 * @param mixed $target value inserted into the message via the %s placeholder
 */
public static function invalidTarget($target): self
{
    return new self(sprintf('Target with value "%s" is not part of the accepted classes', $target));
}
|
||||
|
||||
/**
 * Builds the exception for a StopWords language that cannot be found.
 *
 * @param string $language language name inserted into the message
 */
public static function invalidStopWordsLanguage(string $language): self
{
    return new self(sprintf('Can\'t find "%s" language for StopWords', $language));
}
|
||||
|
||||
/**
 * Builds the exception for a layer node class that does not implement the Node interface.
 */
public static function invalidLayerNodeClass(): self
{
    return new self('Layer node class must implement Node interface');
}
|
||||
|
||||
/**
 * Builds the exception raised when no hidden layer is provided.
 */
public static function invalidLayersNumber(): self
{
    return new self('Provide at least 1 hidden layer');
}
|
||||
|
||||
/**
 * Builds the exception raised when fewer than two different classes are provided.
 */
public static function invalidClassesNumber(): self
{
    return new self('Provide at least 2 different classes');
}
|
||||
|
||||
/**
 * Builds the exception for classes that differ from those given to the constructor.
 */
public static function inconsistentClasses(): self
{
    return new self('The provided classes don\'t match the classes provided in the constructor');
}
|
||||
|
||||
/**
 * Builds the exception for a file that was not found.
 *
 * @param string $file file name inserted into the message
 */
public static function fileNotFound(string $file): self
{
    return new self(sprintf('File "%s" not found', $file));
}
|
||||
|
||||
/**
 * Builds the exception for a file that is not executable.
 *
 * @param string $file file name inserted into the message
 */
public static function fileNotExecutable(string $file): self
{
    return new self(sprintf('File "%s" is not executable', $file));
}
|
||||
|
||||
/**
 * Builds the exception for a path that does not exist.
 *
 * @param string $path path inserted into the message
 */
public static function pathNotFound(string $path): self
{
    return new self(sprintf('The specified path "%s" does not exist', $path));
}
|
||||
|
||||
public static function pathNotWritable(string $path) : InvalidArgumentException
|
||||
/**
 * Creates an exception whose message reports that the given path is not writable.
 *
 * @param string $path Path inserted into the message
 */
public static function pathNotWritable(string $path): self
{
    $message = sprintf('The specified path "%s" is not writable', $path);

    return new self($message);
}
|
||||
|
||||
public static function invalidOperator(string $operator) : InvalidArgumentException
|
||||
/**
 * Creates an exception whose message reports that an invalid operator was provided.
 *
 * @param string $operator Operator inserted into the message
 */
public static function invalidOperator(string $operator): self
{
    $message = sprintf('Invalid operator "%s" provided', $operator);

    return new self($message);
}
|
||||
|
@ -4,19 +4,21 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class MatrixException extends \Exception
|
||||
use Exception;
|
||||
|
||||
class MatrixException extends Exception
|
||||
{
|
||||
public static function notSquareMatrix() : MatrixException
|
||||
/**
 * Creates an exception stating that the matrix is not square.
 */
public static function notSquareMatrix(): self
{
    $message = 'Matrix is not square matrix';

    return new self($message);
}
|
||||
|
||||
public static function columnOutOfRange() : MatrixException
|
||||
/**
 * Creates an exception stating that a column is out of range.
 */
public static function columnOutOfRange(): self
{
    $message = 'Column out of range';

    return new self($message);
}
|
||||
|
||||
public static function singularMatrix() : MatrixException
|
||||
/**
 * Creates an exception stating that the matrix is singular.
 */
public static function singularMatrix(): self
{
    $message = 'Matrix is singular';

    return new self($message);
}
|
||||
|
@ -4,9 +4,11 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class NormalizerException extends \Exception
|
||||
use Exception;
|
||||
|
||||
class NormalizerException extends Exception
|
||||
{
|
||||
public static function unknownNorm() : NormalizerException
|
||||
/**
 * Creates an exception stating that an unknown norm was supplied.
 */
public static function unknownNorm(): self
{
    $message = 'Unknown norm supplied.';

    return new self($message);
}
|
||||
|
@ -4,14 +4,16 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class SerializeException extends \Exception
|
||||
use Exception;
|
||||
|
||||
class SerializeException extends Exception
|
||||
{
|
||||
public static function cantUnserialize(string $filepath) : SerializeException
|
||||
/**
 * Creates an exception whose message reports that the given file path
 * can not be unserialized.
 *
 * @param string $filepath Path inserted into the message
 */
public static function cantUnserialize(string $filepath): self
{
    $message = sprintf('"%s" can not be unserialized.', $filepath);

    return new self($message);
}
|
||||
|
||||
public static function cantSerialize(string $classname) : SerializeException
|
||||
/**
 * Creates an exception whose message reports that the given class
 * can not be serialized.
 *
 * @param string $classname Class name inserted into the message
 */
public static function cantSerialize(string $classname): self
{
    $message = sprintf('Class "%s" can not be serialized.', $classname);

    return new self($message);
}
|
||||
|
@ -11,19 +11,19 @@ class StopWords
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $stopWords;
|
||||
protected $stopWords = [];
|
||||
|
||||
/**
 * Stores the given words as array keys (each mapped to true) so that
 * membership can later be checked by key.
 *
 * @param array $stopWords Words to register; they become the keys of the lookup table
 */
public function __construct(array $stopWords)
{
    $lookup = array_fill_keys($stopWords, true);

    $this->stopWords = $lookup;
}
|
||||
|
||||
public function isStopWord(string $token) : bool
|
||||
/**
 * Tells whether the token is present as a key of the stop-word table.
 *
 * @param string $token Token to look up
 */
public function isStopWord(string $token): bool
{
    $registered = isset($this->stopWords[$token]);

    return $registered;
}
|
||||
|
||||
public static function factory(string $language = 'English') : StopWords
|
||||
public static function factory(string $language = 'English'): self
|
||||
{
|
||||
$className = __NAMESPACE__."\\StopWords\\$language";
|
||||
|
||||
|
@ -11,7 +11,7 @@ class TfIdfTransformer implements Transformer
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $idf;
|
||||
private $idf = [];
|
||||
|
||||
public function __construct(?array $samples = null)
|
||||
{
|
||||
|
@ -27,21 +27,18 @@ class TokenCountVectorizer implements Transformer
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $vocabulary;
|
||||
private $vocabulary = [];
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $frequencies;
|
||||
private $frequencies = [];
|
||||
|
||||
/**
 * Stores the collaborators and starts with an empty vocabulary and
 * an empty frequency table.
 *
 * @param Tokenizer      $tokenizer Tokenizer kept for later use
 * @param StopWords|null $stopWords Optional stop-word list; null when none is supplied
 * @param float          $minDF     Stored as-is in $this->minDF
 */
public function __construct(Tokenizer $tokenizer, ?StopWords $stopWords = null, float $minDF = 0.0)
{
    // Internal state starts out empty.
    $this->vocabulary = [];
    $this->frequencies = [];

    $this->minDF = $minDF;
    $this->stopWords = $stopWords;
    $this->tokenizer = $tokenizer;
}
|
||||
|
||||
public function fit(array $samples): void
|
||||
@ -58,7 +55,7 @@ class TokenCountVectorizer implements Transformer
|
||||
$this->checkDocumentFrequency($samples);
|
||||
}
|
||||
|
||||
public function getVocabulary() : array
|
||||
/**
 * Returns the stored vocabulary with its keys and values exchanged.
 */
public function getVocabulary(): array
{
    $flipped = array_flip($this->vocabulary);

    return $flipped;
}
|
||||
@ -80,7 +77,7 @@ class TokenCountVectorizer implements Transformer
|
||||
|
||||
foreach ($tokens as $token) {
|
||||
$index = $this->getTokenIndex($token);
|
||||
if (false !== $index) {
|
||||
if ($index !== false) {
|
||||
$this->updateFrequency($token);
|
||||
if (!isset($counts[$index])) {
|
||||
$counts[$index] = 0;
|
||||
@ -155,7 +152,7 @@ class TokenCountVectorizer implements Transformer
|
||||
}
|
||||
}
|
||||
|
||||
private function getBeyondMinimumIndexes(int $samplesCount) : array
|
||||
private function getBeyondMinimumIndexes(int $samplesCount): array
|
||||
{
|
||||
$indexes = [];
|
||||
foreach ($this->frequencies as $token => $frequency) {
|
||||
|
@ -36,6 +36,18 @@ trait OneVsRest
|
||||
$this->trainBylabel($samples, $targets);
|
||||
}
|
||||
|
||||
/**
 * Clears the classifier list, the label list and the cost values kept by
 * OneVsRest, then delegates to resetBinary() for the classifier's own state.
 */
public function reset(): void
{
    // Arrays are assigned by value, so each property gets its own empty array.
    $this->classifiers = $this->allLabels = $this->costValues = [];

    $this->resetBinary();
}
|
||||
|
||||
protected function trainByLabel(array $samples, array $targets, array $allLabels = []): void
|
||||
{
|
||||
// Overwrites the current value if it exists. $allLabels must be provided for each partialTrain run.
|
||||
@ -44,6 +56,7 @@ trait OneVsRest
|
||||
} else {
|
||||
$this->allLabels = array_keys(array_count_values($targets));
|
||||
}
|
||||
|
||||
sort($this->allLabels, SORT_STRING);
|
||||
|
||||
// If there are only two targets, then there is no need to perform OvR
|
||||
@ -77,18 +90,6 @@ trait OneVsRest
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Clears the classifier list, the label list and the cost values kept by
 * OneVsRest, then delegates to resetBinary() for the classifier's own state.
 */
public function reset(): void
{
    // Arrays are assigned by value, so each property gets its own empty array.
    $this->classifiers = $this->allLabels = $this->costValues = [];

    $this->resetBinary();
}
|
||||
|
||||
/**
|
||||
* Returns an instance of the current class after cleaning up OneVsRest stuff.
|
||||
*
|
||||
@ -105,29 +106,6 @@ trait OneVsRest
|
||||
return $classifier;
|
||||
}
|
||||
|
||||
/**
 * Rewrites every target as either the given label or "not_<label>".
 *
 * $targets is received by value, so the caller's array is left untouched.
 * Targets are matched against the label with a loose (==) comparison.
 *
 * @param mixed $label
 *
 * @return array Two elements: the binarized targets (original keys kept)
 *               and the pair of labels [$label, "not_<label>"]
 */
private function binarizeTargets(array $targets, $label): array
{
    $notLabel = "not_$label";

    // array_map with a single input array preserves the original keys.
    $binarized = array_map(function ($target) use ($label, $notLabel) {
        if ($target == $label) {
            return $label;
        }

        return $notLabel;
    }, $targets);

    return [$binarized, [$label, $notLabel]];
}
|
||||
|
||||
/**
|
||||
* @return mixed
|
||||
*/
|
||||
@ -155,8 +133,6 @@ trait OneVsRest
|
||||
|
||||
/**
|
||||
* To be overwritten by OneVsRest classifiers.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
abstract protected function resetBinary(): void;
|
||||
|
||||
@ -174,4 +150,27 @@ trait OneVsRest
|
||||
* @return mixed
|
||||
*/
|
||||
abstract protected function predictSampleBinary(array $sample);
|
||||
|
||||
/**
 * Rewrites every target as either the given label or "not_<label>".
 *
 * $targets is received by value, so the caller's array is left untouched.
 * Targets are matched against the label with a loose (==) comparison.
 *
 * @param mixed $label
 *
 * @return array Two elements: the binarized targets (original keys kept)
 *               and the pair of labels [$label, "not_<label>"]
 */
private function binarizeTargets(array $targets, $label): array
{
    $notLabel = "not_$label";

    // array_map with a single input array preserves the original keys.
    $binarized = array_map(function ($target) use ($label, $notLabel) {
        if ($target == $label) {
            return $label;
        }

        return $notLabel;
    }, $targets);

    return [$binarized, [$label, $notLabel]];
}
|
||||
}
|
||||
|
@ -4,6 +4,8 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Helper\Optimizer;
|
||||
|
||||
use Closure;
|
||||
|
||||
/**
|
||||
* Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x
|
||||
* See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method
|
||||
@ -17,7 +19,7 @@ namespace Phpml\Helper\Optimizer;
|
||||
*/
|
||||
class ConjugateGradient extends GD
|
||||
{
|
||||
public function runOptimization(array $samples, array $targets, \Closure $gradientCb) : array
|
||||
public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
|
||||
{
|
||||
$this->samples = $samples;
|
||||
$this->targets = $targets;
|
||||
@ -25,7 +27,7 @@ class ConjugateGradient extends GD
|
||||
$this->sampleCount = count($samples);
|
||||
$this->costValues = [];
|
||||
|
||||
$d = mp::muls($this->gradient($this->theta), -1);
|
||||
$d = MP::muls($this->gradient($this->theta), -1);
|
||||
|
||||
for ($i = 0; $i < $this->maxIterations; ++$i) {
|
||||
// Obtain α that minimizes f(θ + α.d)
|
||||
@ -59,7 +61,7 @@ class ConjugateGradient extends GD
|
||||
* Executes the callback function for the problem and returns
|
||||
* sum of the gradient for all samples & targets.
|
||||
*/
|
||||
protected function gradient(array $theta) : array
|
||||
protected function gradient(array $theta): array
|
||||
{
|
||||
[, $gradient] = parent::gradient($theta);
|
||||
|
||||
@ -69,7 +71,7 @@ class ConjugateGradient extends GD
|
||||
/**
|
||||
* Returns the value of f(x) for given solution
|
||||
*/
|
||||
protected function cost(array $theta) : float
|
||||
protected function cost(array $theta): float
|
||||
{
|
||||
[$cost] = parent::gradient($theta);
|
||||
|
||||
@ -90,14 +92,14 @@ class ConjugateGradient extends GD
|
||||
* b-1) If cost function decreases, continue enlarging alpha
|
||||
* b-2) If cost function increases, take the midpoint and try again
|
||||
*/
|
||||
protected function getAlpha(float $d) : float
|
||||
protected function getAlpha(float $d): float
|
||||
{
|
||||
$small = 0.0001 * $d;
|
||||
$large = 0.01 * $d;
|
||||
|
||||
// Obtain θ + α.d for two initial values, x0 and x1
|
||||
$x0 = mp::adds($this->theta, $small);
|
||||
$x1 = mp::adds($this->theta, $large);
|
||||
$x0 = MP::adds($this->theta, $small);
|
||||
$x1 = MP::adds($this->theta, $large);
|
||||
|
||||
$epsilon = 0.0001;
|
||||
$iteration = 0;
|
||||
@ -113,9 +115,9 @@ class ConjugateGradient extends GD
|
||||
|
||||
if ($fx1 < $fx0) {
|
||||
$x0 = $x1;
|
||||
$x1 = mp::adds($x1, 0.01); // Enlarge second
|
||||
$x1 = MP::adds($x1, 0.01); // Enlarge second
|
||||
} else {
|
||||
$x1 = mp::divs(mp::add($x1, $x0), 2.0);
|
||||
$x1 = MP::divs(MP::add($x1, $x0), 2.0);
|
||||
} // Get to the midpoint
|
||||
|
||||
$error = $fx1 / $this->dimensions;
|
||||
@ -135,7 +137,7 @@ class ConjugateGradient extends GD
|
||||
*
|
||||
* θ(k+1) = θ(k) + α.d
|
||||
*/
|
||||
protected function getNewTheta(float $alpha, array $d) : array
|
||||
protected function getNewTheta(float $alpha, array $d): array
|
||||
{
|
||||
$theta = $this->theta;
|
||||
|
||||
@ -164,7 +166,7 @@ class ConjugateGradient extends GD
|
||||
* See:
|
||||
* R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154.
|
||||
*/
|
||||
protected function getBeta(array $newTheta) : float
|
||||
protected function getBeta(array $newTheta): float
|
||||
{
|
||||
$dNew = array_sum($this->gradient($newTheta));
|
||||
$dOld = array_sum($this->gradient($this->theta)) + 1e-100;
|
||||
@ -177,11 +179,11 @@ class ConjugateGradient extends GD
|
||||
*
|
||||
* d(k+1) =–∇f(x(k+1)) + β(k).d(k)
|
||||
*/
|
||||
protected function getNewDirection(array $theta, float $beta, array $d) : array
|
||||
protected function getNewDirection(array $theta, float $beta, array $d): array
{
    // First term: the gradient at $theta multiplied by -1.
    $negatedGradient = MP::muls($this->gradient($theta), -1);

    // Second term: the previous direction scaled by beta.
    $scaledDirection = MP::muls($d, $beta);

    return MP::add($negatedGradient, $scaledDirection);
}
|
||||
}
|
||||
|
||||
@ -189,12 +191,12 @@ class ConjugateGradient extends GD
|
||||
* Handles element-wise vector operations between vector-vector
|
||||
* and vector-scalar variables
|
||||
*/
|
||||
class mp
|
||||
class MP
|
||||
{
|
||||
/**
|
||||
* Element-wise <b>multiplication</b> of two vectors of the same size
|
||||
*/
|
||||
public static function mul(array $m1, array $m2) : array
|
||||
public static function mul(array $m1, array $m2): array
|
||||
{
|
||||
$res = [];
|
||||
foreach ($m1 as $i => $val) {
|
||||
@ -207,7 +209,7 @@ class mp
|
||||
/**
|
||||
* Element-wise <b>division</b> of two vectors of the same size
|
||||
*/
|
||||
public static function div(array $m1, array $m2) : array
|
||||
public static function div(array $m1, array $m2): array
|
||||
{
|
||||
$res = [];
|
||||
foreach ($m1 as $i => $val) {
|
||||
@ -220,7 +222,7 @@ class mp
|
||||
/**
|
||||
* Element-wise <b>addition</b> of two vectors of the same size
|
||||
*/
|
||||
public static function add(array $m1, array $m2, int $mag = 1) : array
|
||||
public static function add(array $m1, array $m2, int $mag = 1): array
|
||||
{
|
||||
$res = [];
|
||||
foreach ($m1 as $i => $val) {
|
||||
@ -233,7 +235,7 @@ class mp
|
||||
/**
|
||||
* Element-wise <b>subtraction</b> of two vectors of the same size
|
||||
*/
|
||||
public static function sub(array $m1, array $m2) : array
|
||||
public static function sub(array $m1, array $m2): array
{
    // Delegates to add(), passing -1 as its third ($mag) argument.
    $magnitude = -1;

    return self::add($m1, $m2, $magnitude);
}
|
||||
@ -241,7 +243,7 @@ class mp
|
||||
/**
|
||||
* Element-wise <b>multiplication</b> of a vector with a scalar
|
||||
*/
|
||||
public static function muls(array $m1, float $m2) : array
|
||||
public static function muls(array $m1, float $m2): array
|
||||
{
|
||||
$res = [];
|
||||
foreach ($m1 as $val) {
|
||||
@ -254,7 +256,7 @@ class mp
|
||||
/**
|
||||
* Element-wise <b>division</b> of a vector with a scalar
|
||||
*/
|
||||
public static function divs(array $m1, float $m2) : array
|
||||
public static function divs(array $m1, float $m2): array
|
||||
{
|
||||
$res = [];
|
||||
foreach ($m1 as $val) {
|
||||
@ -267,7 +269,7 @@ class mp
|
||||
/**
|
||||
* Element-wise <b>addition</b> of a vector with a scalar
|
||||
*/
|
||||
public static function adds(array $m1, float $m2, int $mag = 1) : array
|
||||
public static function adds(array $m1, float $m2, int $mag = 1): array
|
||||
{
|
||||
$res = [];
|
||||
foreach ($m1 as $val) {
|
||||
@ -280,7 +282,7 @@ class mp
|
||||
/**
|
||||
* Element-wise <b>subtraction</b> of a vector with a scalar
|
||||
*/
|
||||
public static function subs(array $m1, float $m2) : array
|
||||
public static function subs(array $m1, float $m2): array
{
    // Delegates to adds(), passing -1 as its third ($mag) argument.
    $magnitude = -1;

    return self::adds($m1, $m2, $magnitude);
}
|
||||
|
@ -4,6 +4,8 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Helper\Optimizer;
|
||||
|
||||
use Closure;
|
||||
|
||||
/**
|
||||
* Batch version of Gradient Descent to optimize the weights
|
||||
* of a classifier given samples, targets and the objective function to minimize
|
||||
@ -17,7 +19,7 @@ class GD extends StochasticGD
|
||||
*/
|
||||
protected $sampleCount = null;
|
||||
|
||||
public function runOptimization(array $samples, array $targets, \Closure $gradientCb) : array
|
||||
public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
|
||||
{
|
||||
$this->samples = $samples;
|
||||
$this->targets = $targets;
|
||||
@ -51,7 +53,7 @@ class GD extends StochasticGD
|
||||
* Calculates gradient, cost function and penalty term for each sample
|
||||
* then returns them as an array of values
|
||||
*/
|
||||
protected function gradient(array $theta) : array
|
||||
protected function gradient(array $theta): array
|
||||
{
|
||||
$costs = [];
|
||||
$gradient = [];
|
||||
|
@ -4,6 +4,9 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Helper\Optimizer;
|
||||
|
||||
use Closure;
|
||||
use Exception;
|
||||
|
||||
abstract class Optimizer
|
||||
{
|
||||
/**
|
||||
@ -11,7 +14,7 @@ abstract class Optimizer
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $theta;
|
||||
protected $theta = [];
|
||||
|
||||
/**
|
||||
* Number of dimensions
|
||||
@ -30,7 +33,7 @@ abstract class Optimizer
|
||||
// Inits the weights randomly
|
||||
$this->theta = [];
|
||||
for ($i = 0; $i < $this->dimensions; ++$i) {
|
||||
$this->theta[] = rand() / (float) getrandmax();
|
||||
$this->theta[] = random_int(0, getrandmax()) / (float) getrandmax();
|
||||
}
|
||||
}
|
||||
|
||||
@ -44,7 +47,7 @@ abstract class Optimizer
|
||||
public function setInitialTheta(array $theta)
|
||||
{
|
||||
if (count($theta) != $this->dimensions) {
|
||||
throw new \Exception("Number of values in the weights array should be $this->dimensions");
|
||||
throw new Exception("Number of values in the weights array should be $this->dimensions");
|
||||
}
|
||||
|
||||
$this->theta = $theta;
|
||||
@ -56,5 +59,5 @@ abstract class Optimizer
|
||||
* Executes the optimization with the given samples & targets
|
||||
* and returns the weights
|
||||
*/
|
||||
abstract public function runOptimization(array $samples, array $targets, \Closure $gradientCb);
|
||||
abstract public function runOptimization(array $samples, array $targets, Closure $gradientCb);
|
||||
}
|
||||
|
@ -4,6 +4,8 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Helper\Optimizer;
|
||||
|
||||
use Closure;
|
||||
|
||||
/**
|
||||
* Stochastic Gradient Descent optimization method
|
||||
* to find a solution for the equation A.ϴ = y where
|
||||
@ -66,6 +68,7 @@ class StochasticGD extends Optimizer
|
||||
* @var bool
|
||||
*/
|
||||
protected $enableEarlyStop = true;
|
||||
|
||||
/**
|
||||
* List of values obtained by evaluating the cost function at each iteration
|
||||
* of the algorithm
|
||||
@ -141,7 +144,7 @@ class StochasticGD extends Optimizer
|
||||
* The cost function to minimize and the gradient of the function are to be
|
||||
* handled by the callback function provided as the third parameter of the method.
|
||||
*/
|
||||
public function runOptimization(array $samples, array $targets, \Closure $gradientCb) : array
|
||||
public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
|
||||
{
|
||||
$this->samples = $samples;
|
||||
$this->targets = $targets;
|
||||
@ -181,7 +184,16 @@ class StochasticGD extends Optimizer
|
||||
return $this->theta = $bestTheta;
|
||||
}
|
||||
|
||||
protected function updateTheta() : float
|
||||
/**
 * Gives back the cost values stored in $this->costValues
 * (one per iteration of the last optimization run).
 */
public function getCostValues(): array
{
    $history = $this->costValues;

    return $history;
}
|
||||
|
||||
protected function updateTheta(): float
|
||||
{
|
||||
$jValue = 0.0;
|
||||
$theta = $this->theta;
|
||||
@ -237,15 +249,6 @@ class StochasticGD extends Optimizer
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Gives back the cost values stored in $this->costValues
 * (one per iteration of the last optimization run).
 */
public function getCostValues(): array
{
    $history = $this->costValues;

    return $history;
}
|
||||
|
||||
/**
|
||||
* Clears the optimizer internal vars after the optimization process.
|
||||
*/
|
||||
|
@ -10,5 +10,5 @@ interface Distance
|
||||
* @param array $a
|
||||
* @param array $b
|
||||
*/
|
||||
public function distance(array $a, array $b) : float;
|
||||
public function distance(array $a, array $b): float;
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ class Chebyshev implements Distance
|
||||
/**
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function distance(array $a, array $b) : float
|
||||
public function distance(array $a, array $b): float
|
||||
{
|
||||
if (count($a) !== count($b)) {
|
||||
throw InvalidArgumentException::arraySizeNotMatch();
|
||||
|
@ -12,7 +12,7 @@ class Euclidean implements Distance
|
||||
/**
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function distance(array $a, array $b) : float
|
||||
public function distance(array $a, array $b): float
|
||||
{
|
||||
if (count($a) !== count($b)) {
|
||||
throw InvalidArgumentException::arraySizeNotMatch();
|
||||
@ -30,7 +30,7 @@ class Euclidean implements Distance
|
||||
/**
|
||||
* Square of Euclidean distance
|
||||
*/
|
||||
public function sqDistance(array $a, array $b) : float
|
||||
public function sqDistance(array $a, array $b): float
{
    // Reuses distance() and squares its result.
    $distance = $this->distance($a, $b);

    return $distance ** 2;
}
|
||||
|
@ -12,7 +12,7 @@ class Manhattan implements Distance
|
||||
/**
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function distance(array $a, array $b) : float
|
||||
public function distance(array $a, array $b): float
|
||||
{
|
||||
if (count($a) !== count($b)) {
|
||||
throw InvalidArgumentException::arraySizeNotMatch();
|
||||
|
@ -22,7 +22,7 @@ class Minkowski implements Distance
|
||||
/**
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public function distance(array $a, array $b) : float
|
||||
public function distance(array $a, array $b): float
|
||||
{
|
||||
if (count($a) !== count($b)) {
|
||||
throw InvalidArgumentException::arraySizeNotMatch();
|
||||
|
@ -7,10 +7,10 @@ namespace Phpml\Math;
|
||||
interface Kernel
|
||||
{
|
||||
/**
|
||||
* @param float $a
|
||||
* @param float $b
|
||||
* @param float|array $a
|
||||
* @param float|array $b
|
||||
*
|
||||
* @return float
|
||||
* @return float|array
|
||||
*/
|
||||
public function compute($a, $b);
|
||||
}
|
||||
|
@ -23,12 +23,11 @@ class RBF implements Kernel
|
||||
* @param array $a
|
||||
* @param array $b
|
||||
*/
|
||||
public function compute($a, $b)
|
||||
public function compute($a, $b): float
{
    // Product::scalar is presumably the dot product (verify against Product);
    // under that reading the exponent below is -gamma * (a.a + b.b - 2 a.b).
    $doubledCross = 2 * Product::scalar($a, $b);
    $selfProducts = Product::scalar($a, $a) + Product::scalar($b, $b);

    $exponent = -$this->gamma * ($selfProducts - $doubledCross);

    return exp($exponent);
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
/**
|
||||
* Class to obtain eigenvalues and eigenvectors of a real matrix.
|
||||
*
|
||||
@ -54,6 +55,7 @@ class EigenvalueDecomposition
|
||||
* @var array
|
||||
*/
|
||||
private $d = [];
|
||||
|
||||
private $e = [];
|
||||
|
||||
/**
|
||||
@ -64,25 +66,26 @@ class EigenvalueDecomposition
|
||||
private $V = [];
|
||||
|
||||
/**
|
||||
* Array for internal storage of nonsymmetric Hessenberg form.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
* Array for internal storage of nonsymmetric Hessenberg form.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $H = [];
|
||||
|
||||
/**
|
||||
* Working storage for nonsymmetric algorithm.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $ort;
|
||||
* Working storage for nonsymmetric algorithm.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $ort = [];
|
||||
|
||||
/**
|
||||
* Used for complex scalar division.
|
||||
*
|
||||
* @var float
|
||||
*/
|
||||
* Used for complex scalar division.
|
||||
*
|
||||
* @var float
|
||||
*/
|
||||
private $cdivr;
|
||||
|
||||
private $cdivi;
|
||||
|
||||
/**
|
||||
@ -116,6 +119,71 @@ class EigenvalueDecomposition
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Return the eigenvector matrix
 *
 * The stored matrix V is wrapped in a Matrix, transposed and converted back
 * to an array; every resulting row is then divided by the square root of
 * the sum of its squared components.
 */
public function getEigenvectors(): array
{
    $rows = (new Matrix($this->V))->transpose()->toArray();

    // Always return the eigenvectors of length 1.0
    return array_map(function ($vect) {
        // The size does not change inside the loops, so it is read once.
        $size = count($vect);

        $squares = 0;
        for ($i = 0; $i < $size; ++$i) {
            $squares += $vect[$i] ** 2;
        }

        $norm = sqrt($squares);
        for ($i = 0; $i < $size; ++$i) {
            $vect[$i] = $vect[$i] / $norm;
        }

        return $vect;
    }, $rows);
}
|
||||
|
||||
/**
 * Return the real parts of the eigenvalues, as stored in $this->d
 * d = real(diag(D));
 */
public function getRealEigenvalues(): array
{
    $realParts = $this->d;

    return $realParts;
}
|
||||
|
||||
/**
 * Return the imaginary parts of the eigenvalues, as stored in $this->e
 * d = imag(diag(D))
 */
public function getImagEigenvalues(): array
{
    $imaginaryParts = $this->e;

    return $imaginaryParts;
}
|
||||
|
||||
/**
 * Return the block diagonal eigenvalue matrix
 *
 * Row i holds d[i] on the diagonal. When e[i] is non-zero it is written
 * next to the diagonal: in column i + 1 if positive, in column i - 1 otherwise.
 */
public function getDiagonalEigenvalues(): array
{
    $D = [];

    for ($i = 0; $i < $this->n; ++$i) {
        $row = array_fill(0, $this->n, 0.0);
        $row[$i] = $this->d[$i];

        $imaginary = $this->e[$i];
        if ($imaginary != 0) {
            $column = $imaginary > 0 ? $i + 1 : $i - 1;
            $row[$column] = $imaginary;
        }

        $D[$i] = $row;
    }

    return $D;
}
|
||||
|
||||
/**
|
||||
* Symmetric Householder reduction to tridiagonal form.
|
||||
*/
|
||||
@ -158,6 +226,7 @@ class EigenvalueDecomposition
|
||||
for ($j = 0; $j < $i; ++$j) {
|
||||
$this->e[$j] = 0.0;
|
||||
}
|
||||
|
||||
// Apply similarity transformation to remaining columns.
|
||||
for ($j = 0; $j < $i; ++$j) {
|
||||
$f = $this->d[$j];
|
||||
@ -168,6 +237,7 @@ class EigenvalueDecomposition
|
||||
$g += $this->V[$k][$j] * $this->d[$k];
|
||||
$this->e[$k] += $this->V[$k][$j] * $f;
|
||||
}
|
||||
|
||||
$this->e[$j] = $g;
|
||||
}
|
||||
|
||||
@ -185,16 +255,19 @@ class EigenvalueDecomposition
|
||||
for ($j = 0; $j < $i; ++$j) {
|
||||
$this->e[$j] -= $hh * $this->d[$j];
|
||||
}
|
||||
|
||||
for ($j = 0; $j < $i; ++$j) {
|
||||
$f = $this->d[$j];
|
||||
$g = $this->e[$j];
|
||||
for ($k = $j; $k <= $i_; ++$k) {
|
||||
$this->V[$k][$j] -= ($f * $this->e[$k] + $g * $this->d[$k]);
|
||||
}
|
||||
|
||||
$this->d[$j] = $this->V[$i - 1][$j];
|
||||
$this->V[$i][$j] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
$this->d[$i] = $h;
|
||||
}
|
||||
|
||||
@ -207,16 +280,19 @@ class EigenvalueDecomposition
|
||||
for ($k = 0; $k <= $i; ++$k) {
|
||||
$this->d[$k] = $this->V[$k][$i + 1] / $h;
|
||||
}
|
||||
|
||||
for ($j = 0; $j <= $i; ++$j) {
|
||||
$g = 0.0;
|
||||
for ($k = 0; $k <= $i; ++$k) {
|
||||
$g += $this->V[$k][$i + 1] * $this->V[$k][$j];
|
||||
}
|
||||
|
||||
for ($k = 0; $k <= $i; ++$k) {
|
||||
$this->V[$k][$j] -= $g * $this->d[$k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for ($k = 0; $k <= $i; ++$k) {
|
||||
$this->V[$k][$i + 1] = 0.0;
|
||||
}
|
||||
@ -241,6 +317,7 @@ class EigenvalueDecomposition
|
||||
for ($i = 1; $i < $this->n; ++$i) {
|
||||
$this->e[$i - 1] = $this->e[$i];
|
||||
}
|
||||
|
||||
$this->e[$this->n - 1] = 0.0;
|
||||
$f = 0.0;
|
||||
$tst1 = 0.0;
|
||||
@ -254,8 +331,10 @@ class EigenvalueDecomposition
|
||||
if (abs($this->e[$m]) <= $eps * $tst1) {
|
||||
break;
|
||||
}
|
||||
|
||||
++$m;
|
||||
}
|
||||
|
||||
// If m == l, $this->d[l] is an eigenvalue,
|
||||
// otherwise, iterate.
|
||||
if ($m > $l) {
|
||||
@ -270,6 +349,7 @@ class EigenvalueDecomposition
|
||||
if ($p < 0) {
|
||||
$r *= -1;
|
||||
}
|
||||
|
||||
$this->d[$l] = $this->e[$l] / ($p + $r);
|
||||
$this->d[$l + 1] = $this->e[$l] * ($p + $r);
|
||||
$dl1 = $this->d[$l + 1];
|
||||
@ -277,6 +357,7 @@ class EigenvalueDecomposition
|
||||
for ($i = $l + 2; $i < $this->n; ++$i) {
|
||||
$this->d[$i] -= $h;
|
||||
}
|
||||
|
||||
$f += $h;
|
||||
// Implicit QL transformation.
|
||||
$p = $this->d[$m];
|
||||
@ -303,12 +384,14 @@ class EigenvalueDecomposition
|
||||
$this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h;
|
||||
}
|
||||
}
|
||||
|
||||
$p = -$s * $s2 * $c3 * $el1 * $this->e[$l] / $dl1;
|
||||
$this->e[$l] = $s * $p;
|
||||
$this->d[$l] = $c * $p;
|
||||
// Check for convergence.
|
||||
} while (abs($this->e[$l]) > $eps * $tst1);
|
||||
}
|
||||
|
||||
$this->d[$l] = $this->d[$l] + $f;
|
||||
$this->e[$l] = 0.0;
|
||||
}
|
||||
@ -323,6 +406,7 @@ class EigenvalueDecomposition
|
||||
$p = $this->d[$j];
|
||||
}
|
||||
}
|
||||
|
||||
if ($k != $i) {
|
||||
$this->d[$k] = $this->d[$i];
|
||||
$this->d[$i] = $p;
|
||||
@ -354,6 +438,7 @@ class EigenvalueDecomposition
|
||||
for ($i = $m; $i <= $high; ++$i) {
|
||||
$scale = $scale + abs($this->H[$i][$m - 1]);
|
||||
}
|
||||
|
||||
if ($scale != 0.0) {
|
||||
// Compute Householder transformation.
|
||||
$h = 0.0;
|
||||
@ -361,10 +446,12 @@ class EigenvalueDecomposition
|
||||
$this->ort[$i] = $this->H[$i][$m - 1] / $scale;
|
||||
$h += $this->ort[$i] * $this->ort[$i];
|
||||
}
|
||||
|
||||
$g = sqrt($h);
|
||||
if ($this->ort[$m] > 0) {
|
||||
$g *= -1;
|
||||
}
|
||||
|
||||
$h -= $this->ort[$m] * $g;
|
||||
$this->ort[$m] -= $g;
|
||||
// Apply Householder similarity transformation
|
||||
@ -374,21 +461,25 @@ class EigenvalueDecomposition
|
||||
for ($i = $high; $i >= $m; --$i) {
|
||||
$f += $this->ort[$i] * $this->H[$i][$j];
|
||||
}
|
||||
|
||||
$f /= $h;
|
||||
for ($i = $m; $i <= $high; ++$i) {
|
||||
$this->H[$i][$j] -= $f * $this->ort[$i];
|
||||
}
|
||||
}
|
||||
|
||||
for ($i = 0; $i <= $high; ++$i) {
|
||||
$f = 0.0;
|
||||
for ($j = $high; $j >= $m; --$j) {
|
||||
$f += $this->ort[$j] * $this->H[$i][$j];
|
||||
}
|
||||
|
||||
$f = $f / $h;
|
||||
for ($j = $m; $j <= $high; ++$j) {
|
||||
$this->H[$i][$j] -= $f * $this->ort[$j];
|
||||
}
|
||||
}
|
||||
|
||||
$this->ort[$m] = $scale * $this->ort[$m];
|
||||
$this->H[$m][$m - 1] = $scale * $g;
|
||||
}
|
||||
@ -400,16 +491,19 @@ class EigenvalueDecomposition
|
||||
$this->V[$i][$j] = ($i == $j ? 1.0 : 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
for ($m = $high - 1; $m >= $low + 1; --$m) {
|
||||
if ($this->H[$m][$m - 1] != 0.0) {
|
||||
for ($i = $m + 1; $i <= $high; ++$i) {
|
||||
$this->ort[$i] = $this->H[$i][$m - 1];
|
||||
}
|
||||
|
||||
for ($j = $m; $j <= $high; ++$j) {
|
||||
$g = 0.0;
|
||||
for ($i = $m; $i <= $high; ++$i) {
|
||||
$g += $this->ort[$i] * $this->V[$i][$j];
|
||||
}
|
||||
|
||||
// Double division avoids possible underflow
|
||||
$g = ($g / $this->ort[$m]) / $this->H[$m][$m - 1];
|
||||
for ($i = $m; $i <= $high; ++$i) {
|
||||
@ -469,6 +563,7 @@ class EigenvalueDecomposition
|
||||
$this->d[$i] = $this->H[$i][$i];
|
||||
$this->e[$i] = 0.0;
|
||||
}
|
||||
|
||||
for ($j = max($i - 1, 0); $j < $nn; ++$j) {
|
||||
$norm = $norm + abs($this->H[$i][$j]);
|
||||
}
|
||||
@ -484,11 +579,14 @@ class EigenvalueDecomposition
|
||||
if ($s == 0.0) {
|
||||
$s = $norm;
|
||||
}
|
||||
|
||||
if (abs($this->H[$l][$l - 1]) < $eps * $s) {
|
||||
break;
|
||||
}
|
||||
|
||||
--$l;
|
||||
}
|
||||
|
||||
// Check for convergence
|
||||
// One root found
|
||||
if ($l == $n) {
|
||||
@ -513,11 +611,13 @@ class EigenvalueDecomposition
|
||||
} else {
|
||||
$z = $p - $z;
|
||||
}
|
||||
|
||||
$this->d[$n - 1] = $x + $z;
|
||||
$this->d[$n] = $this->d[$n - 1];
|
||||
if ($z != 0.0) {
|
||||
$this->d[$n] = $x - $w / $z;
|
||||
}
|
||||
|
||||
$this->e[$n - 1] = 0.0;
|
||||
$this->e[$n] = 0.0;
|
||||
$x = $this->H[$n][$n - 1];
|
||||
@ -533,18 +633,21 @@ class EigenvalueDecomposition
|
||||
$this->H[$n - 1][$j] = $q * $z + $p * $this->H[$n][$j];
|
||||
$this->H[$n][$j] = $q * $this->H[$n][$j] - $p * $z;
|
||||
}
|
||||
|
||||
// Column modification
|
||||
for ($i = 0; $i <= $n; ++$i) {
|
||||
$z = $this->H[$i][$n - 1];
|
||||
$this->H[$i][$n - 1] = $q * $z + $p * $this->H[$i][$n];
|
||||
$this->H[$i][$n] = $q * $this->H[$i][$n] - $p * $z;
|
||||
}
|
||||
|
||||
// Accumulate transformations
|
||||
for ($i = $low; $i <= $high; ++$i) {
|
||||
$z = $this->V[$i][$n - 1];
|
||||
$this->V[$i][$n - 1] = $q * $z + $p * $this->V[$i][$n];
|
||||
$this->V[$i][$n] = $q * $this->V[$i][$n] - $p * $z;
|
||||
}
|
||||
|
||||
// Complex pair
|
||||
} else {
|
||||
$this->d[$n - 1] = $x + $p;
|
||||
@ -552,6 +655,7 @@ class EigenvalueDecomposition
|
||||
$this->e[$n - 1] = $z;
|
||||
$this->e[$n] = -$z;
|
||||
}
|
||||
|
||||
$n = $n - 2;
|
||||
$iter = 0;
|
||||
// No convergence yet
|
||||
@ -564,16 +668,19 @@ class EigenvalueDecomposition
|
||||
$y = $this->H[$n - 1][$n - 1];
|
||||
$w = $this->H[$n][$n - 1] * $this->H[$n - 1][$n];
|
||||
}
|
||||
|
||||
// Wilkinson's original ad hoc shift
|
||||
if ($iter == 10) {
|
||||
$exshift += $x;
|
||||
for ($i = $low; $i <= $n; ++$i) {
|
||||
$this->H[$i][$i] -= $x;
|
||||
}
|
||||
|
||||
$s = abs($this->H[$n][$n - 1]) + abs($this->H[$n - 1][$n - 2]);
|
||||
$x = $y = 0.75 * $s;
|
||||
$w = -0.4375 * $s * $s;
|
||||
}
|
||||
|
||||
// MATLAB's new ad hoc shift
|
||||
if ($iter == 30) {
|
||||
$s = ($y - $x) / 2.0;
|
||||
@ -583,14 +690,17 @@ class EigenvalueDecomposition
|
||||
if ($y < $x) {
|
||||
$s = -$s;
|
||||
}
|
||||
|
||||
$s = $x - $w / (($y - $x) / 2.0 + $s);
|
||||
for ($i = $low; $i <= $n; ++$i) {
|
||||
$this->H[$i][$i] -= $s;
|
||||
}
|
||||
|
||||
$exshift += $s;
|
||||
$x = $y = $w = 0.964;
|
||||
}
|
||||
}
|
||||
|
||||
// Could check iteration count here.
|
||||
$iter = $iter + 1;
|
||||
// Look for two consecutive small sub-diagonal elements
|
||||
@ -609,18 +719,22 @@ class EigenvalueDecomposition
|
||||
if ($m == $l) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (abs($this->H[$m][$m - 1]) * (abs($q) + abs($r)) <
|
||||
$eps * (abs($p) * (abs($this->H[$m - 1][$m - 1]) + abs($z) + abs($this->H[$m + 1][$m + 1])))) {
|
||||
break;
|
||||
}
|
||||
|
||||
--$m;
|
||||
}
|
||||
|
||||
for ($i = $m + 2; $i <= $n; ++$i) {
|
||||
$this->H[$i][$i - 2] = 0.0;
|
||||
if ($i > $m + 2) {
|
||||
$this->H[$i][$i - 3] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
// Double QR step involving rows l:n and columns m:n
|
||||
for ($k = $m; $k <= $n - 1; ++$k) {
|
||||
$notlast = ($k != $n - 1);
|
||||
@ -635,19 +749,23 @@ class EigenvalueDecomposition
|
||||
$r = $r / $x;
|
||||
}
|
||||
}
|
||||
|
||||
if ($x == 0.0) {
|
||||
break;
|
||||
}
|
||||
|
||||
$s = sqrt($p * $p + $q * $q + $r * $r);
|
||||
if ($p < 0) {
|
||||
$s = -$s;
|
||||
}
|
||||
|
||||
if ($s != 0) {
|
||||
if ($k != $m) {
|
||||
$this->H[$k][$k - 1] = -$s * $x;
|
||||
} elseif ($l != $m) {
|
||||
$this->H[$k][$k - 1] = -$this->H[$k][$k - 1];
|
||||
}
|
||||
|
||||
$p = $p + $s;
|
||||
$x = $p / $s;
|
||||
$y = $q / $s;
|
||||
@ -661,9 +779,11 @@ class EigenvalueDecomposition
|
||||
$p = $p + $r * $this->H[$k + 2][$j];
|
||||
$this->H[$k + 2][$j] = $this->H[$k + 2][$j] - $p * $z;
|
||||
}
|
||||
|
||||
$this->H[$k][$j] = $this->H[$k][$j] - $p * $x;
|
||||
$this->H[$k + 1][$j] = $this->H[$k + 1][$j] - $p * $y;
|
||||
}
|
||||
|
||||
// Column modification
|
||||
for ($i = 0; $i <= min($n, $k + 3); ++$i) {
|
||||
$p = $x * $this->H[$i][$k] + $y * $this->H[$i][$k + 1];
|
||||
@ -671,9 +791,11 @@ class EigenvalueDecomposition
|
||||
$p = $p + $z * $this->H[$i][$k + 2];
|
||||
$this->H[$i][$k + 2] = $this->H[$i][$k + 2] - $p * $r;
|
||||
}
|
||||
|
||||
$this->H[$i][$k] = $this->H[$i][$k] - $p;
|
||||
$this->H[$i][$k + 1] = $this->H[$i][$k + 1] - $p * $q;
|
||||
}
|
||||
|
||||
// Accumulate transformations
|
||||
for ($i = $low; $i <= $high; ++$i) {
|
||||
$p = $x * $this->V[$i][$k] + $y * $this->V[$i][$k + 1];
|
||||
@ -681,6 +803,7 @@ class EigenvalueDecomposition
|
||||
$p = $p + $z * $this->V[$i][$k + 2];
|
||||
$this->V[$i][$k + 2] = $this->V[$i][$k + 2] - $p * $r;
|
||||
}
|
||||
|
||||
$this->V[$i][$k] = $this->V[$i][$k] - $p;
|
||||
$this->V[$i][$k + 1] = $this->V[$i][$k + 1] - $p * $q;
|
||||
}
|
||||
@ -719,6 +842,7 @@ class EigenvalueDecomposition
|
||||
} else {
|
||||
$this->H[$i][$n] = -$r / ($eps * $norm);
|
||||
}
|
||||
|
||||
// Solve real equations
|
||||
} else {
|
||||
$x = $this->H[$i][$i + 1];
|
||||
@ -732,6 +856,7 @@ class EigenvalueDecomposition
|
||||
$this->H[$i + 1][$n] = (-$s - $y * $t) / $z;
|
||||
}
|
||||
}
|
||||
|
||||
// Overflow control
|
||||
$t = abs($this->H[$i][$n]);
|
||||
if (($eps * $t) * $t > 1) {
|
||||
@ -741,6 +866,7 @@ class EigenvalueDecomposition
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Complex vector
|
||||
} elseif ($q < 0) {
|
||||
$l = $n - 1;
|
||||
@ -753,6 +879,7 @@ class EigenvalueDecomposition
|
||||
$this->H[$n - 1][$n - 1] = $this->cdivr;
|
||||
$this->H[$n - 1][$n] = $this->cdivi;
|
||||
}
|
||||
|
||||
$this->H[$n][$n - 1] = 0.0;
|
||||
$this->H[$n][$n] = 1.0;
|
||||
for ($i = $n - 2; $i >= 0; --$i) {
|
||||
@ -763,6 +890,7 @@ class EigenvalueDecomposition
|
||||
$ra = $ra + $this->H[$i][$j] * $this->H[$j][$n - 1];
|
||||
$sa = $sa + $this->H[$i][$j] * $this->H[$j][$n];
|
||||
}
|
||||
|
||||
$w = $this->H[$i][$i] - $p;
|
||||
if ($this->e[$i] < 0.0) {
|
||||
$z = $w;
|
||||
@ -783,6 +911,7 @@ class EigenvalueDecomposition
|
||||
if ($vr == 0.0 & $vi == 0.0) {
|
||||
$vr = $eps * $norm * (abs($w) + abs($q) + abs($x) + abs($y) + abs($z));
|
||||
}
|
||||
|
||||
$this->cdiv($x * $r - $z * $ra + $q * $sa, $x * $s - $z * $sa - $q * $ra, $vr, $vi);
|
||||
$this->H[$i][$n - 1] = $this->cdivr;
|
||||
$this->H[$i][$n] = $this->cdivi;
|
||||
@ -795,6 +924,7 @@ class EigenvalueDecomposition
|
||||
$this->H[$i + 1][$n] = $this->cdivi;
|
||||
}
|
||||
}
|
||||
|
||||
// Overflow control
|
||||
$t = max(abs($this->H[$i][$n - 1]), abs($this->H[$i][$n]));
|
||||
if (($eps * $t) * $t > 1) {
|
||||
@ -824,81 +954,9 @@ class EigenvalueDecomposition
|
||||
for ($k = $low; $k <= min($j, $high); ++$k) {
|
||||
$z = $z + $this->V[$i][$k] * $this->H[$k][$j];
|
||||
}
|
||||
|
||||
$this->V[$i][$j] = $z;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Return the eigenvectors, each scaled to unit Euclidean length.
 *
 * V is wrapped in a Matrix and transposed; every row of that transpose
 * (i.e. every column of V) is then divided by its own norm.
 *
 * @return array
 */
public function getEigenvectors()
{
    $columnsOfV = (new Matrix($this->V))->transpose()->toArray();

    // Scales a single vector so that its length becomes 1.0
    $toUnitLength = function ($vector) {
        $size = count($vector);

        $squared = 0;
        for ($k = 0; $k < $size; ++$k) {
            $squared += $vector[$k] ** 2;
        }

        $norm = sqrt($squared);
        for ($k = 0; $k < $size; ++$k) {
            $vector[$k] /= $norm;
        }

        return $vector;
    };

    return array_map($toUnitLength, $columnsOfV);
}
|
||||
|
||||
/**
 * Return the real parts of the eigenvalues, i.e. d = real(diag(D)).
 *
 * @return array
 */
public function getRealEigenvalues()
{
    $realParts = $this->d;

    return $realParts;
}
|
||||
|
||||
/**
 * Return the imaginary parts of the eigenvalues, i.e. e = imag(diag(D)).
 *
 * @return array
 */
public function getImagEigenvalues()
{
    $imaginaryParts = $this->e;

    return $imaginaryParts;
}
|
||||
|
||||
/**
 * Return the block diagonal eigenvalue matrix.
 *
 * Builds an n x n array with d[i] on the diagonal. Where e[i] is non-zero it
 * is written next to the diagonal: one column to the right when positive,
 * one column to the left otherwise.
 *
 * @return array
 */
public function getDiagonalEigenvalues()
{
    $D = [];

    for ($row = 0; $row < $this->n; ++$row) {
        $line = array_fill(0, $this->n, 0.0);
        $line[$row] = $this->d[$row];

        $imaginary = $this->e[$row];
        if ($imaginary != 0) {
            if ($imaginary > 0) {
                $neighbour = $row + 1;
            } else {
                $neighbour = $row - 1;
            }

            $line[$neighbour] = $imaginary;
        }

        $D[$row] = $line;
    }

    return $D;
}
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
/**
|
||||
* @package JAMA
|
||||
*
|
||||
@ -90,6 +91,7 @@ class LUDecomposition
|
||||
for ($i = 0; $i < $this->m; ++$i) {
|
||||
$this->piv[$i] = $i;
|
||||
}
|
||||
|
||||
$this->pivsign = 1;
|
||||
$LUcolj = [];
|
||||
|
||||
@ -99,6 +101,7 @@ class LUDecomposition
|
||||
for ($i = 0; $i < $this->m; ++$i) {
|
||||
$LUcolj[$i] = &$this->LU[$i][$j];
|
||||
}
|
||||
|
||||
// Apply previous transformations.
|
||||
for ($i = 0; $i < $this->m; ++$i) {
|
||||
$LUrowi = $this->LU[$i];
|
||||
@ -108,8 +111,10 @@ class LUDecomposition
|
||||
for ($k = 0; $k < $kmax; ++$k) {
|
||||
$s += $LUrowi[$k] * $LUcolj[$k];
|
||||
}
|
||||
|
||||
$LUrowi[$j] = $LUcolj[$i] -= $s;
|
||||
}
|
||||
|
||||
// Find pivot and exchange if necessary.
|
||||
$p = $j;
|
||||
for ($i = $j + 1; $i < $this->m; ++$i) {
|
||||
@ -117,17 +122,20 @@ class LUDecomposition
|
||||
$p = $i;
|
||||
}
|
||||
}
|
||||
|
||||
if ($p != $j) {
|
||||
for ($k = 0; $k < $this->n; ++$k) {
|
||||
$t = $this->LU[$p][$k];
|
||||
$this->LU[$p][$k] = $this->LU[$j][$k];
|
||||
$this->LU[$j][$k] = $t;
|
||||
}
|
||||
|
||||
$k = $this->piv[$p];
|
||||
$this->piv[$p] = $this->piv[$j];
|
||||
$this->piv[$j] = $k;
|
||||
$this->pivsign = $this->pivsign * -1;
|
||||
}
|
||||
|
||||
// Compute multipliers.
|
||||
if (($j < $this->m) && ($this->LU[$j][$j] != 0.0)) {
|
||||
for ($i = $j + 1; $i < $this->m; ++$i) {
|
||||
@ -142,7 +150,7 @@ class LUDecomposition
|
||||
*
|
||||
* @return Matrix Lower triangular factor
|
||||
*/
|
||||
public function getL() : Matrix
|
||||
public function getL(): Matrix
|
||||
{
|
||||
$L = [];
|
||||
for ($i = 0; $i < $this->m; ++$i) {
|
||||
@ -165,7 +173,7 @@ class LUDecomposition
|
||||
*
|
||||
* @return Matrix Upper triangular factor
|
||||
*/
|
||||
public function getU() : Matrix
|
||||
public function getU(): Matrix
|
||||
{
|
||||
$U = [];
|
||||
for ($i = 0; $i < $this->n; ++$i) {
|
||||
@ -186,7 +194,7 @@ class LUDecomposition
|
||||
*
|
||||
* @return array Pivot vector
|
||||
*/
|
||||
public function getPivot() : array
|
||||
public function getPivot(): array
|
||||
{
|
||||
return $this->piv;
|
||||
}
|
||||
@ -247,7 +255,7 @@ class LUDecomposition
|
||||
*
|
||||
* @throws MatrixException
|
||||
*/
|
||||
public function solve(Matrix $B) : array
|
||||
public function solve(Matrix $B): array
|
||||
{
|
||||
if ($B->getRows() != $this->m) {
|
||||
throw MatrixException::notSquareMatrix();
|
||||
@ -268,11 +276,13 @@ class LUDecomposition
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Solve U*X = Y;
|
||||
for ($k = $this->n - 1; $k >= 0; --$k) {
|
||||
for ($j = 0; $j < $nx; ++$j) {
|
||||
$X[$k][$j] /= $this->LU[$k][$k];
|
||||
}
|
||||
|
||||
for ($i = 0; $i < $k; ++$i) {
|
||||
for ($j = 0; $j < $nx; ++$j) {
|
||||
$X[$i][$j] -= $X[$k][$j] * $this->LU[$i][$k];
|
||||
@ -283,7 +293,7 @@ class LUDecomposition
|
||||
return $X;
|
||||
}
|
||||
|
||||
protected function getSubMatrix(array $matrix, array $RL, int $j0, int $jF) : array
|
||||
protected function getSubMatrix(array $matrix, array $RL, int $j0, int $jF): array
|
||||
{
|
||||
$m = count($RL);
|
||||
$n = $jF - $j0;
|
||||
|
@ -13,7 +13,7 @@ class Matrix
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $matrix;
|
||||
private $matrix = [];
|
||||
|
||||
/**
|
||||
* @var int
|
||||
@ -56,7 +56,7 @@ class Matrix
|
||||
$this->matrix = $matrix;
|
||||
}
|
||||
|
||||
public static function fromFlatArray(array $array) : Matrix
|
||||
public static function fromFlatArray(array $array): self
|
||||
{
|
||||
$matrix = [];
|
||||
foreach ($array as $value) {
|
||||
@ -66,12 +66,12 @@ class Matrix
|
||||
return new self($matrix);
|
||||
}
|
||||
|
||||
public function toArray() : array
|
||||
public function toArray(): array
|
||||
{
|
||||
return $this->matrix;
|
||||
}
|
||||
|
||||
public function toScalar() : float
|
||||
public function toScalar(): float
|
||||
{
|
||||
return $this->matrix[0][0];
|
||||
}
|
||||
@ -89,7 +89,7 @@ class Matrix
|
||||
/**
|
||||
* @throws MatrixException
|
||||
*/
|
||||
public function getColumnValues($column) : array
|
||||
public function getColumnValues($column): array
|
||||
{
|
||||
if ($column >= $this->columns) {
|
||||
throw MatrixException::columnOutOfRange();
|
||||
@ -123,7 +123,7 @@ class Matrix
|
||||
return $this->columns === $this->rows;
|
||||
}
|
||||
|
||||
public function transpose() : Matrix
|
||||
public function transpose(): self
|
||||
{
|
||||
if ($this->rows == 1) {
|
||||
$matrix = array_map(function ($el) {
|
||||
@ -136,7 +136,7 @@ class Matrix
|
||||
return new self($matrix, false);
|
||||
}
|
||||
|
||||
public function multiply(Matrix $matrix) : Matrix
|
||||
public function multiply(self $matrix): self
|
||||
{
|
||||
if ($this->columns != $matrix->getRows()) {
|
||||
throw InvalidArgumentException::inconsistentMatrixSupplied();
|
||||
@ -157,7 +157,7 @@ class Matrix
|
||||
return new self($product, false);
|
||||
}
|
||||
|
||||
public function divideByScalar($value) : Matrix
|
||||
public function divideByScalar($value): self
|
||||
{
|
||||
$newMatrix = [];
|
||||
for ($i = 0; $i < $this->rows; ++$i) {
|
||||
@ -169,7 +169,7 @@ class Matrix
|
||||
return new self($newMatrix, false);
|
||||
}
|
||||
|
||||
public function multiplyByScalar($value) : Matrix
|
||||
public function multiplyByScalar($value): self
|
||||
{
|
||||
$newMatrix = [];
|
||||
for ($i = 0; $i < $this->rows; ++$i) {
|
||||
@ -184,7 +184,7 @@ class Matrix
|
||||
/**
|
||||
* Element-wise addition of the matrix with another one
|
||||
*/
|
||||
public function add(Matrix $other) : Matrix
|
||||
public function add(self $other): self
|
||||
{
|
||||
return $this->_add($other);
|
||||
}
|
||||
@ -192,15 +192,74 @@ class Matrix
|
||||
/**
|
||||
* Element-wise subtracting of another matrix from this one
|
||||
*/
|
||||
public function subtract(Matrix $other) : Matrix
|
||||
public function subtract(self $other): self
|
||||
{
|
||||
return $this->_add($other, -1);
|
||||
}
|
||||
|
||||
/**
 * Returns the inverse of this matrix.
 *
 * The matrix is LU-decomposed and the identity matrix of the same size is
 * passed to LUDecomposition::solve(); the result is wrapped in a new instance.
 *
 * @throws MatrixException when the matrix is not square
 */
public function inverse(): self
{
    if ($this->isSquare()) {
        $decomposition = new LUDecomposition($this);
        $solution = $decomposition->solve($this->getIdentity());

        return new self($solution, false);
    }

    throw MatrixException::notSquareMatrix();
}
|
||||
|
||||
/**
 * Returns a new matrix holding every element of this one except those in
 * the given row and the given column.
 */
public function crossOut(int $row, int $column): self
{
    $remaining = [];
    $targetRow = 0;

    for ($i = 0; $i < $this->rows; ++$i) {
        // The crossed-out row contributes nothing and does not advance the target row
        if ($row == $i) {
            continue;
        }

        $targetColumn = 0;
        for ($j = 0; $j < $this->columns; ++$j) {
            if ($column == $j) {
                continue;
            }

            $remaining[$targetRow][$targetColumn] = $this->matrix[$i][$j];
            ++$targetColumn;
        }

        ++$targetRow;
    }

    return new self($remaining, false);
}
|
||||
|
||||
/**
 * Tells whether the determinant of this matrix compares equal to zero.
 */
public function isSingular(): bool
{
    $determinant = $this->getDeterminant();

    return $determinant == 0;
}
|
||||
|
||||
/**
 * Returns the transpose of the given array.
 *
 * The array is wrapped in a Matrix, transposed, and converted back.
 */
public static function transposeArray(array $array): array
{
    $wrapped = new self($array, false);

    return $wrapped->transpose()->toArray();
}
|
||||
|
||||
/**
 * Returns the dot product of two arrays: Matrix::dot(x, y) ==> x.y'
 *
 * Both arrays are wrapped in matrices, the first is multiplied by the
 * transpose of the second, and the first row of the product is returned.
 */
public static function dot(array $array1, array $array2): array
{
    $left = new self($array1, false);
    $right = new self($array2, false);

    $product = $left->multiply($right->transpose());
    $rows = $product->toArray();

    return $rows[0];
}
|
||||
|
||||
/**
|
||||
* Element-wise addition or subtraction depending on the given sign parameter
|
||||
*/
|
||||
protected function _add(Matrix $other, int $sign = 1) : Matrix
|
||||
protected function _add(self $other, int $sign = 1): self
|
||||
{
|
||||
$a1 = $this->toArray();
|
||||
$a2 = $other->toArray();
|
||||
@ -215,23 +274,10 @@ class Matrix
|
||||
return new self($newMatrix, false);
|
||||
}
|
||||
|
||||
public function inverse() : Matrix
|
||||
{
|
||||
if (!$this->isSquare()) {
|
||||
throw MatrixException::notSquareMatrix();
|
||||
}
|
||||
|
||||
$LU = new LUDecomposition($this);
|
||||
$identity = $this->getIdentity();
|
||||
$inverse = $LU->solve($identity);
|
||||
|
||||
return new self($inverse, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns diagonal identity matrix of the same size of this matrix
|
||||
*/
|
||||
protected function getIdentity() : Matrix
|
||||
protected function getIdentity(): self
|
||||
{
|
||||
$array = array_fill(0, $this->rows, array_fill(0, $this->columns, 0));
|
||||
for ($i = 0; $i < $this->rows; ++$i) {
|
||||
@ -240,49 +286,4 @@ class Matrix
|
||||
|
||||
return new self($array, false);
|
||||
}
|
||||
|
||||
public function crossOut(int $row, int $column) : Matrix
|
||||
{
|
||||
$newMatrix = [];
|
||||
$r = 0;
|
||||
for ($i = 0; $i < $this->rows; ++$i) {
|
||||
$c = 0;
|
||||
if ($row != $i) {
|
||||
for ($j = 0; $j < $this->columns; ++$j) {
|
||||
if ($column != $j) {
|
||||
$newMatrix[$r][$c] = $this->matrix[$i][$j];
|
||||
++$c;
|
||||
}
|
||||
}
|
||||
++$r;
|
||||
}
|
||||
}
|
||||
|
||||
return new self($newMatrix, false);
|
||||
}
|
||||
|
||||
public function isSingular() : bool
|
||||
{
|
||||
return 0 == $this->getDeterminant();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the transpose of given array
|
||||
*/
|
||||
public static function transposeArray(array $array) : array
|
||||
{
|
||||
return (new self($array, false))->transpose()->toArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the dot product of two arrays<br>
|
||||
* Matrix::dot(x, y) ==> x.y'
|
||||
*/
|
||||
public static function dot(array $array1, array $array2) : array
|
||||
{
|
||||
$m1 = new self($array1, false);
|
||||
$m2 = new self($array2, false);
|
||||
|
||||
return $m1->multiply($m2->transpose())->toArray()[0];
|
||||
}
|
||||
}
|
||||
|
@ -4,12 +4,15 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Math;
|
||||
|
||||
class Set implements \IteratorAggregate
|
||||
use ArrayIterator;
|
||||
use IteratorAggregate;
|
||||
|
||||
class Set implements IteratorAggregate
|
||||
{
|
||||
/**
|
||||
* @var string[]|int[]|float[]
|
||||
*/
|
||||
private $elements;
|
||||
private $elements = [];
|
||||
|
||||
/**
|
||||
* @param string[]|int[]|float[] $elements
|
||||
@ -22,7 +25,7 @@ class Set implements \IteratorAggregate
|
||||
/**
|
||||
* Creates the union of A and B.
|
||||
*/
|
||||
public static function union(Set $a, Set $b) : Set
|
||||
public static function union(self $a, self $b): self
|
||||
{
|
||||
return new self(array_merge($a->toArray(), $b->toArray()));
|
||||
}
|
||||
@ -30,7 +33,7 @@ class Set implements \IteratorAggregate
|
||||
/**
|
||||
* Creates the intersection of A and B.
|
||||
*/
|
||||
public static function intersection(Set $a, Set $b) : Set
|
||||
public static function intersection(self $a, self $b): self
|
||||
{
|
||||
return new self(array_intersect($a->toArray(), $b->toArray()));
|
||||
}
|
||||
@ -38,7 +41,7 @@ class Set implements \IteratorAggregate
|
||||
/**
|
||||
* Creates the difference of A and B.
|
||||
*/
|
||||
public static function difference(Set $a, Set $b) : Set
|
||||
public static function difference(self $a, self $b): self
|
||||
{
|
||||
return new self(array_diff($a->toArray(), $b->toArray()));
|
||||
}
|
||||
@ -48,7 +51,7 @@ class Set implements \IteratorAggregate
|
||||
*
|
||||
* @return Set[]
|
||||
*/
|
||||
public static function cartesian(Set $a, Set $b) : array
|
||||
public static function cartesian(self $a, self $b): array
|
||||
{
|
||||
$cartesian = [];
|
||||
|
||||
@ -66,7 +69,7 @@ class Set implements \IteratorAggregate
|
||||
*
|
||||
* @return Set[]
|
||||
*/
|
||||
public static function power(Set $a) : array
|
||||
public static function power(self $a): array
|
||||
{
|
||||
$power = [new self()];
|
||||
|
||||
@ -79,24 +82,10 @@ class Set implements \IteratorAggregate
|
||||
return $power;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes duplicates and rewrites index.
|
||||
*
|
||||
* @param string[]|int[]|float[] $elements
|
||||
*
|
||||
* @return string[]|int[]|float[]
|
||||
*/
|
||||
private static function sanitize(array $elements) : array
|
||||
{
|
||||
sort($elements, SORT_ASC);
|
||||
|
||||
return array_values(array_unique($elements, SORT_ASC));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string|int|float $element
|
||||
*/
|
||||
public function add($element) : Set
|
||||
public function add($element): self
|
||||
{
|
||||
return $this->addAll([$element]);
|
||||
}
|
||||
@ -104,7 +93,7 @@ class Set implements \IteratorAggregate
|
||||
/**
|
||||
* @param string[]|int[]|float[] $elements
|
||||
*/
|
||||
public function addAll(array $elements) : Set
|
||||
public function addAll(array $elements): self
|
||||
{
|
||||
$this->elements = self::sanitize(array_merge($this->elements, $elements));
|
||||
|
||||
@ -114,7 +103,7 @@ class Set implements \IteratorAggregate
|
||||
/**
|
||||
* @param string|int|float $element
|
||||
*/
|
||||
public function remove($element) : Set
|
||||
public function remove($element): self
|
||||
{
|
||||
return $this->removeAll([$element]);
|
||||
}
|
||||
@ -122,7 +111,7 @@ class Set implements \IteratorAggregate
|
||||
/**
|
||||
* @param string[]|int[]|float[] $elements
|
||||
*/
|
||||
public function removeAll(array $elements) : Set
|
||||
public function removeAll(array $elements): self
|
||||
{
|
||||
$this->elements = self::sanitize(array_diff($this->elements, $elements));
|
||||
|
||||
@ -132,7 +121,7 @@ class Set implements \IteratorAggregate
|
||||
/**
|
||||
* @param string|int|float $element
|
||||
*/
|
||||
public function contains($element) : bool
|
||||
public function contains($element): bool
|
||||
{
|
||||
return $this->containsAll([$element]);
|
||||
}
|
||||
@ -140,7 +129,7 @@ class Set implements \IteratorAggregate
|
||||
/**
|
||||
* @param string[]|int[]|float[] $elements
|
||||
*/
|
||||
public function containsAll(array $elements) : bool
|
||||
public function containsAll(array $elements): bool
|
||||
{
|
||||
return !array_diff($elements, $this->elements);
|
||||
}
|
||||
@ -148,23 +137,37 @@ class Set implements \IteratorAggregate
|
||||
/**
|
||||
* @return string[]|int[]|float[]
|
||||
*/
|
||||
public function toArray() : array
|
||||
public function toArray(): array
|
||||
{
|
||||
return $this->elements;
|
||||
}
|
||||
|
||||
public function getIterator() : \ArrayIterator
|
||||
public function getIterator(): ArrayIterator
|
||||
{
|
||||
return new \ArrayIterator($this->elements);
|
||||
return new ArrayIterator($this->elements);
|
||||
}
|
||||
|
||||
public function isEmpty() : bool
|
||||
public function isEmpty(): bool
|
||||
{
|
||||
return $this->cardinality() == 0;
|
||||
}
|
||||
|
||||
public function cardinality() : int
|
||||
public function cardinality(): int
|
||||
{
|
||||
return count($this->elements);
|
||||
}
|
||||
|
||||
/**
 * Removes duplicates and rewrites index.
 *
 * The elements are sorted ascending, de-duplicated, and re-indexed from 0.
 *
 * @param string[]|int[]|float[] $elements
 *
 * @return string[]|int[]|float[]
 */
private static function sanitize(array $elements): array
{
    // SORT_ASC is an ordering flag for array_multisort(), not one of the
    // comparison-mode flags sort()/array_unique() document. SORT_REGULAR names
    // the standard comparison explicitly (NOTE(review): this is presumably what
    // the stray flag fell back to - confirm against the PHP versions in use).
    sort($elements, SORT_REGULAR);

    return array_values(array_unique($elements, SORT_REGULAR));
}
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ class Correlation
|
||||
*
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public static function pearson(array $x, array $y) : float
|
||||
public static function pearson(array $x, array $y): float
|
||||
{
|
||||
if (count($x) !== count($y)) {
|
||||
throw InvalidArgumentException::arraySizeNotMatch();
|
||||
|
@ -4,6 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Math\Statistic;
|
||||
|
||||
use Exception;
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
class Covariance
|
||||
@ -13,7 +14,7 @@ class Covariance
|
||||
*
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null) : float
|
||||
public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
|
||||
{
|
||||
if (empty($x) || empty($y)) {
|
||||
throw InvalidArgumentException::arrayCantBeEmpty();
|
||||
@ -51,7 +52,7 @@ class Covariance
|
||||
* @throws InvalidArgumentException
|
||||
* @throws \Exception
|
||||
*/
|
||||
public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null) : float
|
||||
public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
|
||||
{
|
||||
if (empty($data)) {
|
||||
throw InvalidArgumentException::arrayCantBeEmpty();
|
||||
@ -63,7 +64,7 @@ class Covariance
|
||||
}
|
||||
|
||||
if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) {
|
||||
throw new \Exception('Given indices i and k do not match with the dimensionality of data');
|
||||
throw new Exception('Given indices i and k do not match with the dimensionality of data');
|
||||
}
|
||||
|
||||
if ($meanX === null || $meanY === null) {
|
||||
@ -92,10 +93,12 @@ class Covariance
|
||||
if ($index == $i) {
|
||||
$val[0] = $col - $meanX;
|
||||
}
|
||||
|
||||
if ($index == $k) {
|
||||
$val[1] = $col - $meanY;
|
||||
}
|
||||
}
|
||||
|
||||
$sum += $val[0] * $val[1];
|
||||
}
|
||||
}
|
||||
@ -112,7 +115,7 @@ class Covariance
|
||||
*
|
||||
* @param array|null $means
|
||||
*/
|
||||
public static function covarianceMatrix(array $data, ?array $means = null) : array
|
||||
public static function covarianceMatrix(array $data, ?array $means = null): array
|
||||
{
|
||||
$n = count($data[0]);
|
||||
|
||||
|
@ -41,7 +41,7 @@ class Gaussian
|
||||
* Returns probability density value of the given <i>$value</i> based on
|
||||
* given standard deviation and the mean
|
||||
*/
|
||||
public static function distributionPdf(float $mean, float $std, float $value) : float
|
||||
public static function distributionPdf(float $mean, float $std, float $value): float
|
||||
{
|
||||
$normal = new self($mean, $std);
|
||||
|
||||
|
@ -11,7 +11,7 @@ class Mean
|
||||
/**
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public static function arithmetic(array $numbers) : float
|
||||
public static function arithmetic(array $numbers): float
|
||||
{
|
||||
self::checkArrayLength($numbers);
|
||||
|
||||
@ -32,7 +32,7 @@ class Mean
|
||||
sort($numbers, SORT_NUMERIC);
|
||||
$median = $numbers[$middleIndex];
|
||||
|
||||
if (0 === $count % 2) {
|
||||
if ($count % 2 === 0) {
|
||||
$median = ($median + $numbers[$middleIndex - 1]) / 2;
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ class StandardDeviation
|
||||
*
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public static function population(array $a, bool $sample = true) : float
|
||||
public static function population(array $a, bool $sample = true): float
|
||||
{
|
||||
if (empty($a)) {
|
||||
throw InvalidArgumentException::arrayCantBeEmpty();
|
||||
|
@ -51,27 +51,27 @@ class ClassificationReport
|
||||
$this->computeAverage();
|
||||
}
|
||||
|
||||
public function getPrecision() : array
|
||||
public function getPrecision(): array
|
||||
{
|
||||
return $this->precision;
|
||||
}
|
||||
|
||||
public function getRecall() : array
|
||||
public function getRecall(): array
|
||||
{
|
||||
return $this->recall;
|
||||
}
|
||||
|
||||
public function getF1score() : array
|
||||
public function getF1score(): array
|
||||
{
|
||||
return $this->f1score;
|
||||
}
|
||||
|
||||
public function getSupport() : array
|
||||
public function getSupport(): array
|
||||
{
|
||||
return $this->support;
|
||||
}
|
||||
|
||||
public function getAverage() : array
|
||||
public function getAverage(): array
|
||||
{
|
||||
return $this->average;
|
||||
}
|
||||
@ -93,6 +93,7 @@ class ClassificationReport
|
||||
$this->average[$metric] = 0.0;
|
||||
continue;
|
||||
}
|
||||
|
||||
$this->average[$metric] = array_sum($values) / count($values);
|
||||
}
|
||||
}
|
||||
@ -102,7 +103,8 @@ class ClassificationReport
|
||||
*/
|
||||
private function computePrecision(int $truePositive, int $falsePositive)
|
||||
{
|
||||
if (0 == ($divider = $truePositive + $falsePositive)) {
|
||||
$divider = $truePositive + $falsePositive;
|
||||
if ($divider == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
@ -114,23 +116,25 @@ class ClassificationReport
|
||||
*/
|
||||
private function computeRecall(int $truePositive, int $falseNegative)
|
||||
{
|
||||
if (0 == ($divider = $truePositive + $falseNegative)) {
|
||||
$divider = $truePositive + $falseNegative;
|
||||
if ($divider == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
return $truePositive / $divider;
|
||||
}
|
||||
|
||||
private function computeF1Score(float $precision, float $recall) : float
|
||||
private function computeF1Score(float $precision, float $recall): float
|
||||
{
|
||||
if (0 == ($divider = $precision + $recall)) {
|
||||
$divider = $precision + $recall;
|
||||
if ($divider == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
return 2.0 * (($precision * $recall) / $divider);
|
||||
}
|
||||
|
||||
private static function getLabelIndexedArray(array $actualLabels, array $predictedLabels) : array
|
||||
private static function getLabelIndexedArray(array $actualLabels, array $predictedLabels): array
|
||||
{
|
||||
$labels = array_values(array_unique(array_merge($actualLabels, $predictedLabels)));
|
||||
sort($labels);
|
||||
|
@ -6,7 +6,7 @@ namespace Phpml\Metric;
|
||||
|
||||
class ConfusionMatrix
|
||||
{
|
||||
public static function compute(array $actualLabels, array $predictedLabels, ?array $labels = null) : array
|
||||
public static function compute(array $actualLabels, array $predictedLabels, ?array $labels = null): array
|
||||
{
|
||||
$labels = $labels ? array_flip($labels) : self::getUniqueLabels($actualLabels);
|
||||
$matrix = self::generateMatrixWithZeros($labels);
|
||||
@ -31,7 +31,7 @@ class ConfusionMatrix
|
||||
return $matrix;
|
||||
}
|
||||
|
||||
private static function generateMatrixWithZeros(array $labels) : array
|
||||
private static function generateMatrixWithZeros(array $labels): array
|
||||
{
|
||||
$count = count($labels);
|
||||
$matrix = [];
|
||||
@ -43,7 +43,7 @@ class ConfusionMatrix
|
||||
return $matrix;
|
||||
}
|
||||
|
||||
private static function getUniqueLabels(array $labels) : array
|
||||
private static function getUniqueLabels(array $labels): array
|
||||
{
|
||||
$labels = array_values(array_unique($labels));
|
||||
sort($labels);
|
||||
|
@ -26,7 +26,7 @@ class ModelManager
|
||||
}
|
||||
}
|
||||
|
||||
public function restoreFromFile(string $filepath) : Estimator
|
||||
public function restoreFromFile(string $filepath): Estimator
|
||||
{
|
||||
if (!file_exists($filepath) || !is_readable($filepath)) {
|
||||
throw FileException::cantOpenFile(basename($filepath));
|
||||
|
@ -9,5 +9,5 @@ interface ActivationFunction
|
||||
/**
|
||||
* @param float|int $value
|
||||
*/
|
||||
public function compute($value) : float;
|
||||
public function compute($value): float;
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ class BinaryStep implements ActivationFunction
|
||||
/**
|
||||
* @param float|int $value
|
||||
*/
|
||||
public function compute($value) : float
|
||||
public function compute($value): float
|
||||
{
|
||||
return $value >= 0 ? 1.0 : 0.0;
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ class Gaussian implements ActivationFunction
|
||||
/**
|
||||
* @param float|int $value
|
||||
*/
|
||||
public function compute($value) : float
|
||||
public function compute($value): float
|
||||
{
|
||||
return exp(-pow($value, 2));
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ class HyperbolicTangent implements ActivationFunction
|
||||
/**
|
||||
* @param float|int $value
|
||||
*/
|
||||
public function compute($value) : float
|
||||
public function compute($value): float
|
||||
{
|
||||
return tanh($this->beta * $value);
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ class PReLU implements ActivationFunction
|
||||
/**
|
||||
* @param float|int $value
|
||||
*/
|
||||
public function compute($value) : float
|
||||
public function compute($value): float
|
||||
{
|
||||
return $value >= 0 ? $value : $this->beta * $value;
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ class Sigmoid implements ActivationFunction
|
||||
/**
|
||||
* @param float|int $value
|
||||
*/
|
||||
public function compute($value) : float
|
||||
public function compute($value): float
|
||||
{
|
||||
return 1 / (1 + exp(-$this->beta * $value));
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ class ThresholdedReLU implements ActivationFunction
|
||||
/**
|
||||
* @param float|int $value
|
||||
*/
|
||||
public function compute($value) : float
|
||||
public function compute($value): float
|
||||
{
|
||||
return $value > $this->theta ? $value : 0.0;
|
||||
}
|
||||
|
@ -28,20 +28,6 @@ class Layer
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ActivationFunction|null $activationFunction
|
||||
*
|
||||
* @return Neuron
|
||||
*/
|
||||
private function createNode(string $nodeClass, ?ActivationFunction $activationFunction = null)
|
||||
{
|
||||
if (Neuron::class == $nodeClass) {
|
||||
return new Neuron($activationFunction);
|
||||
}
|
||||
|
||||
return new $nodeClass();
|
||||
}
|
||||
|
||||
public function addNode(Node $node): void
|
||||
{
|
||||
$this->nodes[] = $node;
|
||||
@ -50,8 +36,20 @@ class Layer
|
||||
/**
|
||||
* @return Node[]
|
||||
*/
|
||||
public function getNodes() : array
|
||||
public function getNodes(): array
|
||||
{
|
||||
return $this->nodes;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Neuron
|
||||
*/
|
||||
private function createNode(string $nodeClass, ?ActivationFunction $activationFunction = null): Node
|
||||
{
|
||||
if ($nodeClass == Neuron::class) {
|
||||
return new Neuron($activationFunction);
|
||||
}
|
||||
|
||||
return new $nodeClass();
|
||||
}
|
||||
}
|
||||
|
@ -8,20 +8,15 @@ interface Network
|
||||
{
|
||||
/**
|
||||
* @param mixed $input
|
||||
*
|
||||
* @return self
|
||||
*/
|
||||
public function setInput($input);
|
||||
public function setInput($input): self;
|
||||
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
public function getOutput() : array;
|
||||
public function getOutput(): array;
|
||||
|
||||
public function addLayer(Layer $layer);
|
||||
|
||||
/**
|
||||
* @return Layer[]
|
||||
*/
|
||||
public function getLayers() : array;
|
||||
public function getLayers(): array;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ abstract class LayeredNetwork implements Network
|
||||
/**
|
||||
* @var Layer[]
|
||||
*/
|
||||
protected $layers;
|
||||
protected $layers = [];
|
||||
|
||||
public function addLayer(Layer $layer): void
|
||||
{
|
||||
@ -24,7 +24,7 @@ abstract class LayeredNetwork implements Network
|
||||
/**
|
||||
* @return Layer[]
|
||||
*/
|
||||
public function getLayers() : array
|
||||
public function getLayers(): array
|
||||
{
|
||||
return $this->layers;
|
||||
}
|
||||
@ -39,7 +39,7 @@ abstract class LayeredNetwork implements Network
|
||||
return $this->layers[count($this->layers) - 1];
|
||||
}
|
||||
|
||||
public function getOutput() : array
|
||||
public function getOutput(): array
|
||||
{
|
||||
$result = [];
|
||||
foreach ($this->getOutputLayer()->getNodes() as $neuron) {
|
||||
@ -54,7 +54,7 @@ abstract class LayeredNetwork implements Network
|
||||
*
|
||||
* @return $this
|
||||
*/
|
||||
public function setInput($input)
|
||||
public function setInput($input): Network
|
||||
{
|
||||
$firstLayer = $this->layers[0];
|
||||
|
||||
|
@ -20,41 +20,36 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
|
||||
{
|
||||
use Predictable;
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
private $inputLayerFeatures;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $hiddenLayers;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
protected $classes = [];
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
private $iterations;
|
||||
|
||||
/**
|
||||
* @var ActivationFunction
|
||||
*/
|
||||
protected $activationFunction;
|
||||
|
||||
/**
|
||||
* @var float
|
||||
*/
|
||||
private $learningRate;
|
||||
|
||||
/**
|
||||
* @var Backpropagation
|
||||
*/
|
||||
protected $backpropagation = null;
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
private $inputLayerFeatures;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $hiddenLayers = [];
|
||||
|
||||
/**
|
||||
* @var float
|
||||
*/
|
||||
private $learningRate;
|
||||
|
||||
/**
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
@ -78,18 +73,6 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
|
||||
$this->initNetwork();
|
||||
}
|
||||
|
||||
private function initNetwork(): void
|
||||
{
|
||||
$this->addInputLayer($this->inputLayerFeatures);
|
||||
$this->addNeuronLayers($this->hiddenLayers, $this->activationFunction);
|
||||
$this->addNeuronLayers([count($this->classes)], $this->activationFunction);
|
||||
|
||||
$this->addBiasNodes();
|
||||
$this->generateSynapses();
|
||||
|
||||
$this->backpropagation = new Backpropagation($this->learningRate);
|
||||
}
|
||||
|
||||
public function train(array $samples, array $targets): void
|
||||
{
|
||||
$this->reset();
|
||||
@ -127,6 +110,18 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
|
||||
$this->removeLayers();
|
||||
}
|
||||
|
||||
private function initNetwork(): void
|
||||
{
|
||||
$this->addInputLayer($this->inputLayerFeatures);
|
||||
$this->addNeuronLayers($this->hiddenLayers, $this->activationFunction);
|
||||
$this->addNeuronLayers([count($this->classes)], $this->activationFunction);
|
||||
|
||||
$this->addBiasNodes();
|
||||
$this->generateSynapses();
|
||||
|
||||
$this->backpropagation = new Backpropagation($this->learningRate);
|
||||
}
|
||||
|
||||
private function addInputLayer(int $nodes): void
|
||||
{
|
||||
$this->addLayer(new Layer($nodes, Input::class));
|
||||
|
@ -6,5 +6,5 @@ namespace Phpml\NeuralNetwork;
|
||||
|
||||
interface Node
|
||||
{
|
||||
public function getOutput() : float;
|
||||
public function getOutput(): float;
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ use Phpml\NeuralNetwork\Node;
|
||||
|
||||
class Bias implements Node
|
||||
{
|
||||
public function getOutput() : float
|
||||
public function getOutput(): float
|
||||
{
|
||||
return 1.0;
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ class Input implements Node
|
||||
$this->input = $input;
|
||||
}
|
||||
|
||||
public function getOutput() : float
|
||||
public function getOutput(): float
|
||||
{
|
||||
return $this->input;
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ declare(strict_types=1);
|
||||
namespace Phpml\NeuralNetwork\Node;
|
||||
|
||||
use Phpml\NeuralNetwork\ActivationFunction;
|
||||
use Phpml\NeuralNetwork\ActivationFunction\Sigmoid;
|
||||
use Phpml\NeuralNetwork\Node;
|
||||
use Phpml\NeuralNetwork\Node\Neuron\Synapse;
|
||||
|
||||
@ -13,7 +14,7 @@ class Neuron implements Node
|
||||
/**
|
||||
* @var Synapse[]
|
||||
*/
|
||||
protected $synapses;
|
||||
protected $synapses = [];
|
||||
|
||||
/**
|
||||
* @var ActivationFunction
|
||||
@ -27,7 +28,7 @@ class Neuron implements Node
|
||||
|
||||
public function __construct(?ActivationFunction $activationFunction = null)
|
||||
{
|
||||
$this->activationFunction = $activationFunction ?: new ActivationFunction\Sigmoid();
|
||||
$this->activationFunction = $activationFunction ?: new Sigmoid();
|
||||
$this->synapses = [];
|
||||
$this->output = 0;
|
||||
}
|
||||
@ -45,9 +46,9 @@ class Neuron implements Node
|
||||
return $this->synapses;
|
||||
}
|
||||
|
||||
public function getOutput() : float
|
||||
public function getOutput(): float
|
||||
{
|
||||
if (0 === $this->output) {
|
||||
if ($this->output === 0) {
|
||||
$sum = 0;
|
||||
foreach ($this->synapses as $synapse) {
|
||||
$sum += $synapse->getOutput();
|
||||
|
@ -27,12 +27,7 @@ class Synapse
|
||||
$this->weight = $weight ?: $this->generateRandomWeight();
|
||||
}
|
||||
|
||||
protected function generateRandomWeight() : float
|
||||
{
|
||||
return 1 / random_int(5, 25) * (random_int(0, 1) ? -1 : 1);
|
||||
}
|
||||
|
||||
public function getOutput() : float
|
||||
public function getOutput(): float
|
||||
{
|
||||
return $this->weight * $this->node->getOutput();
|
||||
}
|
||||
@ -42,7 +37,7 @@ class Synapse
|
||||
$this->weight += $delta;
|
||||
}
|
||||
|
||||
public function getWeight() : float
|
||||
public function getWeight(): float
|
||||
{
|
||||
return $this->weight;
|
||||
}
|
||||
@ -51,4 +46,9 @@ class Synapse
|
||||
{
|
||||
return $this->node;
|
||||
}
|
||||
|
||||
protected function generateRandomWeight(): float
|
||||
{
|
||||
return 1 / random_int(5, 25) * (random_int(0, 1) ? -1 : 1);
|
||||
}
|
||||
}
|
||||
|
@ -47,6 +47,7 @@ class Backpropagation
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$this->prevSigmas = $this->sigmas;
|
||||
}
|
||||
|
||||
@ -55,7 +56,7 @@ class Backpropagation
|
||||
$this->prevSigmas = null;
|
||||
}
|
||||
|
||||
private function getSigma(Neuron $neuron, int $targetClass, int $key, bool $lastLayer) : float
|
||||
private function getSigma(Neuron $neuron, int $targetClass, int $key, bool $lastLayer): float
|
||||
{
|
||||
$neuronOutput = $neuron->getOutput();
|
||||
$sigma = $neuronOutput * (1 - $neuronOutput);
|
||||
@ -65,6 +66,7 @@ class Backpropagation
|
||||
if ($targetClass === $key) {
|
||||
$value = 1;
|
||||
}
|
||||
|
||||
$sigma *= ($value - $neuronOutput);
|
||||
} else {
|
||||
$sigma *= $this->getPrevSigma($neuron);
|
||||
@ -75,7 +77,7 @@ class Backpropagation
|
||||
return $sigma;
|
||||
}
|
||||
|
||||
private function getPrevSigma(Neuron $neuron) : float
|
||||
private function getPrevSigma(Neuron $neuron): float
|
||||
{
|
||||
$sigma = 0.0;
|
||||
|
||||
|
@ -29,12 +29,12 @@ class Sigma
|
||||
return $this->neuron;
|
||||
}
|
||||
|
||||
public function getSigma() : float
|
||||
public function getSigma(): float
|
||||
{
|
||||
return $this->sigma;
|
||||
}
|
||||
|
||||
public function getSigmaForNeuron(Neuron $neuron) : float
|
||||
public function getSigmaForNeuron(Neuron $neuron): float
|
||||
{
|
||||
$sigma = 0.0;
|
||||
|
||||
|
@ -9,7 +9,7 @@ class Pipeline implements Estimator
|
||||
/**
|
||||
* @var array|Transformer[]
|
||||
*/
|
||||
private $transformers;
|
||||
private $transformers = [];
|
||||
|
||||
/**
|
||||
* @var Estimator
|
||||
@ -41,7 +41,7 @@ class Pipeline implements Estimator
|
||||
/**
|
||||
* @return array|Transformer[]
|
||||
*/
|
||||
public function getTransformers() : array
|
||||
public function getTransformers(): array
|
||||
{
|
||||
return $this->transformers;
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ use Phpml\Preprocessing\Imputer\Strategy;
|
||||
class Imputer implements Preprocessor
|
||||
{
|
||||
public const AXIS_COLUMN = 0;
|
||||
|
||||
public const AXIS_ROW = 1;
|
||||
|
||||
/**
|
||||
@ -64,9 +65,9 @@ class Imputer implements Preprocessor
|
||||
}
|
||||
}
|
||||
|
||||
private function getAxis(int $column, array $currentSample) : array
|
||||
private function getAxis(int $column, array $currentSample): array
|
||||
{
|
||||
if (self::AXIS_ROW === $this->axis) {
|
||||
if ($this->axis === self::AXIS_ROW) {
|
||||
return array_diff($currentSample, [$this->missingValue]);
|
||||
}
|
||||
|
||||
|
@ -9,7 +9,7 @@ use Phpml\Preprocessing\Imputer\Strategy;
|
||||
|
||||
class MeanStrategy implements Strategy
|
||||
{
|
||||
public function replaceValue(array $currentAxis) : float
|
||||
public function replaceValue(array $currentAxis): float
|
||||
{
|
||||
return Mean::arithmetic($currentAxis);
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ use Phpml\Preprocessing\Imputer\Strategy;
|
||||
|
||||
class MedianStrategy implements Strategy
|
||||
{
|
||||
public function replaceValue(array $currentAxis) : float
|
||||
public function replaceValue(array $currentAxis): float
|
||||
{
|
||||
return Mean::median($currentAxis);
|
||||
}
|
||||
|
@ -11,7 +11,9 @@ use Phpml\Math\Statistic\StandardDeviation;
|
||||
class Normalizer implements Preprocessor
|
||||
{
|
||||
public const NORM_L1 = 1;
|
||||
|
||||
public const NORM_L2 = 2;
|
||||
|
||||
public const NORM_STD = 3;
|
||||
|
||||
/**
|
||||
@ -27,12 +29,12 @@ class Normalizer implements Preprocessor
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $std;
|
||||
private $std = [];
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $mean;
|
||||
private $mean = [];
|
||||
|
||||
/**
|
||||
* @throws NormalizerException
|
||||
@ -69,7 +71,7 @@ class Normalizer implements Preprocessor
|
||||
$methods = [
|
||||
self::NORM_L1 => 'normalizeL1',
|
||||
self::NORM_L2 => 'normalizeL2',
|
||||
self::NORM_STD => 'normalizeSTD'
|
||||
self::NORM_STD => 'normalizeSTD',
|
||||
];
|
||||
$method = $methods[$this->norm];
|
||||
|
||||
@ -87,7 +89,7 @@ class Normalizer implements Preprocessor
|
||||
$norm1 += abs($feature);
|
||||
}
|
||||
|
||||
if (0 == $norm1) {
|
||||
if ($norm1 == 0) {
|
||||
$count = count($sample);
|
||||
$sample = array_fill(0, $count, 1.0 / $count);
|
||||
} else {
|
||||
@ -103,9 +105,10 @@ class Normalizer implements Preprocessor
|
||||
foreach ($sample as $feature) {
|
||||
$norm2 += $feature * $feature;
|
||||
}
|
||||
|
||||
$norm2 = sqrt((float) $norm2);
|
||||
|
||||
if (0 == $norm2) {
|
||||
if ($norm2 == 0) {
|
||||
$sample = array_fill(0, count($sample), 1);
|
||||
} else {
|
||||
foreach ($sample as &$feature) {
|
||||
|
@ -28,7 +28,7 @@ class LeastSquares implements Regression
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
private $coefficients;
|
||||
private $coefficients = [];
|
||||
|
||||
public function train(array $samples, array $targets): void
|
||||
{
|
||||
@ -51,12 +51,12 @@ class LeastSquares implements Regression
|
||||
return $result;
|
||||
}
|
||||
|
||||
public function getCoefficients() : array
|
||||
public function getCoefficients(): array
|
||||
{
|
||||
return $this->coefficients;
|
||||
}
|
||||
|
||||
public function getIntercept() : float
|
||||
public function getIntercept(): float
|
||||
{
|
||||
return $this->intercept;
|
||||
}
|
||||
@ -79,7 +79,7 @@ class LeastSquares implements Regression
|
||||
/**
|
||||
* Add one dimension for intercept calculation.
|
||||
*/
|
||||
private function getSamplesMatrix() : Matrix
|
||||
private function getSamplesMatrix(): Matrix
|
||||
{
|
||||
$samples = [];
|
||||
foreach ($this->samples as $sample) {
|
||||
@ -90,7 +90,7 @@ class LeastSquares implements Regression
|
||||
return new Matrix($samples);
|
||||
}
|
||||
|
||||
private function getTargetsMatrix() : Matrix
|
||||
private function getTargetsMatrix(): Matrix
|
||||
{
|
||||
if (is_array($this->targets[0])) {
|
||||
return new Matrix($this->targets);
|
||||
|
@ -34,7 +34,7 @@ class DataTransformer
|
||||
return $set;
|
||||
}
|
||||
|
||||
public static function predictions(string $rawPredictions, array $labels) : array
|
||||
public static function predictions(string $rawPredictions, array $labels): array
|
||||
{
|
||||
$numericLabels = self::numericLabels($labels);
|
||||
$results = [];
|
||||
@ -47,7 +47,7 @@ class DataTransformer
|
||||
return $results;
|
||||
}
|
||||
|
||||
public static function numericLabels(array $labels) : array
|
||||
public static function numericLabels(array $labels): array
|
||||
{
|
||||
$numericLabels = [];
|
||||
foreach ($labels as $label) {
|
||||
|
@ -167,7 +167,7 @@ class SupportVectorMachine
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array
|
||||
* @return array|string
|
||||
*/
|
||||
public function predict(array $samples)
|
||||
{
|
||||
|
@ -6,5 +6,5 @@ namespace Phpml\Tokenization;
|
||||
|
||||
interface Tokenizer
|
||||
{
|
||||
public function tokenize(string $text) : array;
|
||||
public function tokenize(string $text): array;
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ namespace Phpml\Tokenization;
|
||||
|
||||
class WhitespaceTokenizer implements Tokenizer
|
||||
{
|
||||
public function tokenize(string $text) : array
|
||||
public function tokenize(string $text): array
|
||||
{
|
||||
return preg_split('/[\pZ\pC]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ namespace Phpml\Tokenization;
|
||||
|
||||
class WordTokenizer implements Tokenizer
|
||||
{
|
||||
public function tokenize(string $text) : array
|
||||
public function tokenize(string $text): array
|
||||
{
|
||||
$tokens = [];
|
||||
preg_match_all('/\w\w+/u', $text, $tokens);
|
||||
|
@ -7,6 +7,7 @@ namespace tests\Phpml\Classification;
|
||||
use Phpml\Association\Apriori;
|
||||
use Phpml\ModelManager;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use ReflectionClass;
|
||||
|
||||
class AprioriTest extends TestCase
|
||||
{
|
||||
@ -172,7 +173,6 @@ class AprioriTest extends TestCase
|
||||
/**
|
||||
* Invokes objects method. Private/protected will be set accessible.
|
||||
*
|
||||
* @param object &$object Instantiated object to be called on
|
||||
* @param string $method Method name to be called
|
||||
* @param array $params Array of params to be passed
|
||||
*
|
||||
@ -180,7 +180,7 @@ class AprioriTest extends TestCase
|
||||
*/
|
||||
public function invoke(&$object, $method, array $params = [])
|
||||
{
|
||||
$reflection = new \ReflectionClass(get_class($object));
|
||||
$reflection = new ReflectionClass(get_class($object));
|
||||
$method = $reflection->getMethod($method);
|
||||
$method->setAccessible(true);
|
||||
|
||||
@ -195,7 +195,7 @@ class AprioriTest extends TestCase
|
||||
$testSamples = [['alpha', 'epsilon'], ['beta', 'theta']];
|
||||
$predicted = $classifier->predict($testSamples);
|
||||
|
||||
$filename = 'apriori-test-'.rand(100, 999).'-'.uniqid();
|
||||
$filename = 'apriori-test-'.random_int(100, 999).'-'.uniqid();
|
||||
$filepath = tempnam(sys_get_temp_dir(), $filename);
|
||||
$modelManager = new ModelManager();
|
||||
$modelManager->saveToFile($classifier, $filepath);
|
||||
|
@ -24,7 +24,7 @@ class DecisionTreeTest extends TestCase
|
||||
['sunny', 75, 70, 'true', 'Play'],
|
||||
['overcast', 72, 90, 'true', 'Play'],
|
||||
['overcast', 81, 75, 'false', 'Play'],
|
||||
['rain', 71, 80, 'true', 'Dont_play']
|
||||
['rain', 71, 80, 'true', 'Dont_play'],
|
||||
];
|
||||
|
||||
private $extraData = [
|
||||
@ -32,16 +32,6 @@ class DecisionTreeTest extends TestCase
|
||||
['scorching', 100, 93, 'true', 'Dont_play'],
|
||||
];
|
||||
|
||||
private function getData($input)
|
||||
{
|
||||
$targets = array_column($input, 4);
|
||||
array_walk($input, function (&$v): void {
|
||||
array_splice($v, 4, 1);
|
||||
});
|
||||
|
||||
return [$input, $targets];
|
||||
}
|
||||
|
||||
public function testPredictSingleSample()
|
||||
{
|
||||
[$data, $targets] = $this->getData($this->data);
|
||||
@ -68,7 +58,7 @@ class DecisionTreeTest extends TestCase
|
||||
$testSamples = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false']];
|
||||
$predicted = $classifier->predict($testSamples);
|
||||
|
||||
$filename = 'decision-tree-test-'.rand(100, 999).'-'.uniqid();
|
||||
$filename = 'decision-tree-test-'.random_int(100, 999).'-'.uniqid();
|
||||
$filepath = tempnam(sys_get_temp_dir(), $filename);
|
||||
$modelManager = new ModelManager();
|
||||
$modelManager->saveToFile($classifier, $filepath);
|
||||
@ -83,6 +73,16 @@ class DecisionTreeTest extends TestCase
|
||||
[$data, $targets] = $this->getData($this->data);
|
||||
$classifier = new DecisionTree(5);
|
||||
$classifier->train($data, $targets);
|
||||
$this->assertTrue(5 >= $classifier->actualDepth);
|
||||
$this->assertTrue($classifier->actualDepth <= 5);
|
||||
}
|
||||
|
||||
private function getData($input)
|
||||
{
|
||||
$targets = array_column($input, 4);
|
||||
array_walk($input, function (&$v): void {
|
||||
array_splice($v, 4, 1);
|
||||
});
|
||||
|
||||
return [$input, $targets];
|
||||
}
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user