Added EasyCodingStandard + lots of code fixes (#156)

* travis: move coveralls here, decouple from package

* composer: use PSR4

* phpunit: simpler config

* travis: add ecs run

* composer: add ecs dev

* use standard vendor/bin directory for dependency bins; a custom bin dir gets confused with local bins and requires gitignore handling

* ecs: add PSR2

* [cs] PSR2 spacing fixes

* [cs] PSR2 class name fix

* [cs] PHP7 fixes - return semicolon spaces, old rand functions, typehints

* [cs] fix less strict typehints

* fix typehints to make tests pass

* ecs: ignore typehint-less elements

* [cs] standardize arrays

* [cs] standardize docblock, remove unused comments

* [cs] use self where possible

* [cs] sort class elements, from public to private

* [cs] do not use yoda (found fewer yoda cases than non-yoda)

* space

* [cs] do not assign in condition

* [cs] use namespace imports if possible

* [cs] use ::class over strings

* [cs] fix defaults for arrays properties, properties and constants single spacing

* cleanup ecs comments

* [cs] use item per line in multi-items array

* missing line

* misc

* rebase
This commit is contained in:
Tomáš Votruba 2017-11-22 22:16:10 +01:00 committed by Arkadiusz Kondas
parent b1d40bfa30
commit 726cf4cddf
139 changed files with 3080 additions and 1514 deletions

4
.gitignore vendored
View File

@ -1,8 +1,4 @@
/vendor/ /vendor/
humbuglog.* humbuglog.*
/bin/phpunit
.coverage
.php_cs.cache .php_cs.cache
/bin/php-cs-fixer
/bin/coveralls
/build /build

View File

@ -6,7 +6,7 @@ matrix:
include: include:
- os: linux - os: linux
php: '7.1' php: '7.1'
env: DISABLE_XDEBUG="true" env: DISABLE_XDEBUG="true" STATIC_ANALYSIS="true"
- os: linux - os: linux
php: '7.2' php: '7.2'
@ -21,7 +21,7 @@ matrix:
before_install: before_install:
- if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then /usr/bin/env bash bin/prepare_osx_env.sh ; fi - if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then /usr/bin/env bash bin/prepare_osx_env.sh ; fi
- if [[ DISABLE_XDEBUG == "true" ]]; then phpenv config-rm xdebug.ini; fi - if [[ $DISABLE_XDEBUG == "true" ]]; then phpenv config-rm xdebug.ini; fi
install: install:
- if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then /usr/bin/env bash bin/handle_brew_pkg.sh "${_PHP}" ; fi - if [[ "${TRAVIS_OS_NAME}" == "osx" ]]; then /usr/bin/env bash bin/handle_brew_pkg.sh "${_PHP}" ; fi
@ -29,10 +29,12 @@ install:
- php composer.phar install --dev --no-interaction --ignore-platform-reqs - php composer.phar install --dev --no-interaction --ignore-platform-reqs
script: script:
- bin/phpunit $PHPUNIT_FLAGS - vendor/bin/phpunit $PHPUNIT_FLAGS
- if [[ $STATIC_ANALYSIS != "" ]]; then vendor/bin/ecs check src tests; fi
after_success: after_success:
- | - |
if [[ $PHPUNIT_FLAGS != "" ]]; then if [[ $PHPUNIT_FLAGS != "" ]]; then
php bin/coveralls -v wget https://github.com/satooshi/php-coveralls/releases/download/v1.0.1/coveralls.phar;
php coveralls.phar --verbose;
fi fi

View File

@ -12,8 +12,8 @@
} }
], ],
"autoload": { "autoload": {
"psr-0": { "psr-4": {
"Phpml": "src/" "Phpml\\": "src/Phpml"
} }
}, },
"require": { "require": {
@ -22,9 +22,8 @@
"require-dev": { "require-dev": {
"phpunit/phpunit": "^6.0", "phpunit/phpunit": "^6.0",
"friendsofphp/php-cs-fixer": "^2.4", "friendsofphp/php-cs-fixer": "^2.4",
"php-coveralls/php-coveralls": "^1.0" "symplify/easy-coding-standard": "dev-master as 2.5",
}, "symplify/coding-standard": "dev-master as 2.5",
"config": { "symplify/package-builder": "dev-master#3604bea as 2.5"
"bin-dir": "bin"
} }
} }

1964
composer.lock generated

File diff suppressed because it is too large Load Diff

39
easy-coding-standard.neon Normal file
View File

@ -0,0 +1,39 @@
includes:
- vendor/symplify/easy-coding-standard/config/psr2.neon
- vendor/symplify/easy-coding-standard/config/php70.neon
- vendor/symplify/easy-coding-standard/config/clean-code.neon
- vendor/symplify/easy-coding-standard/config/common/array.neon
- vendor/symplify/easy-coding-standard/config/common/docblock.neon
- vendor/symplify/easy-coding-standard/config/common/namespaces.neon
- vendor/symplify/easy-coding-standard/config/common/control-structures.neon
# many errors, need help
#- vendor/symplify/easy-coding-standard/config/common/strict.neon
checkers:
- Symplify\CodingStandard\Fixer\Import\ImportNamespacedNameFixer
- Symplify\CodingStandard\Fixer\Php\ClassStringToClassConstantFixer
- Symplify\CodingStandard\Fixer\Property\ArrayPropertyDefaultValueFixer
- Symplify\CodingStandard\Fixer\ClassNotation\PropertyAndConstantSeparationFixer
- Symplify\CodingStandard\Fixer\ArrayNotation\StandaloneLineInMultilineArrayFixer
parameters:
exclude_checkers:
# from strict.neon
- PhpCsFixer\Fixer\PhpUnit\PhpUnitStrictFixer
skip:
PhpCsFixer\Fixer\Alias\RandomApiMigrationFixer:
# random_int() breaks code
- src/Phpml/CrossValidation/RandomSplit.php
SlevomatCodingStandard\Sniffs\Classes\UnusedPrivateElementsSniff:
# magic calls
- src/Phpml/Preprocessing/Normalizer.php
skip_codes:
# missing typehints
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingParameterTypeHint
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversableParameterTypeHintSpecification
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingReturnTypeHint
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversableReturnTypeHintSpecification
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingPropertyTypeHint
- SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversablePropertyTypeHintSpecification

View File

@ -6,11 +6,9 @@
beStrictAboutTestSize="true" beStrictAboutTestSize="true"
beStrictAboutChangesToGlobalState="true" beStrictAboutChangesToGlobalState="true"
> >
<testsuites> <testsuite name="PHP-ML Test Suite">
<testsuite name="PHP-ML Test Suite"> <directory>tests/*</directory>
<directory>tests/*</directory> </testsuite>
</testsuite>
</testsuites>
<filter> <filter>
<whitelist processUncoveredFilesFromWhitelist="true"> <whitelist processUncoveredFilesFromWhitelist="true">

View File

@ -31,7 +31,7 @@ class Apriori implements Associator
* *
* @var mixed[][][] * @var mixed[][][]
*/ */
private $large; private $large = [];
/** /**
* Minimum relative frequency of transactions. * Minimum relative frequency of transactions.
@ -45,7 +45,7 @@ class Apriori implements Associator
* *
* @var mixed[][] * @var mixed[][]
*/ */
private $rules; private $rules = [];
/** /**
* Apriori constructor. * Apriori constructor.
@ -61,7 +61,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
public function getRules() : array public function getRules(): array
{ {
if (!$this->large) { if (!$this->large) {
$this->large = $this->apriori(); $this->large = $this->apriori();
@ -83,7 +83,7 @@ class Apriori implements Associator
* *
* @return mixed[][][] * @return mixed[][][]
*/ */
public function apriori() : array public function apriori(): array
{ {
$L = []; $L = [];
$L[1] = $this->items(); $L[1] = $this->items();
@ -102,7 +102,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
protected function predictSample(array $sample) : array protected function predictSample(array $sample): array
{ {
$predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) { $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample); return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
@ -133,7 +133,8 @@ class Apriori implements Associator
private function generateRules(array $frequent): void private function generateRules(array $frequent): void
{ {
foreach ($this->antecedents($frequent) as $antecedent) { foreach ($this->antecedents($frequent) as $antecedent) {
if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) { $confidence = $this->confidence($frequent, $antecedent);
if ($this->confidence <= $confidence) {
$consequent = array_values(array_diff($frequent, $antecedent)); $consequent = array_values(array_diff($frequent, $antecedent));
$this->rules[] = [ $this->rules[] = [
self::ARRAY_KEY_ANTECEDENT => $antecedent, self::ARRAY_KEY_ANTECEDENT => $antecedent,
@ -152,7 +153,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function powerSet(array $sample) : array private function powerSet(array $sample): array
{ {
$results = [[]]; $results = [[]];
foreach ($sample as $item) { foreach ($sample as $item) {
@ -171,7 +172,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function antecedents(array $sample) : array private function antecedents(array $sample): array
{ {
$cardinality = count($sample); $cardinality = count($sample);
$antecedents = $this->powerSet($sample); $antecedents = $this->powerSet($sample);
@ -186,7 +187,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function items() : array private function items(): array
{ {
$items = []; $items = [];
@ -210,7 +211,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function frequent(array $samples) : array private function frequent(array $samples): array
{ {
return array_filter($samples, function ($entry) { return array_filter($samples, function ($entry) {
return $this->support($entry) >= $this->support; return $this->support($entry) >= $this->support;
@ -224,7 +225,7 @@ class Apriori implements Associator
* *
* @return mixed[][] * @return mixed[][]
*/ */
private function candidates(array $samples) : array private function candidates(array $samples): array
{ {
$candidates = []; $candidates = [];
@ -259,7 +260,7 @@ class Apriori implements Associator
* @param mixed[] $set * @param mixed[] $set
* @param mixed[] $subset * @param mixed[] $subset
*/ */
private function confidence(array $set, array $subset) : float private function confidence(array $set, array $subset): float
{ {
return $this->support($set) / $this->support($subset); return $this->support($set) / $this->support($subset);
} }
@ -272,7 +273,7 @@ class Apriori implements Associator
* *
* @param mixed[] $sample * @param mixed[] $sample
*/ */
private function support(array $sample) : float private function support(array $sample): float
{ {
return $this->frequency($sample) / count($this->samples); return $this->frequency($sample) / count($this->samples);
} }
@ -284,7 +285,7 @@ class Apriori implements Associator
* *
* @param mixed[] $sample * @param mixed[] $sample
*/ */
private function frequency(array $sample) : int private function frequency(array $sample): int
{ {
return count(array_filter($this->samples, function ($entry) use ($sample) { return count(array_filter($this->samples, function ($entry) use ($sample) {
return $this->subset($entry, $sample); return $this->subset($entry, $sample);
@ -299,7 +300,7 @@ class Apriori implements Associator
* @param mixed[][] $system * @param mixed[][] $system
* @param mixed[] $set * @param mixed[] $set
*/ */
private function contains(array $system, array $set) : bool private function contains(array $system, array $set): bool
{ {
return (bool) array_filter($system, function ($entry) use ($set) { return (bool) array_filter($system, function ($entry) use ($set) {
return $this->equals($entry, $set); return $this->equals($entry, $set);
@ -312,7 +313,7 @@ class Apriori implements Associator
* @param mixed[] $set * @param mixed[] $set
* @param mixed[] $subset * @param mixed[] $subset
*/ */
private function subset(array $set, array $subset) : bool private function subset(array $set, array $subset): bool
{ {
return !array_diff($subset, array_intersect($subset, $set)); return !array_diff($subset, array_intersect($subset, $set));
} }
@ -323,7 +324,7 @@ class Apriori implements Associator
* @param mixed[] $set1 * @param mixed[] $set1
* @param mixed[] $set2 * @param mixed[] $set2
*/ */
private function equals(array $set1, array $set2) : bool private function equals(array $set1, array $set2): bool
{ {
return array_diff($set1, $set2) == array_diff($set2, $set1); return array_diff($set1, $set2) == array_diff($set2, $set1);
} }

View File

@ -15,22 +15,18 @@ class DecisionTree implements Classifier
use Trainable, Predictable; use Trainable, Predictable;
public const CONTINUOUS = 1; public const CONTINUOUS = 1;
public const NOMINAL = 2; public const NOMINAL = 2;
/**
* @var array
*/
protected $columnTypes;
/**
* @var array
*/
private $labels = [];
/** /**
* @var int * @var int
*/ */
private $featureCount = 0; public $actualDepth = 0;
/**
* @var array
*/
protected $columnTypes = [];
/** /**
* @var DecisionTreeLeaf * @var DecisionTreeLeaf
@ -42,10 +38,15 @@ class DecisionTree implements Classifier
*/ */
protected $maxDepth; protected $maxDepth;
/**
* @var array
*/
private $labels = [];
/** /**
* @var int * @var int
*/ */
public $actualDepth = 0; private $featureCount = 0;
/** /**
* @var int * @var int
@ -55,7 +56,7 @@ class DecisionTree implements Classifier
/** /**
* @var array * @var array
*/ */
private $selectedFeatures; private $selectedFeatures = [];
/** /**
* @var array * @var array
@ -100,7 +101,7 @@ class DecisionTree implements Classifier
} }
} }
public static function getColumnTypes(array $samples) : array public static function getColumnTypes(array $samples): array
{ {
$types = []; $types = [];
$featureCount = count($samples[0]); $featureCount = count($samples[0]);
@ -113,7 +114,122 @@ class DecisionTree implements Classifier
return $types; return $types;
} }
protected function getSplitLeaf(array $records, int $depth = 0) : DecisionTreeLeaf /**
* @param mixed $baseValue
*/
public function getGiniIndex($baseValue, array $colValues, array $targets): float
{
$countMatrix = [];
foreach ($this->labels as $label) {
$countMatrix[$label] = [0, 0];
}
foreach ($colValues as $index => $value) {
$label = $targets[$index];
$rowIndex = $value === $baseValue ? 0 : 1;
++$countMatrix[$label][$rowIndex];
}
$giniParts = [0, 0];
for ($i = 0; $i <= 1; ++$i) {
$part = 0;
$sum = array_sum(array_column($countMatrix, $i));
if ($sum > 0) {
foreach ($this->labels as $label) {
$part += pow($countMatrix[$label][$i] / (float) $sum, 2);
}
}
$giniParts[$i] = (1 - $part) * $sum;
}
return array_sum($giniParts) / count($colValues);
}
/**
* This method is used to set number of columns to be used
* when deciding a split at an internal node of the tree. <br>
* If the value is given 0, then all features are used (default behaviour),
* otherwise the given value will be used as a maximum for number of columns
* randomly selected for each split operation.
*
* @return $this
*
* @throws InvalidArgumentException
*/
public function setNumFeatures(int $numFeatures)
{
if ($numFeatures < 0) {
throw new InvalidArgumentException('Selected column count should be greater or equal to zero');
}
$this->numUsableFeatures = $numFeatures;
return $this;
}
/**
* A string array to represent columns. Useful when HTML output or
* column importances are desired to be inspected.
*
* @return $this
*
* @throws InvalidArgumentException
*/
public function setColumnNames(array $names)
{
if ($this->featureCount !== 0 && count($names) !== $this->featureCount) {
throw new InvalidArgumentException(sprintf('Length of the given array should be equal to feature count %s', $this->featureCount));
}
$this->columnNames = $names;
return $this;
}
public function getHtml(): string
{
return $this->tree->getHTML($this->columnNames);
}
/**
* This will return an array including an importance value for
* each column in the given dataset. The importance values are
* normalized and their total makes 1.<br/>
*/
public function getFeatureImportances(): array
{
if ($this->featureImportances !== null) {
return $this->featureImportances;
}
$sampleCount = count($this->samples);
$this->featureImportances = [];
foreach ($this->columnNames as $column => $columnName) {
$nodes = $this->getSplitNodesByColumn($column, $this->tree);
$importance = 0;
foreach ($nodes as $node) {
$importance += $node->getNodeImpurityDecrease($sampleCount);
}
$this->featureImportances[$columnName] = $importance;
}
// Normalize & sort the importances
$total = array_sum($this->featureImportances);
if ($total > 0) {
foreach ($this->featureImportances as &$importance) {
$importance /= $total;
}
arsort($this->featureImportances);
}
return $this->featureImportances;
}
protected function getSplitLeaf(array $records, int $depth = 0): DecisionTreeLeaf
{ {
$split = $this->getBestSplit($records); $split = $this->getBestSplit($records);
$split->level = $depth; $split->level = $depth;
@ -136,6 +252,7 @@ class DecisionTree implements Classifier
if ($prevRecord && $prevRecord != $record) { if ($prevRecord && $prevRecord != $record) {
$allSame = false; $allSame = false;
} }
$prevRecord = $record; $prevRecord = $record;
// According to the split criteron, this record will // According to the split criteron, this record will
@ -163,6 +280,7 @@ class DecisionTree implements Classifier
if ($leftRecords) { if ($leftRecords) {
$split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1); $split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1);
} }
if ($rightRecords) { if ($rightRecords) {
$split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1); $split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1);
} }
@ -171,7 +289,7 @@ class DecisionTree implements Classifier
return $split; return $split;
} }
protected function getBestSplit(array $records) : DecisionTreeLeaf protected function getBestSplit(array $records): DecisionTreeLeaf
{ {
$targets = array_intersect_key($this->targets, array_flip($records)); $targets = array_intersect_key($this->targets, array_flip($records));
$samples = array_intersect_key($this->samples, array_flip($records)); $samples = array_intersect_key($this->samples, array_flip($records));
@ -184,6 +302,7 @@ class DecisionTree implements Classifier
foreach ($samples as $index => $row) { foreach ($samples as $index => $row) {
$colValues[$index] = $row[$i]; $colValues[$index] = $row[$i];
} }
$counts = array_count_values($colValues); $counts = array_count_values($colValues);
arsort($counts); arsort($counts);
$baseValue = key($counts); $baseValue = key($counts);
@ -227,7 +346,7 @@ class DecisionTree implements Classifier
* If any of above methods were not called beforehand, then all features * If any of above methods were not called beforehand, then all features
* are returned by default. * are returned by default.
*/ */
protected function getSelectedFeatures() : array protected function getSelectedFeatures(): array
{ {
$allFeatures = range(0, $this->featureCount - 1); $allFeatures = range(0, $this->featureCount - 1);
if ($this->numUsableFeatures === 0 && !$this->selectedFeatures) { if ($this->numUsableFeatures === 0 && !$this->selectedFeatures) {
@ -242,6 +361,7 @@ class DecisionTree implements Classifier
if ($numFeatures > $this->featureCount) { if ($numFeatures > $this->featureCount) {
$numFeatures = $this->featureCount; $numFeatures = $this->featureCount;
} }
shuffle($allFeatures); shuffle($allFeatures);
$selectedFeatures = array_slice($allFeatures, 0, $numFeatures, false); $selectedFeatures = array_slice($allFeatures, 0, $numFeatures, false);
sort($selectedFeatures); sort($selectedFeatures);
@ -249,39 +369,7 @@ class DecisionTree implements Classifier
return $selectedFeatures; return $selectedFeatures;
} }
/** protected function preprocess(array $samples): array
* @param mixed $baseValue
*/
public function getGiniIndex($baseValue, array $colValues, array $targets) : float
{
$countMatrix = [];
foreach ($this->labels as $label) {
$countMatrix[$label] = [0, 0];
}
foreach ($colValues as $index => $value) {
$label = $targets[$index];
$rowIndex = $value === $baseValue ? 0 : 1;
++$countMatrix[$label][$rowIndex];
}
$giniParts = [0, 0];
for ($i = 0; $i <= 1; ++$i) {
$part = 0;
$sum = array_sum(array_column($countMatrix, $i));
if ($sum > 0) {
foreach ($this->labels as $label) {
$part += pow($countMatrix[$label][$i] / (float) $sum, 2);
}
}
$giniParts[$i] = (1 - $part) * $sum;
}
return array_sum($giniParts) / count($colValues);
}
protected function preprocess(array $samples) : array
{ {
// Detect and convert continuous data column values into // Detect and convert continuous data column values into
// discrete values by using the median as a threshold value // discrete values by using the median as a threshold value
@ -298,14 +386,16 @@ class DecisionTree implements Classifier
} }
} }
} }
$columns[] = $values; $columns[] = $values;
} }
// Below method is a strange yet very simple & efficient method // Below method is a strange yet very simple & efficient method
// to get the transpose of a 2D array // to get the transpose of a 2D array
return array_map(null, ...$columns); return array_map(null, ...$columns);
} }
protected static function isCategoricalColumn(array $columnValues) : bool protected static function isCategoricalColumn(array $columnValues): bool
{ {
$count = count($columnValues); $count = count($columnValues);
@ -329,28 +419,6 @@ class DecisionTree implements Classifier
return count($distinctValues) <= $count / 5; return count($distinctValues) <= $count / 5;
} }
/**
* This method is used to set number of columns to be used
* when deciding a split at an internal node of the tree. <br>
* If the value is given 0, then all features are used (default behaviour),
* otherwise the given value will be used as a maximum for number of columns
* randomly selected for each split operation.
*
* @return $this
*
* @throws InvalidArgumentException
*/
public function setNumFeatures(int $numFeatures)
{
if ($numFeatures < 0) {
throw new InvalidArgumentException('Selected column count should be greater or equal to zero');
}
$this->numUsableFeatures = $numFeatures;
return $this;
}
/** /**
* Used to set predefined features to consider while deciding which column to use for a split * Used to set predefined features to consider while deciding which column to use for a split
*/ */
@ -359,71 +427,11 @@ class DecisionTree implements Classifier
$this->selectedFeatures = $selectedFeatures; $this->selectedFeatures = $selectedFeatures;
} }
/**
* A string array to represent columns. Useful when HTML output or
* column importances are desired to be inspected.
*
* @return $this
*
* @throws InvalidArgumentException
*/
public function setColumnNames(array $names)
{
if ($this->featureCount !== 0 && count($names) !== $this->featureCount) {
throw new InvalidArgumentException(sprintf('Length of the given array should be equal to feature count %s', $this->featureCount));
}
$this->columnNames = $names;
return $this;
}
public function getHtml() : string
{
return $this->tree->getHTML($this->columnNames);
}
/**
* This will return an array including an importance value for
* each column in the given dataset. The importance values are
* normalized and their total makes 1.<br/>
*/
public function getFeatureImportances() : array
{
if ($this->featureImportances !== null) {
return $this->featureImportances;
}
$sampleCount = count($this->samples);
$this->featureImportances = [];
foreach ($this->columnNames as $column => $columnName) {
$nodes = $this->getSplitNodesByColumn($column, $this->tree);
$importance = 0;
foreach ($nodes as $node) {
$importance += $node->getNodeImpurityDecrease($sampleCount);
}
$this->featureImportances[$columnName] = $importance;
}
// Normalize & sort the importances
$total = array_sum($this->featureImportances);
if ($total > 0) {
foreach ($this->featureImportances as &$importance) {
$importance /= $total;
}
arsort($this->featureImportances);
}
return $this->featureImportances;
}
/** /**
* Collects and returns an array of internal nodes that use the given * Collects and returns an array of internal nodes that use the given
* column as a split criterion * column as a split criterion
*/ */
protected function getSplitNodesByColumn(int $column, DecisionTreeLeaf $node) : array protected function getSplitNodesByColumn(int $column, DecisionTreeLeaf $node): array
{ {
if (!$node || $node->isTerminal) { if (!$node || $node->isTerminal) {
return []; return [];

View File

@ -71,7 +71,15 @@ class DecisionTreeLeaf
*/ */
public $level = 0; public $level = 0;
public function evaluate(array $record) : bool /**
* HTML representation of the tree without column names
*/
public function __toString(): string
{
return $this->getHTML();
}
public function evaluate(array $record): bool
{ {
$recordField = $record[$this->columnIndex]; $recordField = $record[$this->columnIndex];
@ -86,7 +94,7 @@ class DecisionTreeLeaf
* Returns Mean Decrease Impurity (MDI) in the node. * Returns Mean Decrease Impurity (MDI) in the node.
* For terminal nodes, this value is equal to 0 * For terminal nodes, this value is equal to 0
*/ */
public function getNodeImpurityDecrease(int $parentRecordCount) : float public function getNodeImpurityDecrease(int $parentRecordCount): float
{ {
if ($this->isTerminal) { if ($this->isTerminal) {
return 0.0; return 0.0;
@ -111,7 +119,7 @@ class DecisionTreeLeaf
/** /**
* Returns HTML representation of the node including children nodes * Returns HTML representation of the node including children nodes
*/ */
public function getHTML($columnNames = null) : string public function getHTML($columnNames = null): string
{ {
if ($this->isTerminal) { if ($this->isTerminal) {
$value = "<b>$this->classValue</b>"; $value = "<b>$this->classValue</b>";
@ -154,12 +162,4 @@ class DecisionTreeLeaf
return $str; return $str;
} }
/**
* HTML representation of the tree without column names
*/
public function __toString() : string
{
return $this->getHTML();
}
} }

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Classification\Ensemble; namespace Phpml\Classification\Ensemble;
use Exception;
use Phpml\Classification\Classifier; use Phpml\Classification\Classifier;
use Phpml\Classification\Linear\DecisionStump; use Phpml\Classification\Linear\DecisionStump;
use Phpml\Classification\WeightedClassifier; use Phpml\Classification\WeightedClassifier;
@ -11,6 +12,7 @@ use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable; use Phpml\Helper\Trainable;
use Phpml\Math\Statistic\Mean; use Phpml\Math\Statistic\Mean;
use Phpml\Math\Statistic\StandardDeviation; use Phpml\Math\Statistic\StandardDeviation;
use ReflectionClass;
class AdaBoost implements Classifier class AdaBoost implements Classifier
{ {
@ -98,11 +100,14 @@ class AdaBoost implements Classifier
// Initialize usual variables // Initialize usual variables
$this->labels = array_keys(array_count_values($targets)); $this->labels = array_keys(array_count_values($targets));
if (count($this->labels) != 2) { if (count($this->labels) != 2) {
throw new \Exception('AdaBoost is a binary classifier and can classify between two classes only'); throw new Exception('AdaBoost is a binary classifier and can classify between two classes only');
} }
// Set all target values to either -1 or 1 // Set all target values to either -1 or 1
$this->labels = [1 => $this->labels[0], -1 => $this->labels[1]]; $this->labels = [
1 => $this->labels[0],
-1 => $this->labels[1],
];
foreach ($targets as $target) { foreach ($targets as $target) {
$this->targets[] = $target == $this->labels[1] ? 1 : -1; $this->targets[] = $target == $this->labels[1] ? 1 : -1;
} }
@ -132,13 +137,27 @@ class AdaBoost implements Classifier
} }
} }
/**
* @return mixed
*/
public function predictSample(array $sample)
{
$sum = 0;
foreach ($this->alpha as $index => $alpha) {
$h = $this->classifiers[$index]->predict($sample);
$sum += $h * $alpha;
}
return $this->labels[$sum > 0 ? 1 : -1];
}
/** /**
* Returns the classifier with the lowest error rate with the * Returns the classifier with the lowest error rate with the
* consideration of current sample weights * consideration of current sample weights
*/ */
protected function getBestClassifier() : Classifier protected function getBestClassifier(): Classifier
{ {
$ref = new \ReflectionClass($this->baseClassifier); $ref = new ReflectionClass($this->baseClassifier);
if ($this->classifierOptions) { if ($this->classifierOptions) {
$classifier = $ref->newInstanceArgs($this->classifierOptions); $classifier = $ref->newInstanceArgs($this->classifierOptions);
} else { } else {
@ -160,7 +179,7 @@ class AdaBoost implements Classifier
* Resamples the dataset in accordance with the weights and * Resamples the dataset in accordance with the weights and
* returns the new dataset * returns the new dataset
*/ */
protected function resample() : array protected function resample(): array
{ {
$weights = $this->weights; $weights = $this->weights;
$std = StandardDeviation::population($weights); $std = StandardDeviation::population($weights);
@ -173,9 +192,10 @@ class AdaBoost implements Classifier
foreach ($weights as $index => $weight) { foreach ($weights as $index => $weight) {
$z = (int) round(($weight - $mean) / $std) - $minZ + 1; $z = (int) round(($weight - $mean) / $std) - $minZ + 1;
for ($i = 0; $i < $z; ++$i) { for ($i = 0; $i < $z; ++$i) {
if (rand(0, 1) == 0) { if (random_int(0, 1) == 0) {
continue; continue;
} }
$samples[] = $this->samples[$index]; $samples[] = $this->samples[$index];
$targets[] = $this->targets[$index]; $targets[] = $this->targets[$index];
} }
@ -187,7 +207,7 @@ class AdaBoost implements Classifier
/** /**
* Evaluates the classifier and returns the classification error rate * Evaluates the classifier and returns the classification error rate
*/ */
protected function evaluateClassifier(Classifier $classifier) : float protected function evaluateClassifier(Classifier $classifier): float
{ {
$total = (float) array_sum($this->weights); $total = (float) array_sum($this->weights);
$wrong = 0; $wrong = 0;
@ -204,7 +224,7 @@ class AdaBoost implements Classifier
/** /**
* Calculates alpha of a classifier * Calculates alpha of a classifier
*/ */
protected function calculateAlpha(float $errorRate) : float protected function calculateAlpha(float $errorRate): float
{ {
if ($errorRate == 0) { if ($errorRate == 0) {
$errorRate = 1e-10; $errorRate = 1e-10;
@ -231,18 +251,4 @@ class AdaBoost implements Classifier
$this->weights = $weightsT1; $this->weights = $weightsT1;
} }
/**
* @return mixed
*/
public function predictSample(array $sample)
{
$sum = 0;
foreach ($this->alpha as $index => $alpha) {
$h = $this->classifiers[$index]->predict($sample);
$sum += $h * $alpha;
}
return $this->labels[$sum > 0 ? 1 : -1];
}
} }

View File

@ -4,10 +4,12 @@ declare(strict_types=1);
namespace Phpml\Classification\Ensemble; namespace Phpml\Classification\Ensemble;
use Exception;
use Phpml\Classification\Classifier; use Phpml\Classification\Classifier;
use Phpml\Classification\DecisionTree; use Phpml\Classification\DecisionTree;
use Phpml\Helper\Predictable; use Phpml\Helper\Predictable;
use Phpml\Helper\Trainable; use Phpml\Helper\Trainable;
use ReflectionClass;
class Bagging implements Classifier class Bagging implements Classifier
{ {
@ -18,11 +20,6 @@ class Bagging implements Classifier
*/ */
protected $numSamples; protected $numSamples;
/**
* @var array
*/
private $targets = [];
/** /**
* @var int * @var int
*/ */
@ -46,13 +43,18 @@ class Bagging implements Classifier
/** /**
* @var array * @var array
*/ */
protected $classifiers; protected $classifiers = [];
/** /**
* @var float * @var float
*/ */
protected $subsetRatio = 0.7; protected $subsetRatio = 0.7;
/**
* @var array
*/
private $targets = [];
/** /**
* @var array * @var array
*/ */
@ -80,7 +82,7 @@ class Bagging implements Classifier
public function setSubsetRatio(float $ratio) public function setSubsetRatio(float $ratio)
{ {
if ($ratio < 0.1 || $ratio > 1.0) { if ($ratio < 0.1 || $ratio > 1.0) {
throw new \Exception('Subset ratio should be between 0.1 and 1.0'); throw new Exception('Subset ratio should be between 0.1 and 1.0');
} }
$this->subsetRatio = $ratio; $this->subsetRatio = $ratio;
@ -123,14 +125,14 @@ class Bagging implements Classifier
} }
} }
protected function getRandomSubset(int $index) : array protected function getRandomSubset(int $index): array
{ {
$samples = []; $samples = [];
$targets = []; $targets = [];
srand($index); srand($index);
$bootstrapSize = $this->subsetRatio * $this->numSamples; $bootstrapSize = $this->subsetRatio * $this->numSamples;
for ($i = 0; $i < $bootstrapSize; ++$i) { for ($i = 0; $i < $bootstrapSize; ++$i) {
$rand = rand(0, $this->numSamples - 1); $rand = random_int(0, $this->numSamples - 1);
$samples[] = $this->samples[$rand]; $samples[] = $this->samples[$rand];
$targets[] = $this->targets[$rand]; $targets[] = $this->targets[$rand];
} }
@ -138,11 +140,11 @@ class Bagging implements Classifier
return [$samples, $targets]; return [$samples, $targets];
} }
protected function initClassifiers() : array protected function initClassifiers(): array
{ {
$classifiers = []; $classifiers = [];
for ($i = 0; $i < $this->numClassifier; ++$i) { for ($i = 0; $i < $this->numClassifier; ++$i) {
$ref = new \ReflectionClass($this->classifier); $ref = new ReflectionClass($this->classifier);
if ($this->classifierOptions) { if ($this->classifierOptions) {
$obj = $ref->newInstanceArgs($this->classifierOptions); $obj = $ref->newInstanceArgs($this->classifierOptions);
} else { } else {
@ -155,12 +157,7 @@ class Bagging implements Classifier
return $classifiers; return $classifiers;
} }
/** protected function initSingleClassifier(Classifier $classifier): Classifier
* @param Classifier $classifier
*
* @return Classifier
*/
protected function initSingleClassifier($classifier)
{ {
return $classifier; return $classifier;
} }

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Classification\Ensemble; namespace Phpml\Classification\Ensemble;
use Exception;
use Phpml\Classification\Classifier;
use Phpml\Classification\DecisionTree; use Phpml\Classification\DecisionTree;
class RandomForest extends Bagging class RandomForest extends Bagging
@ -48,11 +50,11 @@ class RandomForest extends Bagging
public function setFeatureSubsetRatio($ratio) public function setFeatureSubsetRatio($ratio)
{ {
if (is_float($ratio) && ($ratio < 0.1 || $ratio > 1.0)) { if (is_float($ratio) && ($ratio < 0.1 || $ratio > 1.0)) {
throw new \Exception('When a float given, feature subset ratio should be between 0.1 and 1.0'); throw new Exception('When a float given, feature subset ratio should be between 0.1 and 1.0');
} }
if (is_string($ratio) && $ratio != 'sqrt' && $ratio != 'log') { if (is_string($ratio) && $ratio != 'sqrt' && $ratio != 'log') {
throw new \Exception("When a string given, feature subset ratio can only be 'sqrt' or 'log' "); throw new Exception("When a string given, feature subset ratio can only be 'sqrt' or 'log' ");
} }
$this->featureSubsetRatio = $ratio; $this->featureSubsetRatio = $ratio;
@ -70,7 +72,7 @@ class RandomForest extends Bagging
public function setClassifer(string $classifier, array $classifierOptions = []) public function setClassifer(string $classifier, array $classifierOptions = [])
{ {
if ($classifier != DecisionTree::class) { if ($classifier != DecisionTree::class) {
throw new \Exception('RandomForest can only use DecisionTree as base classifier'); throw new Exception('RandomForest can only use DecisionTree as base classifier');
} }
return parent::setClassifer($classifier, $classifierOptions); return parent::setClassifer($classifier, $classifierOptions);
@ -81,7 +83,7 @@ class RandomForest extends Bagging
* each column in the given dataset. Importance values for a column * each column in the given dataset. Importance values for a column
* is the average importance of that column in all trees in the forest * is the average importance of that column in all trees in the forest
*/ */
public function getFeatureImportances() : array public function getFeatureImportances(): array
{ {
// Traverse each tree and sum importance of the columns // Traverse each tree and sum importance of the columns
$sum = []; $sum = [];
@ -127,7 +129,7 @@ class RandomForest extends Bagging
* *
* @return DecisionTree * @return DecisionTree
*/ */
protected function initSingleClassifier($classifier) protected function initSingleClassifier(Classifier $classifier): Classifier
{ {
if (is_float($this->featureSubsetRatio)) { if (is_float($this->featureSubsetRatio)) {
$featureCount = (int) ($this->featureSubsetRatio * $this->featureCount); $featureCount = (int) ($this->featureSubsetRatio * $this->featureCount);

View File

@ -28,7 +28,7 @@ class KNearestNeighbors implements Classifier
*/ */
public function __construct(int $k = 3, ?Distance $distanceMetric = null) public function __construct(int $k = 3, ?Distance $distanceMetric = null)
{ {
if (null === $distanceMetric) { if ($distanceMetric === null) {
$distanceMetric = new Euclidean(); $distanceMetric = new Euclidean();
} }
@ -60,7 +60,7 @@ class KNearestNeighbors implements Classifier
/** /**
* @throws \Phpml\Exception\InvalidArgumentException * @throws \Phpml\Exception\InvalidArgumentException
*/ */
private function kNeighborsDistances(array $sample) : array private function kNeighborsDistances(array $sample): array
{ {
$distances = []; $distances = [];

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Classification\Linear; namespace Phpml\Classification\Linear;
use Exception;
class Adaline extends Perceptron class Adaline extends Perceptron
{ {
/** /**
@ -41,7 +43,7 @@ class Adaline extends Perceptron
int $trainingType = self::BATCH_TRAINING int $trainingType = self::BATCH_TRAINING
) { ) {
if (!in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING])) { if (!in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING])) {
throw new \Exception('Adaline can only be trained with batch and online/stochastic gradient descent algorithm'); throw new Exception('Adaline can only be trained with batch and online/stochastic gradient descent algorithm');
} }
$this->trainingType = $trainingType; $this->trainingType = $trainingType;

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Classification\Linear; namespace Phpml\Classification\Linear;
use Exception;
use Phpml\Classification\DecisionTree; use Phpml\Classification\DecisionTree;
use Phpml\Classification\WeightedClassifier; use Phpml\Classification\WeightedClassifier;
use Phpml\Helper\OneVsRest; use Phpml\Helper\OneVsRest;
@ -24,7 +25,7 @@ class DecisionStump extends WeightedClassifier
/** /**
* @var array * @var array
*/ */
protected $binaryLabels; protected $binaryLabels = [];
/** /**
* Lowest error rate obtained while training/optimizing the model * Lowest error rate obtained while training/optimizing the model
@ -51,7 +52,7 @@ class DecisionStump extends WeightedClassifier
/** /**
* @var array * @var array
*/ */
protected $columnTypes; protected $columnTypes = [];
/** /**
* @var int * @var int
@ -68,7 +69,7 @@ class DecisionStump extends WeightedClassifier
* *
* @var array * @var array
*/ */
protected $prob; protected $prob = [];
/** /**
* A DecisionStump classifier is a one-level deep DecisionTree. It is generally * A DecisionStump classifier is a one-level deep DecisionTree. It is generally
@ -83,6 +84,25 @@ class DecisionStump extends WeightedClassifier
$this->givenColumnIndex = $columnIndex; $this->givenColumnIndex = $columnIndex;
} }
/**
 * Renders the stump as a one-line rule of the form
 * "IF <column> <operator> <value> THEN <label 0> ELSE <label 1>".
 */
public function __toString(): string
{
    $condition = "IF {$this->column} {$this->operator} {$this->value} ";
    $thenLabel = $this->binaryLabels[0];
    $elseLabel = $this->binaryLabels[1];

    return $condition.'THEN '.$thenLabel.' '.'ELSE '.$elseLabel;
}
/**
 * Sets how many candidate split points are probed for a numerical column.
 *
 * When searching for the best split of a numerical column, DecisionStump
 * tries equally spaced values between the column's minimum and maximum.
 * A higher count probes more candidates, which tends to give a better
 * split at the cost of more processing time (default value is 10.0).
 */
public function setNumericalSplitCount(float $count): void
{
    $this->numSplitCount = $count;
}
/** /**
* @throws \Exception * @throws \Exception
*/ */
@ -101,7 +121,7 @@ class DecisionStump extends WeightedClassifier
if ($this->weights) { if ($this->weights) {
$numWeights = count($this->weights); $numWeights = count($this->weights);
if ($numWeights != count($samples)) { if ($numWeights != count($samples)) {
throw new \Exception('Number of sample weights does not match with number of samples'); throw new Exception('Number of sample weights does not match with number of samples');
} }
} else { } else {
$this->weights = array_fill(0, count($samples), 1); $this->weights = array_fill(0, count($samples), 1);
@ -118,9 +138,12 @@ class DecisionStump extends WeightedClassifier
} }
$bestSplit = [ $bestSplit = [
'value' => 0, 'operator' => '', 'value' => 0,
'prob' => [], 'column' => 0, 'operator' => '',
'trainingErrorRate' => 1.0]; 'prob' => [],
'column' => 0,
'trainingErrorRate' => 1.0,
];
foreach ($columns as $col) { foreach ($columns as $col) {
if ($this->columnTypes[$col] == DecisionTree::CONTINUOUS) { if ($this->columnTypes[$col] == DecisionTree::CONTINUOUS) {
$split = $this->getBestNumericalSplit($samples, $targets, $col); $split = $this->getBestNumericalSplit($samples, $targets, $col);
@ -139,22 +162,10 @@ class DecisionStump extends WeightedClassifier
} }
} }
/**
 * Sets how many candidate split points are probed for a numerical column.
 *
 * When searching for the best split of a numerical column, DecisionStump
 * tries equally spaced values between the column's minimum and maximum.
 * A higher count probes more candidates, which tends to give a better
 * split at the cost of more processing time (default value is 10.0).
 */
public function setNumericalSplitCount(float $count): void
{
    $this->numSplitCount = $count;
}
/** /**
* Determines best split point for the given column * Determines best split point for the given column
*/ */
protected function getBestNumericalSplit(array $samples, array $targets, int $col) : array protected function getBestNumericalSplit(array $samples, array $targets, int $col): array
{ {
$values = array_column($samples, $col); $values = array_column($samples, $col);
// Trying all possible points may be accomplished in two general ways: // Trying all possible points may be accomplished in two general ways:
@ -173,9 +184,13 @@ class DecisionStump extends WeightedClassifier
$threshold = array_sum($values) / (float) count($values); $threshold = array_sum($values) / (float) count($values);
[$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values); [$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values);
if ($split == null || $errorRate < $split['trainingErrorRate']) { if ($split == null || $errorRate < $split['trainingErrorRate']) {
$split = ['value' => $threshold, 'operator' => $operator, $split = [
'prob' => $prob, 'column' => $col, 'value' => $threshold,
'trainingErrorRate' => $errorRate]; 'operator' => $operator,
'prob' => $prob,
'column' => $col,
'trainingErrorRate' => $errorRate,
];
} }
// Try other possible points one by one // Try other possible points one by one
@ -183,9 +198,13 @@ class DecisionStump extends WeightedClassifier
$threshold = (float) $step; $threshold = (float) $step;
[$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values); [$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values);
if ($errorRate < $split['trainingErrorRate']) { if ($errorRate < $split['trainingErrorRate']) {
$split = ['value' => $threshold, 'operator' => $operator, $split = [
'prob' => $prob, 'column' => $col, 'value' => $threshold,
'trainingErrorRate' => $errorRate]; 'operator' => $operator,
'prob' => $prob,
'column' => $col,
'trainingErrorRate' => $errorRate,
];
} }
}// for }// for
} }
@ -193,7 +212,7 @@ class DecisionStump extends WeightedClassifier
return $split; return $split;
} }
protected function getBestNominalSplit(array $samples, array $targets, int $col) : array protected function getBestNominalSplit(array $samples, array $targets, int $col): array
{ {
$values = array_column($samples, $col); $values = array_column($samples, $col);
$valueCounts = array_count_values($values); $valueCounts = array_count_values($values);
@ -206,9 +225,13 @@ class DecisionStump extends WeightedClassifier
[$errorRate, $prob] = $this->calculateErrorRate($targets, $val, $operator, $values); [$errorRate, $prob] = $this->calculateErrorRate($targets, $val, $operator, $values);
if ($split == null || $split['trainingErrorRate'] < $errorRate) { if ($split == null || $split['trainingErrorRate'] < $errorRate) {
$split = ['value' => $val, 'operator' => $operator, $split = [
'prob' => $prob, 'column' => $col, 'value' => $val,
'trainingErrorRate' => $errorRate]; 'operator' => $operator,
'prob' => $prob,
'column' => $col,
'trainingErrorRate' => $errorRate,
];
} }
} }
} }
@ -220,7 +243,7 @@ class DecisionStump extends WeightedClassifier
* Calculates the ratio of wrong predictions based on the new threshold * Calculates the ratio of wrong predictions based on the new threshold
* value given as the parameter * value given as the parameter
*/ */
protected function calculateErrorRate(array $targets, float $threshold, string $operator, array $values) : array protected function calculateErrorRate(array $targets, float $threshold, string $operator, array $values): array
{ {
$wrong = 0.0; $wrong = 0.0;
$prob = []; $prob = [];
@ -242,6 +265,7 @@ class DecisionStump extends WeightedClassifier
if (!isset($prob[$predicted][$target])) { if (!isset($prob[$predicted][$target])) {
$prob[$predicted][$target] = 0; $prob[$predicted][$target] = 0;
} }
++$prob[$predicted][$target]; ++$prob[$predicted][$target];
} }
@ -267,7 +291,7 @@ class DecisionStump extends WeightedClassifier
* *
* @param mixed $label * @param mixed $label
*/ */
protected function predictProbability(array $sample, $label) : float protected function predictProbability(array $sample, $label): float
{ {
$predicted = $this->predictSampleBinary($sample); $predicted = $this->predictSampleBinary($sample);
if ((string) $predicted == (string) $label) { if ((string) $predicted == (string) $label) {
@ -292,11 +316,4 @@ class DecisionStump extends WeightedClassifier
protected function resetBinary(): void protected function resetBinary(): void
{ {
} }
/**
 * Renders the stump as a one-line rule of the form
 * "IF <column> <operator> <value> THEN <label 0> ELSE <label 1>".
 */
public function __toString() : string
{
    $condition = "IF {$this->column} {$this->operator} {$this->value} ";
    $thenLabel = $this->binaryLabels[0];
    $elseLabel = $this->binaryLabels[1];

    return $condition.'THEN '.$thenLabel.' '.'ELSE '.$elseLabel;
}
} }

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Classification\Linear; namespace Phpml\Classification\Linear;
use Closure;
use Exception;
use Phpml\Helper\Optimizer\ConjugateGradient; use Phpml\Helper\Optimizer\ConjugateGradient;
class LogisticRegression extends Adaline class LogisticRegression extends Adaline
@ -70,18 +72,18 @@ class LogisticRegression extends Adaline
) { ) {
$trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING); $trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING);
if (!in_array($trainingType, $trainingTypes)) { if (!in_array($trainingType, $trainingTypes)) {
throw new \Exception('Logistic regression can only be trained with '. throw new Exception('Logistic regression can only be trained with '.
'batch (gradient descent), online (stochastic gradient descent) '. 'batch (gradient descent), online (stochastic gradient descent) '.
'or conjugate batch (conjugate gradients) algorithms'); 'or conjugate batch (conjugate gradients) algorithms');
} }
if (!in_array($cost, ['log', 'sse'])) { if (!in_array($cost, ['log', 'sse'])) {
throw new \Exception("Logistic regression cost function can be one of the following: \n". throw new Exception("Logistic regression cost function can be one of the following: \n".
"'log' for log-likelihood and 'sse' for sum of squared errors"); "'log' for log-likelihood and 'sse' for sum of squared errors");
} }
if ($penalty != '' && strtoupper($penalty) !== 'L2') { if ($penalty != '' && strtoupper($penalty) !== 'L2') {
throw new \Exception("Logistic regression supports only 'L2' regularization"); throw new Exception("Logistic regression supports only 'L2' regularization");
} }
$this->learningRate = 0.001; $this->learningRate = 0.001;
@ -132,14 +134,14 @@ class LogisticRegression extends Adaline
return $this->runConjugateGradient($samples, $targets, $callback); return $this->runConjugateGradient($samples, $targets, $callback);
default: default:
throw new \Exception('Logistic regression has invalid training type: %s.', $this->trainingType); throw new Exception('Logistic regression has invalid training type: %s.', $this->trainingType);
} }
} }
/** /**
* Executes Conjugate Gradient method to optimize the weights of the LogReg model * Executes Conjugate Gradient method to optimize the weights of the LogReg model
*/ */
protected function runConjugateGradient(array $samples, array $targets, \Closure $gradientFunc): void protected function runConjugateGradient(array $samples, array $targets, Closure $gradientFunc): void
{ {
if (empty($this->optimizer)) { if (empty($this->optimizer)) {
$this->optimizer = (new ConjugateGradient($this->featureCount)) $this->optimizer = (new ConjugateGradient($this->featureCount))
@ -155,7 +157,7 @@ class LogisticRegression extends Adaline
* *
* @throws \Exception * @throws \Exception
*/ */
protected function getCostFunction() : \Closure protected function getCostFunction(): Closure
{ {
$penalty = 0; $penalty = 0;
if ($this->penalty == 'L2') { if ($this->penalty == 'L2') {
@ -183,9 +185,11 @@ class LogisticRegression extends Adaline
if ($hX == 1) { if ($hX == 1) {
$hX = 1 - 1e-10; $hX = 1 - 1e-10;
} }
if ($hX == 0) { if ($hX == 0) {
$hX = 1e-10; $hX = 1e-10;
} }
$error = -$y * log($hX) - (1 - $y) * log(1 - $hX); $error = -$y * log($hX) - (1 - $y) * log(1 - $hX);
$gradient = $hX - $y; $gradient = $hX - $y;
@ -218,16 +222,14 @@ class LogisticRegression extends Adaline
return $callback; return $callback;
default: default:
throw new \Exception(sprintf('Logistic regression has invalid cost function: %s.', $this->costFunction)); throw new Exception(sprintf('Logistic regression has invalid cost function: %s.', $this->costFunction));
} }
} }
/** /**
* Returns the output of the network, a float value between 0.0 and 1.0 * Returns the output of the network, a float value between 0.0 and 1.0
*
* @return float
*/ */
protected function output(array $sample) protected function output(array $sample): float
{ {
$sum = parent::output($sample); $sum = parent::output($sample);
@ -237,7 +239,7 @@ class LogisticRegression extends Adaline
/** /**
* Returns the class value (either -1 or 1) for the given input * Returns the class value (either -1 or 1) for the given input
*/ */
protected function outputClass(array $sample) : int protected function outputClass(array $sample): int
{ {
$output = $this->output($sample); $output = $this->output($sample);
@ -253,10 +255,10 @@ class LogisticRegression extends Adaline
* *
* The probability is simply taken as the distance of the sample * The probability is simply taken as the distance of the sample
* to the decision plane. * to the decision plane.
*
* @param mixed $label * @param mixed $label
*/ */
protected function predictProbability(array $sample, $label) : float protected function predictProbability(array $sample, $label): float
{ {
$predicted = $this->predictSampleBinary($sample); $predicted = $this->predictSampleBinary($sample);

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Classification\Linear; namespace Phpml\Classification\Linear;
use Closure;
use Exception;
use Phpml\Classification\Classifier; use Phpml\Classification\Classifier;
use Phpml\Helper\OneVsRest; use Phpml\Helper\OneVsRest;
use Phpml\Helper\Optimizer\GD; use Phpml\Helper\Optimizer\GD;
@ -34,7 +36,7 @@ class Perceptron implements Classifier, IncrementalEstimator
/** /**
* @var array * @var array
*/ */
protected $weights; protected $weights = [];
/** /**
* @var float * @var float
@ -73,11 +75,11 @@ class Perceptron implements Classifier, IncrementalEstimator
public function __construct(float $learningRate = 0.001, int $maxIterations = 1000, bool $normalizeInputs = true) public function __construct(float $learningRate = 0.001, int $maxIterations = 1000, bool $normalizeInputs = true)
{ {
if ($learningRate <= 0.0 || $learningRate > 1.0) { if ($learningRate <= 0.0 || $learningRate > 1.0) {
throw new \Exception('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)'); throw new Exception('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)');
} }
if ($maxIterations <= 0) { if ($maxIterations <= 0) {
throw new \Exception('Maximum number of iterations must be an integer greater than 0'); throw new Exception('Maximum number of iterations must be an integer greater than 0');
} }
if ($normalizeInputs) { if ($normalizeInputs) {
@ -100,7 +102,10 @@ class Perceptron implements Classifier, IncrementalEstimator
} }
// Set all target values to either -1 or 1 // Set all target values to either -1 or 1
$this->labels = [1 => $labels[0], -1 => $labels[1]]; $this->labels = [
1 => $labels[0],
-1 => $labels[1],
];
foreach ($targets as $key => $target) { foreach ($targets as $key => $target) {
$targets[$key] = (string) $target == (string) $this->labels[1] ? 1 : -1; $targets[$key] = (string) $target == (string) $this->labels[1] ? 1 : -1;
} }
@ -111,15 +116,6 @@ class Perceptron implements Classifier, IncrementalEstimator
$this->runTraining($samples, $targets); $this->runTraining($samples, $targets);
} }
/**
 * Resets the per-training state (labels, optimizer, feature count,
 * weights and recorded cost values) to its initial empty values.
 */
protected function resetBinary(): void
{
    $this->labels = [];
    $this->optimizer = null;
    $this->featureCount = 0;
    // The property is documented as "@var array"; resetting it to null
    // would contradict that type, so an empty array is used instead.
    $this->weights = [];
    $this->costValues = [];
}
/** /**
* Normally enabling early stopping for the optimization procedure may * Normally enabling early stopping for the optimization procedure may
* help saving processing time while in some cases it may result in * help saving processing time while in some cases it may result in
@ -140,16 +136,23 @@ class Perceptron implements Classifier, IncrementalEstimator
/** /**
* Returns the cost values obtained during the training. * Returns the cost values obtained during the training.
*/ */
public function getCostValues() : array public function getCostValues(): array
{ {
return $this->costValues; return $this->costValues;
} }
/**
 * Resets the per-training state (labels, optimizer, feature count,
 * weights and recorded cost values) to its initial empty values.
 */
protected function resetBinary(): void
{
    $this->labels = [];
    $this->optimizer = null;
    $this->featureCount = 0;
    // The property is documented as "@var array" and defaults to [];
    // resetting it to null would contradict that type, so the same empty
    // array is restored here.
    $this->weights = [];
    $this->costValues = [];
}
/** /**
* Trains the perceptron model with Stochastic Gradient Descent optimization * Trains the perceptron model with Stochastic Gradient Descent optimization
* to get the correct set of weights * to get the correct set of weights
*
* @return void|mixed
*/ */
protected function runTraining(array $samples, array $targets) protected function runTraining(array $samples, array $targets)
{ {
@ -171,7 +174,7 @@ class Perceptron implements Classifier, IncrementalEstimator
* Executes a Gradient Descent algorithm for * Executes a Gradient Descent algorithm for
* the given cost function * the given cost function
*/ */
protected function runGradientDescent(array $samples, array $targets, \Closure $gradientFunc, bool $isBatch = false): void protected function runGradientDescent(array $samples, array $targets, Closure $gradientFunc, bool $isBatch = false): void
{ {
$class = $isBatch ? GD::class : StochasticGD::class; $class = $isBatch ? GD::class : StochasticGD::class;
@ -191,7 +194,7 @@ class Perceptron implements Classifier, IncrementalEstimator
* Checks if the sample should be normalized and if so, returns the * Checks if the sample should be normalized and if so, returns the
* normalized sample * normalized sample
*/ */
protected function checkNormalizedSample(array $sample) : array protected function checkNormalizedSample(array $sample): array
{ {
if ($this->normalizer) { if ($this->normalizer) {
$samples = [$sample]; $samples = [$sample];
@ -205,7 +208,7 @@ class Perceptron implements Classifier, IncrementalEstimator
/** /**
* Calculates net output of the network as a float value for the given input * Calculates net output of the network as a float value for the given input
* *
* @return int * @return int|float
*/ */
protected function output(array $sample) protected function output(array $sample)
{ {
@ -224,7 +227,7 @@ class Perceptron implements Classifier, IncrementalEstimator
/** /**
* Returns the class value (either -1 or 1) for the given input * Returns the class value (either -1 or 1) for the given input
*/ */
protected function outputClass(array $sample) : int protected function outputClass(array $sample): int
{ {
return $this->output($sample) > 0 ? 1 : -1; return $this->output($sample) > 0 ? 1 : -1;
} }
@ -237,7 +240,7 @@ class Perceptron implements Classifier, IncrementalEstimator
* *
* @param mixed $label * @param mixed $label
*/ */
protected function predictProbability(array $sample, $label) : float protected function predictProbability(array $sample, $label): float
{ {
$predicted = $this->predictSampleBinary($sample); $predicted = $this->predictSampleBinary($sample);

View File

@ -14,7 +14,7 @@ class MLPClassifier extends MultilayerPerceptron implements Classifier
* *
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public function getTargetClass($target) : int public function getTargetClass($target): int
{ {
if (!in_array($target, $this->classes)) { if (!in_array($target, $this->classes)) {
throw InvalidArgumentException::invalidTarget($target); throw InvalidArgumentException::invalidTarget($target);

View File

@ -14,7 +14,9 @@ class NaiveBayes implements Classifier
use Trainable, Predictable; use Trainable, Predictable;
public const CONTINUOS = 1; public const CONTINUOS = 1;
public const NOMINAL = 2; public const NOMINAL = 2;
public const EPSILON = 1e-10; public const EPSILON = 1e-10;
/** /**
@ -73,6 +75,31 @@ class NaiveBayes implements Classifier
} }
} }
/**
 * Picks the most likely label for a single sample.
 *
 * For every known label the stored prior is combined with the per-feature
 * value returned by sampleProbability(); the label with the highest total
 * wins. The terms are added rather than multiplied, so the stored values
 * are presumably log-probabilities (NOTE(review): confirm against
 * sampleProbability() and the training code).
 *
 * @return mixed
 */
protected function predictSample(array $sample)
{
    $scores = [];
    foreach ($this->labels as $label) {
        $score = $this->p[$label];
        for ($feature = 0; $feature < $this->featureCount; ++$feature) {
            $score += $this->sampleProbability($sample, $feature, $label);
        }

        $scores[$label] = $score;
    }

    // Sort descending while keeping the label keys, then read the first key
    arsort($scores, SORT_NUMERIC);
    reset($scores);

    return key($scores);
}
/** /**
* Calculates vital statistics for each label & feature. Stores these * Calculates vital statistics for each label & feature. Stores these
* values in private array in order to avoid repeated calculation * values in private array in order to avoid repeated calculation
@ -108,7 +135,7 @@ class NaiveBayes implements Classifier
/** /**
* Calculates the probability P(label|sample_n) * Calculates the probability P(label|sample_n)
*/ */
private function sampleProbability(array $sample, int $feature, string $label) : float private function sampleProbability(array $sample, int $feature, string $label): float
{ {
$value = $sample[$feature]; $value = $sample[$feature];
if ($this->dataType[$label][$feature] == self::NOMINAL) { if ($this->dataType[$label][$feature] == self::NOMINAL) {
@ -119,6 +146,7 @@ class NaiveBayes implements Classifier
return $this->discreteProb[$label][$feature][$value]; return $this->discreteProb[$label][$feature][$value];
} }
$std = $this->std[$label][$feature] ; $std = $this->std[$label][$feature] ;
$mean = $this->mean[$label][$feature]; $mean = $this->mean[$label][$feature];
// Calculate the probability density by use of normal/Gaussian distribution // Calculate the probability density by use of normal/Gaussian distribution
@ -137,7 +165,7 @@ class NaiveBayes implements Classifier
/** /**
* Return samples belonging to specific label * Return samples belonging to specific label
*/ */
private function getSamplesByLabel(string $label) : array private function getSamplesByLabel(string $label): array
{ {
$samples = []; $samples = [];
for ($i = 0; $i < $this->sampleCount; ++$i) { for ($i = 0; $i < $this->sampleCount; ++$i) {
@ -148,28 +176,4 @@ class NaiveBayes implements Classifier
return $samples; return $samples;
} }
/**
 * Picks the most likely label for a single sample.
 *
 * For every known label the stored prior is combined with the per-feature
 * value returned by sampleProbability(); the label with the highest total
 * wins. The terms are added rather than multiplied, so the stored values
 * are presumably log-probabilities (NOTE(review): confirm against
 * sampleProbability() and the training code).
 *
 * @return mixed
 */
protected function predictSample(array $sample)
{
    $scores = [];
    foreach ($this->labels as $label) {
        $score = $this->p[$label];
        for ($feature = 0; $feature < $this->featureCount; ++$feature) {
            $score += $this->sampleProbability($sample, $feature, $label);
        }

        $scores[$label] = $score;
    }

    // Sort descending while keeping the label keys, then read the first key
    arsort($scores, SORT_NUMERIC);
    reset($scores);

    return key($scores);
}
} }

View File

@ -9,7 +9,7 @@ abstract class WeightedClassifier implements Classifier
/** /**
* @var array * @var array
*/ */
protected $weights; protected $weights = [];
/** /**
* Sets the array including a weight for each sample * Sets the array including a weight for each sample

View File

@ -6,5 +6,5 @@ namespace Phpml\Clustering;
interface Clusterer interface Clusterer
{ {
public function cluster(array $samples) : array; public function cluster(array $samples): array;
} }

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Clustering; namespace Phpml\Clustering;
use array_merge;
use Phpml\Math\Distance; use Phpml\Math\Distance;
use Phpml\Math\Distance\Euclidean; use Phpml\Math\Distance\Euclidean;
@ -26,7 +27,7 @@ class DBSCAN implements Clusterer
public function __construct(float $epsilon = 0.5, int $minSamples = 3, ?Distance $distanceMetric = null) public function __construct(float $epsilon = 0.5, int $minSamples = 3, ?Distance $distanceMetric = null)
{ {
if (null === $distanceMetric) { if ($distanceMetric === null) {
$distanceMetric = new Euclidean(); $distanceMetric = new Euclidean();
} }
@ -35,7 +36,7 @@ class DBSCAN implements Clusterer
$this->distanceMetric = $distanceMetric; $this->distanceMetric = $distanceMetric;
} }
public function cluster(array $samples) : array public function cluster(array $samples): array
{ {
$clusters = []; $clusters = [];
$visited = []; $visited = [];
@ -44,6 +45,7 @@ class DBSCAN implements Clusterer
if (isset($visited[$index])) { if (isset($visited[$index])) {
continue; continue;
} }
$visited[$index] = true; $visited[$index] = true;
$regionSamples = $this->getSamplesInRegion($sample, $samples); $regionSamples = $this->getSamplesInRegion($sample, $samples);
@ -55,7 +57,7 @@ class DBSCAN implements Clusterer
return $clusters; return $clusters;
} }
private function getSamplesInRegion(array $localSample, array $samples) : array private function getSamplesInRegion(array $localSample, array $samples): array
{ {
$region = []; $region = [];
@ -68,7 +70,7 @@ class DBSCAN implements Clusterer
return $region; return $region;
} }
private function expandCluster(array $samples, array &$visited) : array private function expandCluster(array $samples, array &$visited): array
{ {
$cluster = []; $cluster = [];
@ -84,7 +86,8 @@ class DBSCAN implements Clusterer
$cluster[$index] = $sample; $cluster[$index] = $sample;
} }
$cluster = \array_merge($cluster, ...$clusterMerge);
$cluster = array_merge($cluster, ...$clusterMerge);
return $cluster; return $cluster;
} }

View File

@ -30,7 +30,7 @@ class FuzzyCMeans implements Clusterer
/** /**
* @var array|float[][] * @var array|float[][]
*/ */
private $membership; private $membership = [];
/** /**
* @var float * @var float
@ -55,7 +55,7 @@ class FuzzyCMeans implements Clusterer
/** /**
* @var array * @var array
*/ */
private $samples; private $samples = [];
/** /**
* @throws InvalidArgumentException * @throws InvalidArgumentException
@ -65,12 +65,63 @@ class FuzzyCMeans implements Clusterer
if ($clustersNumber <= 0) { if ($clustersNumber <= 0) {
throw InvalidArgumentException::invalidClustersNumber(); throw InvalidArgumentException::invalidClustersNumber();
} }
$this->clustersNumber = $clustersNumber; $this->clustersNumber = $clustersNumber;
$this->fuzziness = $fuzziness; $this->fuzziness = $fuzziness;
$this->epsilon = $epsilon; $this->epsilon = $epsilon;
$this->maxIterations = $maxIterations; $this->maxIterations = $maxIterations;
} }
public function getMembershipMatrix(): array
{
return $this->membership;
}
/**
* @param array|Point[] $samples
*/
public function cluster(array $samples): array
{
// Initialize variables, clusters and membership matrix
$this->sampleCount = count($samples);
$this->samples = &$samples;
$this->space = new Space(count($samples[0]));
$this->initClusters();
// Our goal is minimizing the objective value while
// executing the clustering steps at a maximum number of iterations
$lastObjective = 0.0;
$iterations = 0;
do {
// Update the membership matrix and cluster centers, respectively
$this->updateMembershipMatrix();
$this->updateClusters();
// Calculate the new value of the objective function
$objectiveVal = $this->getObjective();
$difference = abs($lastObjective - $objectiveVal);
$lastObjective = $objectiveVal;
} while ($difference > $this->epsilon && $iterations++ <= $this->maxIterations);
// Attach (hard cluster) each data point to the nearest cluster
for ($k = 0; $k < $this->sampleCount; ++$k) {
$column = array_column($this->membership, $k);
arsort($column);
reset($column);
$i = key($column);
$cluster = $this->clusters[$i];
$cluster->attach(new Point($this->samples[$k]));
}
// Return grouped samples
$grouped = [];
foreach ($this->clusters as $cluster) {
$grouped[] = $cluster->getPoints();
}
return $grouped;
}
protected function initClusters(): void protected function initClusters(): void
{ {
// Membership array is a matrix of cluster number by sample counts // Membership array is a matrix of cluster number by sample counts
@ -87,7 +138,7 @@ class FuzzyCMeans implements Clusterer
$row = []; $row = [];
$total = 0.0; $total = 0.0;
for ($k = 0; $k < $cols; ++$k) { for ($k = 0; $k < $cols; ++$k) {
$val = rand(1, 5) / 10.0; $val = random_int(1, 5) / 10.0;
$row[] = $val; $row[] = $val;
$total += $val; $total += $val;
} }
@ -146,13 +197,13 @@ class FuzzyCMeans implements Clusterer
} }
} }
protected function getDistanceCalc(int $row, int $col) : float protected function getDistanceCalc(int $row, int $col): float
{ {
$sum = 0.0; $sum = 0.0;
$distance = new Euclidean(); $distance = new Euclidean();
$dist1 = $distance->distance( $dist1 = $distance->distance(
$this->clusters[$row]->getCoordinates(), $this->clusters[$row]->getCoordinates(),
$this->samples[$col] $this->samples[$col]
); );
for ($j = 0; $j < $this->clustersNumber; ++$j) { for ($j = 0; $j < $this->clustersNumber; ++$j) {
@ -187,54 +238,4 @@ class FuzzyCMeans implements Clusterer
return $sum; return $sum;
} }
public function getMembershipMatrix() : array
{
return $this->membership;
}
/**
* @param array|Point[] $samples
*/
public function cluster(array $samples) : array
{
// Initialize variables, clusters and membership matrix
$this->sampleCount = count($samples);
$this->samples = &$samples;
$this->space = new Space(count($samples[0]));
$this->initClusters();
// Our goal is minimizing the objective value while
// executing the clustering steps at a maximum number of iterations
$lastObjective = 0.0;
$iterations = 0;
do {
// Update the membership matrix and cluster centers, respectively
$this->updateMembershipMatrix();
$this->updateClusters();
// Calculate the new value of the objective function
$objectiveVal = $this->getObjective();
$difference = abs($lastObjective - $objectiveVal);
$lastObjective = $objectiveVal;
} while ($difference > $this->epsilon && $iterations++ <= $this->maxIterations);
// Attach (hard cluster) each data point to the nearest cluster
for ($k = 0; $k < $this->sampleCount; ++$k) {
$column = array_column($this->membership, $k);
arsort($column);
reset($column);
$i = key($column);
$cluster = $this->clusters[$i];
$cluster->attach(new Point($this->samples[$k]));
}
// Return grouped samples
$grouped = [];
foreach ($this->clusters as $cluster) {
$grouped[] = $cluster->getPoints();
}
return $grouped;
}
} }

View File

@ -10,6 +10,7 @@ use Phpml\Exception\InvalidArgumentException;
class KMeans implements Clusterer class KMeans implements Clusterer
{ {
public const INIT_RANDOM = 1; public const INIT_RANDOM = 1;
public const INIT_KMEANS_PLUS_PLUS = 2; public const INIT_KMEANS_PLUS_PLUS = 2;
/** /**
@ -32,7 +33,7 @@ class KMeans implements Clusterer
$this->initialization = $initialization; $this->initialization = $initialization;
} }
public function cluster(array $samples) : array public function cluster(array $samples): array
{ {
$space = new Space(count($samples[0])); $space = new Space(count($samples[0]));
foreach ($samples as $sample) { foreach ($samples as $sample) {

View File

@ -28,7 +28,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
$this->points = new SplObjectStorage(); $this->points = new SplObjectStorage();
} }
public function getPoints() : array public function getPoints(): array
{ {
$points = []; $points = [];
foreach ($this->points as $point) { foreach ($this->points as $point) {
@ -38,7 +38,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
return $points; return $points;
} }
public function toArray() : array public function toArray(): array
{ {
return [ return [
'centroid' => parent::toArray(), 'centroid' => parent::toArray(),
@ -46,7 +46,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
]; ];
} }
public function attach(Point $point) : Point public function attach(Point $point): Point
{ {
if ($point instanceof self) { if ($point instanceof self) {
throw new LogicException('cannot attach a cluster to another'); throw new LogicException('cannot attach a cluster to another');
@ -57,7 +57,7 @@ class Cluster extends Point implements IteratorAggregate, Countable
return $point; return $point;
} }
public function detach(Point $point) : Point public function detach(Point $point): Point
{ {
$this->points->detach($point); $this->points->detach($point);
@ -76,7 +76,8 @@ class Cluster extends Point implements IteratorAggregate, Countable
public function updateCentroid(): void public function updateCentroid(): void
{ {
if (!$count = count($this->points)) { $count = count($this->points);
if (!$count) {
return; return;
} }

View File

@ -16,7 +16,7 @@ class Point implements ArrayAccess
/** /**
* @var array * @var array
*/ */
protected $coordinates; protected $coordinates = [];
public function __construct(array $coordinates) public function __construct(array $coordinates)
{ {
@ -24,7 +24,7 @@ class Point implements ArrayAccess
$this->coordinates = $coordinates; $this->coordinates = $coordinates;
} }
public function toArray() : array public function toArray(): array
{ {
return $this->coordinates; return $this->coordinates;
} }
@ -66,7 +66,7 @@ class Point implements ArrayAccess
return $minPoint; return $minPoint;
} }
public function getCoordinates() : array public function getCoordinates(): array
{ {
return $this->coordinates; return $this->coordinates;
} }

View File

@ -25,7 +25,7 @@ class Space extends SplObjectStorage
$this->dimension = $dimension; $this->dimension = $dimension;
} }
public function toArray() : array public function toArray(): array
{ {
$points = []; $points = [];
foreach ($this as $point) { foreach ($this as $point) {
@ -35,7 +35,7 @@ class Space extends SplObjectStorage
return ['points' => $points]; return ['points' => $points];
} }
public function newPoint(array $coordinates) : Point public function newPoint(array $coordinates): Point
{ {
if (count($coordinates) != $this->dimension) { if (count($coordinates) != $this->dimension) {
throw new LogicException('('.implode(',', $coordinates).') is not a point of this space'); throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
@ -65,7 +65,7 @@ class Space extends SplObjectStorage
parent::attach($point, $data); parent::attach($point, $data);
} }
public function getDimension() : int public function getDimension(): int
{ {
return $this->dimension; return $this->dimension;
} }
@ -92,7 +92,7 @@ class Space extends SplObjectStorage
return [$min, $max]; return [$min, $max];
} }
public function getRandomPoint(Point $min, Point $max) : Point public function getRandomPoint(Point $min, Point $max): Point
{ {
$point = $this->newPoint(array_fill(0, $this->dimension, null)); $point = $this->newPoint(array_fill(0, $this->dimension, null));
@ -106,7 +106,7 @@ class Space extends SplObjectStorage
/** /**
* @return array|Cluster[] * @return array|Cluster[]
*/ */
public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM) : array public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
{ {
$clusters = $this->initializeClusters($clustersNumber, $initMethod); $clusters = $this->initializeClusters($clustersNumber, $initMethod);
@ -119,7 +119,7 @@ class Space extends SplObjectStorage
/** /**
* @return array|Cluster[] * @return array|Cluster[]
*/ */
protected function initializeClusters(int $clustersNumber, int $initMethod) : array protected function initializeClusters(int $clustersNumber, int $initMethod): array
{ {
switch ($initMethod) { switch ($initMethod) {
case KMeans::INIT_RANDOM: case KMeans::INIT_RANDOM:
@ -139,7 +139,7 @@ class Space extends SplObjectStorage
return $clusters; return $clusters;
} }
protected function iterate($clusters) : bool protected function iterate($clusters): bool
{ {
$convergence = true; $convergence = true;
@ -177,19 +177,7 @@ class Space extends SplObjectStorage
return $convergence; return $convergence;
} }
private function initializeRandomClusters(int $clustersNumber) : array protected function initializeKMPPClusters(int $clustersNumber): array
{
$clusters = [];
[$min, $max] = $this->getBoundaries();
for ($n = 0; $n < $clustersNumber; ++$n) {
$clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
}
return $clusters;
}
protected function initializeKMPPClusters(int $clustersNumber) : array
{ {
$clusters = []; $clusters = [];
$this->rewind(); $this->rewind();
@ -218,4 +206,16 @@ class Space extends SplObjectStorage
return $clusters; return $clusters;
} }
private function initializeRandomClusters(int $clustersNumber): array
{
$clusters = [];
[$min, $max] = $this->getBoundaries();
for ($n = 0; $n < $clustersNumber; ++$n) {
$clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
}
return $clusters;
}
} }

View File

@ -31,39 +31,40 @@ abstract class Split
public function __construct(Dataset $dataset, float $testSize = 0.3, ?int $seed = null) public function __construct(Dataset $dataset, float $testSize = 0.3, ?int $seed = null)
{ {
if (0 >= $testSize || 1 <= $testSize) { if ($testSize <= 0 || $testSize >= 1) {
throw InvalidArgumentException::percentNotInRange('testSize'); throw InvalidArgumentException::percentNotInRange('testSize');
} }
$this->seedGenerator($seed); $this->seedGenerator($seed);
$this->splitDataset($dataset, $testSize); $this->splitDataset($dataset, $testSize);
} }
abstract protected function splitDataset(Dataset $dataset, float $testSize); public function getTrainSamples(): array
public function getTrainSamples() : array
{ {
return $this->trainSamples; return $this->trainSamples;
} }
public function getTestSamples() : array public function getTestSamples(): array
{ {
return $this->testSamples; return $this->testSamples;
} }
public function getTrainLabels() : array public function getTrainLabels(): array
{ {
return $this->trainLabels; return $this->trainLabels;
} }
public function getTestLabels() : array public function getTestLabels(): array
{ {
return $this->testLabels; return $this->testLabels;
} }
abstract protected function splitDataset(Dataset $dataset, float $testSize);
protected function seedGenerator(?int $seed = null): void protected function seedGenerator(?int $seed = null): void
{ {
if (null === $seed) { if ($seed === null) {
mt_srand(); mt_srand();
} else { } else {
mt_srand($seed); mt_srand($seed);

View File

@ -21,7 +21,7 @@ class StratifiedRandomSplit extends RandomSplit
/** /**
* @return Dataset[]|array * @return Dataset[]|array
*/ */
private function splitByTarget(Dataset $dataset) : array private function splitByTarget(Dataset $dataset): array
{ {
$targets = $dataset->getTargets(); $targets = $dataset->getTargets();
$samples = $dataset->getSamples(); $samples = $dataset->getSamples();
@ -38,7 +38,7 @@ class StratifiedRandomSplit extends RandomSplit
return $datasets; return $datasets;
} }
private function createDatasets(array $uniqueTargets, array $split) : array private function createDatasets(array $uniqueTargets, array $split): array
{ {
$datasets = []; $datasets = [];
foreach ($uniqueTargets as $target) { foreach ($uniqueTargets as $target) {

View File

@ -31,12 +31,12 @@ class ArrayDataset implements Dataset
$this->targets = $targets; $this->targets = $targets;
} }
public function getSamples() : array public function getSamples(): array
{ {
return $this->samples; return $this->samples;
} }
public function getTargets() : array public function getTargets(): array
{ {
return $this->targets; return $this->targets;
} }

View File

@ -11,7 +11,7 @@ class CsvDataset extends ArrayDataset
/** /**
* @var array * @var array
*/ */
protected $columnNames; protected $columnNames = [];
/** /**
* @throws FileException * @throws FileException
@ -22,7 +22,8 @@ class CsvDataset extends ArrayDataset
throw FileException::missingFile(basename($filepath)); throw FileException::missingFile(basename($filepath));
} }
if (false === $handle = fopen($filepath, 'rb')) { $handle = fopen($filepath, 'rb');
if ($handle === false) {
throw FileException::cantOpenFile(basename($filepath)); throw FileException::cantOpenFile(basename($filepath));
} }
@ -44,7 +45,7 @@ class CsvDataset extends ArrayDataset
parent::__construct($samples, $targets); parent::__construct($samples, $targets);
} }
public function getColumnNames() : array public function getColumnNames(): array
{ {
return $this->columnNames; return $this->columnNames;
} }

View File

@ -9,10 +9,10 @@ interface Dataset
/** /**
* @return array * @return array
*/ */
public function getSamples() : array; public function getSamples(): array;
/** /**
* @return array * @return array
*/ */
public function getTargets() : array; public function getTargets(): array;
} }

View File

@ -84,7 +84,7 @@ abstract class EigenTransformerBase
/** /**
* Returns the reduced data * Returns the reduced data
*/ */
protected function reduce(array $data) : array protected function reduce(array $data): array
{ {
$m1 = new Matrix($data); $m1 = new Matrix($data);
$m2 = new Matrix($this->eigVectors); $m2 = new Matrix($this->eigVectors);

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\DimensionReduction; namespace Phpml\DimensionReduction;
use Closure;
use Exception;
use Phpml\Math\Distance\Euclidean; use Phpml\Math\Distance\Euclidean;
use Phpml\Math\Distance\Manhattan; use Phpml\Math\Distance\Manhattan;
use Phpml\Math\Matrix; use Phpml\Math\Matrix;
@ -11,8 +13,11 @@ use Phpml\Math\Matrix;
class KernelPCA extends PCA class KernelPCA extends PCA
{ {
public const KERNEL_RBF = 1; public const KERNEL_RBF = 1;
public const KERNEL_SIGMOID = 2; public const KERNEL_SIGMOID = 2;
public const KERNEL_LAPLACIAN = 3; public const KERNEL_LAPLACIAN = 3;
public const KERNEL_LINEAR = 4; public const KERNEL_LINEAR = 4;
/** /**
@ -34,7 +39,7 @@ class KernelPCA extends PCA
* *
* @var array * @var array
*/ */
protected $data; protected $data = [];
/** /**
* Kernel principal component analysis (KernelPCA) is an extension of PCA using * Kernel principal component analysis (KernelPCA) is an extension of PCA using
@ -54,7 +59,7 @@ class KernelPCA extends PCA
{ {
$availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR]; $availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
if (!in_array($kernel, $availableKernels)) { if (!in_array($kernel, $availableKernels)) {
throw new \Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian'); throw new Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
} }
parent::__construct($totalVariance, $numFeatures); parent::__construct($totalVariance, $numFeatures);
@ -69,7 +74,7 @@ class KernelPCA extends PCA
* $data is an n-by-m matrix and returned array is * $data is an n-by-m matrix and returned array is
* n-by-k matrix where k <= m * n-by-k matrix where k <= m
*/ */
public function fit(array $data) : array public function fit(array $data): array
{ {
$numRows = count($data); $numRows = count($data);
$this->data = $data; $this->data = $data;
@ -88,11 +93,32 @@ class KernelPCA extends PCA
return Matrix::transposeArray($this->eigVectors); return Matrix::transposeArray($this->eigVectors);
} }
/**
* Transforms the given sample to a lower dimensional vector by using
* the variables obtained during the last run of <code>fit</code>.
*
* @throws \Exception
*/
public function transform(array $sample): array
{
if (!$this->fit) {
throw new Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
}
if (is_array($sample[0])) {
throw new Exception('KernelPCA::transform() accepts only one-dimensional arrays');
}
$pairs = $this->getDistancePairs($sample);
return $this->projectSample($pairs);
}
/** /**
* Calculates similarity matrix by use of selected kernel function<br> * Calculates similarity matrix by use of selected kernel function<br>
* An n-by-m matrix is given and an n-by-n matrix is returned * An n-by-m matrix is given and an n-by-n matrix is returned
*/ */
protected function calculateKernelMatrix(array $data, int $numRows) : array protected function calculateKernelMatrix(array $data, int $numRows): array
{ {
$kernelFunc = $this->getKernel(); $kernelFunc = $this->getKernel();
@ -116,7 +142,7 @@ class KernelPCA extends PCA
* *
* K = K N.K K.N + N.K.N where N is n-by-n matrix filled with 1/n * K = K N.K K.N + N.K.N where N is n-by-n matrix filled with 1/n
*/ */
protected function centerMatrix(array $matrix, int $n) : array protected function centerMatrix(array $matrix, int $n): array
{ {
$N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n)); $N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
$N = new Matrix($N, false); $N = new Matrix($N, false);
@ -140,7 +166,7 @@ class KernelPCA extends PCA
* *
* @throws \Exception * @throws \Exception
*/ */
protected function getKernel(): \Closure protected function getKernel(): Closure
{ {
switch ($this->kernel) { switch ($this->kernel) {
case self::KERNEL_LINEAR: case self::KERNEL_LINEAR:
@ -173,11 +199,11 @@ class KernelPCA extends PCA
}; };
default: default:
throw new \Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel)); throw new Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
} }
} }
protected function getDistancePairs(array $sample) : array protected function getDistancePairs(array $sample): array
{ {
$kernel = $this->getKernel(); $kernel = $this->getKernel();
@ -189,7 +215,7 @@ class KernelPCA extends PCA
return $pairs; return $pairs;
} }
protected function projectSample(array $pairs) : array protected function projectSample(array $pairs): array
{ {
// Normalize eigenvectors by eig = eigVectors / eigValues // Normalize eigenvectors by eig = eigVectors / eigValues
$func = function ($eigVal, $eigVect) { $func = function ($eigVal, $eigVect) {
@ -203,25 +229,4 @@ class KernelPCA extends PCA
// return k.dot(eig) // return k.dot(eig)
return Matrix::dot($pairs, $eig); return Matrix::dot($pairs, $eig);
} }
/**
* Transforms the given sample to a lower dimensional vector by using
* the variables obtained during the last run of <code>fit</code>.
*
* @throws \Exception
*/
public function transform(array $sample) : array
{
if (!$this->fit) {
throw new \Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
}
if (is_array($sample[0])) {
throw new \Exception('KernelPCA::transform() accepts only one-dimensional arrays');
}
$pairs = $this->getDistancePairs($sample);
return $this->projectSample($pairs);
}
} }

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\DimensionReduction; namespace Phpml\DimensionReduction;
use Exception;
use Phpml\Math\Matrix; use Phpml\Math\Matrix;
class LDA extends EigenTransformerBase class LDA extends EigenTransformerBase
@ -16,22 +17,22 @@ class LDA extends EigenTransformerBase
/** /**
* @var array * @var array
*/ */
public $labels; public $labels = [];
/** /**
* @var array * @var array
*/ */
public $means; public $means = [];
/** /**
* @var array * @var array
*/ */
public $counts; public $counts = [];
/** /**
* @var float[] * @var float[]
*/ */
public $overallMean; public $overallMean = [];
/** /**
* Linear Discriminant Analysis (LDA) is used to reduce the dimensionality * Linear Discriminant Analysis (LDA) is used to reduce the dimensionality
@ -50,18 +51,21 @@ class LDA extends EigenTransformerBase
public function __construct(?float $totalVariance = null, ?int $numFeatures = null) public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
{ {
if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) { if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
throw new \Exception('Total variance can be a value between 0.1 and 0.99'); throw new Exception('Total variance can be a value between 0.1 and 0.99');
} }
if ($numFeatures !== null && $numFeatures <= 0) { if ($numFeatures !== null && $numFeatures <= 0) {
throw new \Exception('Number of features to be preserved should be greater than 0'); throw new Exception('Number of features to be preserved should be greater than 0');
} }
if ($totalVariance !== null && $numFeatures !== null) { if ($totalVariance !== null && $numFeatures !== null) {
throw new \Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm'); throw new Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
} }
if ($numFeatures !== null) { if ($numFeatures !== null) {
$this->numFeatures = $numFeatures; $this->numFeatures = $numFeatures;
} }
if ($totalVariance !== null) { if ($totalVariance !== null) {
$this->totalVariance = $totalVariance; $this->totalVariance = $totalVariance;
} }
@ -70,7 +74,7 @@ class LDA extends EigenTransformerBase
/** /**
* Trains the algorithm to transform the given data to a lower dimensional space. * Trains the algorithm to transform the given data to a lower dimensional space.
*/ */
public function fit(array $data, array $classes) : array public function fit(array $data, array $classes): array
{ {
$this->labels = $this->getLabels($classes); $this->labels = $this->getLabels($classes);
$this->means = $this->calculateMeans($data, $classes); $this->means = $this->calculateMeans($data, $classes);
@ -86,10 +90,29 @@ class LDA extends EigenTransformerBase
return $this->reduce($data); return $this->reduce($data);
} }
/**
* Transforms the given sample to a lower dimensional vector by using
* the eigenVectors obtained in the last run of <code>fit</code>.
*
* @throws \Exception
*/
public function transform(array $sample): array
{
if (!$this->fit) {
throw new Exception('LDA has not been fitted with respect to original dataset, please run LDA::fit() first');
}
if (!is_array($sample[0])) {
$sample = [$sample];
}
return $this->reduce($sample);
}
/** /**
* Returns unique labels in the dataset * Returns unique labels in the dataset
*/ */
protected function getLabels(array $classes) : array protected function getLabels(array $classes): array
{ {
$counts = array_count_values($classes); $counts = array_count_values($classes);
@ -100,7 +123,7 @@ class LDA extends EigenTransformerBase
* Calculates mean of each column for each class and returns * Calculates mean of each column for each class and returns
* n by m matrix where n is number of labels and m is number of columns * n by m matrix where n is number of labels and m is number of columns
*/ */
protected function calculateMeans(array $data, array $classes) : array protected function calculateMeans(array $data, array $classes): array
{ {
$means = []; $means = [];
$counts = []; $counts = [];
@ -113,6 +136,7 @@ class LDA extends EigenTransformerBase
if (!isset($means[$label][$col])) { if (!isset($means[$label][$col])) {
$means[$label][$col] = 0.0; $means[$label][$col] = 0.0;
} }
$means[$label][$col] += $val; $means[$label][$col] += $val;
$overallMean[$col] += $val; $overallMean[$col] += $val;
} }
@ -146,7 +170,7 @@ class LDA extends EigenTransformerBase
* is a n by m matrix where n is number of classes and * is a n by m matrix where n is number of classes and
* m is number of columns * m is number of columns
*/ */
protected function calculateClassVar(array $data, array $classes) : Matrix protected function calculateClassVar(array $data, array $classes): Matrix
{ {
// s is an n (number of classes) by m (number of column) matrix // s is an n (number of classes) by m (number of column) matrix
$s = array_fill(0, count($data[0]), array_fill(0, count($data[0]), 0)); $s = array_fill(0, count($data[0]), array_fill(0, count($data[0]), 0));
@ -169,7 +193,7 @@ class LDA extends EigenTransformerBase
* is an n by m matrix where n is number of classes and * is an n by m matrix where n is number of classes and
* m is number of columns * m is number of columns
*/ */
protected function calculateClassCov() : Matrix protected function calculateClassCov(): Matrix
{ {
// s is an n (number of classes) by m (number of column) matrix // s is an n (number of classes) by m (number of column) matrix
$s = array_fill(0, count($this->overallMean), array_fill(0, count($this->overallMean), 0)); $s = array_fill(0, count($this->overallMean), array_fill(0, count($this->overallMean), 0));
@ -187,7 +211,7 @@ class LDA extends EigenTransformerBase
/** /**
* Returns the result of the calculation (x - m)T.(x - m) * Returns the result of the calculation (x - m)T.(x - m)
*/ */
protected function calculateVar(array $row, array $means) : Matrix protected function calculateVar(array $row, array $means): Matrix
{ {
$x = new Matrix($row, false); $x = new Matrix($row, false);
$m = new Matrix($means, false); $m = new Matrix($means, false);
@ -195,23 +219,4 @@ class LDA extends EigenTransformerBase
return $diff->transpose()->multiply($diff); return $diff->transpose()->multiply($diff);
} }
/**
* Transforms the given sample to a lower dimensional vector by using
* the eigenVectors obtained in the last run of <code>fit</code>.
*
* @throws \Exception
*/
public function transform(array $sample) : array
{
if (!$this->fit) {
throw new \Exception('LDA has not been fitted with respect to original dataset, please run LDA::fit() first');
}
if (!is_array($sample[0])) {
$sample = [$sample];
}
return $this->reduce($sample);
}
} }

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\DimensionReduction; namespace Phpml\DimensionReduction;
use Exception;
use Phpml\Math\Statistic\Covariance; use Phpml\Math\Statistic\Covariance;
use Phpml\Math\Statistic\Mean; use Phpml\Math\Statistic\Mean;
@ -35,18 +36,21 @@ class PCA extends EigenTransformerBase
public function __construct(?float $totalVariance = null, ?int $numFeatures = null) public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
{ {
if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) { if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
throw new \Exception('Total variance can be a value between 0.1 and 0.99'); throw new Exception('Total variance can be a value between 0.1 and 0.99');
} }
if ($numFeatures !== null && $numFeatures <= 0) { if ($numFeatures !== null && $numFeatures <= 0) {
throw new \Exception('Number of features to be preserved should be greater than 0'); throw new Exception('Number of features to be preserved should be greater than 0');
} }
if ($totalVariance !== null && $numFeatures !== null) { if ($totalVariance !== null && $numFeatures !== null) {
throw new \Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm'); throw new Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
} }
if ($numFeatures !== null) { if ($numFeatures !== null) {
$this->numFeatures = $numFeatures; $this->numFeatures = $numFeatures;
} }
if ($totalVariance !== null) { if ($totalVariance !== null) {
$this->totalVariance = $totalVariance; $this->totalVariance = $totalVariance;
} }
@ -58,7 +62,7 @@ class PCA extends EigenTransformerBase
* $data is an n-by-m matrix and returned array is * $data is an n-by-m matrix and returned array is
* n-by-k matrix where k <= m * n-by-k matrix where k <= m
*/ */
public function fit(array $data) : array public function fit(array $data): array
{ {
$n = count($data[0]); $n = count($data[0]);
@ -73,6 +77,27 @@ class PCA extends EigenTransformerBase
return $this->reduce($data); return $this->reduce($data);
} }
/**
* Transforms the given sample to a lower dimensional vector by using
* the eigenVectors obtained in the last run of <code>fit</code>.
*
* @throws \Exception
*/
public function transform(array $sample): array
{
if (!$this->fit) {
throw new Exception('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
}
if (!is_array($sample[0])) {
$sample = [$sample];
}
$sample = $this->normalize($sample, count($sample[0]));
return $this->reduce($sample);
}
protected function calculateMeans(array $data, int $n): void protected function calculateMeans(array $data, int $n): void
{ {
// Calculate means for each dimension // Calculate means for each dimension
@ -87,7 +112,7 @@ class PCA extends EigenTransformerBase
* Normalization of the data includes subtracting mean from * Normalization of the data includes subtracting mean from
* each dimension therefore dimensions will be centered to zero * each dimension therefore dimensions will be centered to zero
*/ */
protected function normalize(array $data, int $n) : array protected function normalize(array $data, int $n): array
{ {
if (empty($this->means)) { if (empty($this->means)) {
$this->calculateMeans($data, $n); $this->calculateMeans($data, $n);
@ -102,25 +127,4 @@ class PCA extends EigenTransformerBase
return $data; return $data;
} }
/**
* Transforms the given sample to a lower dimensional vector by using
* the eigenVectors obtained in the last run of <code>fit</code>.
*
* @throws \Exception
*/
public function transform(array $sample) : array
{
if (!$this->fit) {
throw new \Exception('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
}
if (!is_array($sample[0])) {
$sample = [$sample];
}
$sample = $this->normalize($sample, count($sample[0]));
return $this->reduce($sample);
}
} }

View File

@ -4,9 +4,11 @@ declare(strict_types=1);
namespace Phpml\Exception; namespace Phpml\Exception;
class DatasetException extends \Exception use Exception;
class DatasetException extends Exception
{ {
public static function missingFolder(string $path) : DatasetException public static function missingFolder(string $path): self
{ {
return new self(sprintf('Dataset root folder "%s" missing.', $path)); return new self(sprintf('Dataset root folder "%s" missing.', $path));
} }

View File

@ -4,19 +4,21 @@ declare(strict_types=1);
namespace Phpml\Exception; namespace Phpml\Exception;
class FileException extends \Exception use Exception;
class FileException extends Exception
{ {
public static function missingFile(string $filepath) : FileException public static function missingFile(string $filepath): self
{ {
return new self(sprintf('File "%s" missing.', $filepath)); return new self(sprintf('File "%s" missing.', $filepath));
} }
public static function cantOpenFile(string $filepath) : FileException public static function cantOpenFile(string $filepath): self
{ {
return new self(sprintf('File "%s" can\'t be open.', $filepath)); return new self(sprintf('File "%s" can\'t be open.', $filepath));
} }
public static function cantSaveFile(string $filepath) : FileException public static function cantSaveFile(string $filepath): self
{ {
return new self(sprintf('File "%s" can\'t be saved.', $filepath)); return new self(sprintf('File "%s" can\'t be saved.', $filepath));
} }

View File

@ -4,39 +4,41 @@ declare(strict_types=1);
namespace Phpml\Exception; namespace Phpml\Exception;
class InvalidArgumentException extends \Exception use Exception;
class InvalidArgumentException extends Exception
{ {
public static function arraySizeNotMatch() : InvalidArgumentException public static function arraySizeNotMatch(): self
{ {
return new self('Size of given arrays does not match'); return new self('Size of given arrays does not match');
} }
public static function percentNotInRange($name) : InvalidArgumentException public static function percentNotInRange($name): self
{ {
return new self(sprintf('%s must be between 0.0 and 1.0', $name)); return new self(sprintf('%s must be between 0.0 and 1.0', $name));
} }
public static function arrayCantBeEmpty() : InvalidArgumentException public static function arrayCantBeEmpty(): self
{ {
return new self('The array has zero elements'); return new self('The array has zero elements');
} }
public static function arraySizeToSmall(int $minimumSize = 2) : InvalidArgumentException public static function arraySizeToSmall(int $minimumSize = 2): self
{ {
return new self(sprintf('The array must have at least %d elements', $minimumSize)); return new self(sprintf('The array must have at least %d elements', $minimumSize));
} }
public static function matrixDimensionsDidNotMatch() : InvalidArgumentException public static function matrixDimensionsDidNotMatch(): self
{ {
return new self('Matrix dimensions did not match'); return new self('Matrix dimensions did not match');
} }
public static function inconsistentMatrixSupplied() : InvalidArgumentException public static function inconsistentMatrixSupplied(): self
{ {
return new self('Inconsistent matrix supplied'); return new self('Inconsistent matrix supplied');
} }
public static function invalidClustersNumber() : InvalidArgumentException public static function invalidClustersNumber(): self
{ {
return new self('Invalid clusters number'); return new self('Invalid clusters number');
} }
@ -44,57 +46,57 @@ class InvalidArgumentException extends \Exception
/** /**
* @param mixed $target * @param mixed $target
*/ */
public static function invalidTarget($target) : InvalidArgumentException public static function invalidTarget($target): self
{ {
return new self(sprintf('Target with value "%s" is not part of the accepted classes', $target)); return new self(sprintf('Target with value "%s" is not part of the accepted classes', $target));
} }
public static function invalidStopWordsLanguage(string $language) : InvalidArgumentException public static function invalidStopWordsLanguage(string $language): self
{ {
return new self(sprintf('Can\'t find "%s" language for StopWords', $language)); return new self(sprintf('Can\'t find "%s" language for StopWords', $language));
} }
public static function invalidLayerNodeClass() : InvalidArgumentException public static function invalidLayerNodeClass(): self
{ {
return new self('Layer node class must implement Node interface'); return new self('Layer node class must implement Node interface');
} }
public static function invalidLayersNumber() : InvalidArgumentException public static function invalidLayersNumber(): self
{ {
return new self('Provide at least 1 hidden layer'); return new self('Provide at least 1 hidden layer');
} }
public static function invalidClassesNumber() : InvalidArgumentException public static function invalidClassesNumber(): self
{ {
return new self('Provide at least 2 different classes'); return new self('Provide at least 2 different classes');
} }
public static function inconsistentClasses() : InvalidArgumentException public static function inconsistentClasses(): self
{ {
return new self('The provided classes don\'t match the classes provided in the constructor'); return new self('The provided classes don\'t match the classes provided in the constructor');
} }
public static function fileNotFound(string $file) : InvalidArgumentException public static function fileNotFound(string $file): self
{ {
return new self(sprintf('File "%s" not found', $file)); return new self(sprintf('File "%s" not found', $file));
} }
public static function fileNotExecutable(string $file) : InvalidArgumentException public static function fileNotExecutable(string $file): self
{ {
return new self(sprintf('File "%s" is not executable', $file)); return new self(sprintf('File "%s" is not executable', $file));
} }
public static function pathNotFound(string $path) : InvalidArgumentException public static function pathNotFound(string $path): self
{ {
return new self(sprintf('The specified path "%s" does not exist', $path)); return new self(sprintf('The specified path "%s" does not exist', $path));
} }
public static function pathNotWritable(string $path) : InvalidArgumentException public static function pathNotWritable(string $path): self
{ {
return new self(sprintf('The specified path "%s" is not writable', $path)); return new self(sprintf('The specified path "%s" is not writable', $path));
} }
public static function invalidOperator(string $operator) : InvalidArgumentException public static function invalidOperator(string $operator): self
{ {
return new self(sprintf('Invalid operator "%s" provided', $operator)); return new self(sprintf('Invalid operator "%s" provided', $operator));
} }

View File

@ -4,19 +4,21 @@ declare(strict_types=1);
namespace Phpml\Exception; namespace Phpml\Exception;
class MatrixException extends \Exception use Exception;
class MatrixException extends Exception
{ {
public static function notSquareMatrix() : MatrixException public static function notSquareMatrix(): self
{ {
return new self('Matrix is not square matrix'); return new self('Matrix is not square matrix');
} }
public static function columnOutOfRange() : MatrixException public static function columnOutOfRange(): self
{ {
return new self('Column out of range'); return new self('Column out of range');
} }
public static function singularMatrix() : MatrixException public static function singularMatrix(): self
{ {
return new self('Matrix is singular'); return new self('Matrix is singular');
} }

View File

@ -4,9 +4,11 @@ declare(strict_types=1);
namespace Phpml\Exception; namespace Phpml\Exception;
class NormalizerException extends \Exception use Exception;
class NormalizerException extends Exception
{ {
public static function unknownNorm() : NormalizerException public static function unknownNorm(): self
{ {
return new self('Unknown norm supplied.'); return new self('Unknown norm supplied.');
} }

View File

@ -4,14 +4,16 @@ declare(strict_types=1);
namespace Phpml\Exception; namespace Phpml\Exception;
class SerializeException extends \Exception use Exception;
class SerializeException extends Exception
{ {
public static function cantUnserialize(string $filepath) : SerializeException public static function cantUnserialize(string $filepath): self
{ {
return new self(sprintf('"%s" can not be unserialized.', $filepath)); return new self(sprintf('"%s" can not be unserialized.', $filepath));
} }
public static function cantSerialize(string $classname) : SerializeException public static function cantSerialize(string $classname): self
{ {
return new self(sprintf('Class "%s" can not be serialized.', $classname)); return new self(sprintf('Class "%s" can not be serialized.', $classname));
} }

View File

@ -11,19 +11,19 @@ class StopWords
/** /**
* @var array * @var array
*/ */
protected $stopWords; protected $stopWords = [];
public function __construct(array $stopWords) public function __construct(array $stopWords)
{ {
$this->stopWords = array_fill_keys($stopWords, true); $this->stopWords = array_fill_keys($stopWords, true);
} }
public function isStopWord(string $token) : bool public function isStopWord(string $token): bool
{ {
return isset($this->stopWords[$token]); return isset($this->stopWords[$token]);
} }
public static function factory(string $language = 'English') : StopWords public static function factory(string $language = 'English'): self
{ {
$className = __NAMESPACE__."\\StopWords\\$language"; $className = __NAMESPACE__."\\StopWords\\$language";

View File

@ -11,7 +11,7 @@ class TfIdfTransformer implements Transformer
/** /**
* @var array * @var array
*/ */
private $idf; private $idf = [];
public function __construct(?array $samples = null) public function __construct(?array $samples = null)
{ {

View File

@ -27,21 +27,18 @@ class TokenCountVectorizer implements Transformer
/** /**
* @var array * @var array
*/ */
private $vocabulary; private $vocabulary = [];
/** /**
* @var array * @var array
*/ */
private $frequencies; private $frequencies = [];
public function __construct(Tokenizer $tokenizer, ?StopWords $stopWords = null, float $minDF = 0.0) public function __construct(Tokenizer $tokenizer, ?StopWords $stopWords = null, float $minDF = 0.0)
{ {
$this->tokenizer = $tokenizer; $this->tokenizer = $tokenizer;
$this->stopWords = $stopWords; $this->stopWords = $stopWords;
$this->minDF = $minDF; $this->minDF = $minDF;
$this->vocabulary = [];
$this->frequencies = [];
} }
public function fit(array $samples): void public function fit(array $samples): void
@ -58,7 +55,7 @@ class TokenCountVectorizer implements Transformer
$this->checkDocumentFrequency($samples); $this->checkDocumentFrequency($samples);
} }
public function getVocabulary() : array public function getVocabulary(): array
{ {
return array_flip($this->vocabulary); return array_flip($this->vocabulary);
} }
@ -80,7 +77,7 @@ class TokenCountVectorizer implements Transformer
foreach ($tokens as $token) { foreach ($tokens as $token) {
$index = $this->getTokenIndex($token); $index = $this->getTokenIndex($token);
if (false !== $index) { if ($index !== false) {
$this->updateFrequency($token); $this->updateFrequency($token);
if (!isset($counts[$index])) { if (!isset($counts[$index])) {
$counts[$index] = 0; $counts[$index] = 0;
@ -155,7 +152,7 @@ class TokenCountVectorizer implements Transformer
} }
} }
private function getBeyondMinimumIndexes(int $samplesCount) : array private function getBeyondMinimumIndexes(int $samplesCount): array
{ {
$indexes = []; $indexes = [];
foreach ($this->frequencies as $token => $frequency) { foreach ($this->frequencies as $token => $frequency) {

View File

@ -36,6 +36,18 @@ trait OneVsRest
$this->trainBylabel($samples, $targets); $this->trainBylabel($samples, $targets);
} }
/**
* Resets the classifier and the vars internally used by OneVsRest to create multiple classifiers.
*
* Empties the classifiers, allLabels and costValues properties, then calls
* resetBinary() so the concrete classifier can clear its own state.
*/
public function reset(): void
{
$this->classifiers = [];
$this->allLabels = [];
$this->costValues = [];
// Implemented by the class using this trait.
$this->resetBinary();
}
protected function trainByLabel(array $samples, array $targets, array $allLabels = []): void protected function trainByLabel(array $samples, array $targets, array $allLabels = []): void
{ {
// Overwrites the current value if it exists. $allLabels must be provided for each partialTrain run. // Overwrites the current value if it exists. $allLabels must be provided for each partialTrain run.
@ -44,6 +56,7 @@ trait OneVsRest
} else { } else {
$this->allLabels = array_keys(array_count_values($targets)); $this->allLabels = array_keys(array_count_values($targets));
} }
sort($this->allLabels, SORT_STRING); sort($this->allLabels, SORT_STRING);
// If there are only two targets, then there is no need to perform OvR // If there are only two targets, then there is no need to perform OvR
@ -77,18 +90,6 @@ trait OneVsRest
} }
} }
/**
* Resets the classifier and the vars internally used by OneVsRest to create multiple classifiers.
*
* Empties the classifiers, allLabels and costValues properties, then calls
* resetBinary() so the concrete classifier can clear its own state.
*/
public function reset(): void
{
$this->classifiers = [];
$this->allLabels = [];
$this->costValues = [];
// Implemented by the class using this trait.
$this->resetBinary();
}
/** /**
* Returns an instance of the current class after cleaning up OneVsRest stuff. * Returns an instance of the current class after cleaning up OneVsRest stuff.
* *
@ -105,29 +106,6 @@ trait OneVsRest
return $classifier; return $classifier;
} }
/**
* Groups all targets into two groups: targets equal to
* the given label and the others.
*
* Targets matching $label are replaced by $label itself; every other target
* is replaced by the string "not_" followed by the label.
*
* $targets is not passed by reference nor contains objects so this method
* changes will not affect the caller $targets array.
*
* @param array $targets target values to binarize; keys are preserved
* @param mixed $label
*
* @return array Binarized targets and target's labels
*/
private function binarizeTargets(array $targets, $label) : array
{
$notLabel = "not_$label";
foreach ($targets as $key => $target) {
// NOTE(review): loose comparison, so e.g. '1' and 1 count as the same label - confirm this is intended.
$targets[$key] = $target == $label ? $label : $notLabel;
}
$labels = [$label, $notLabel];
return [$targets, $labels];
}
/** /**
* @return mixed * @return mixed
*/ */
@ -155,8 +133,6 @@ trait OneVsRest
/** /**
* To be overwritten by OneVsRest classifiers. * To be overwritten by OneVsRest classifiers.
*
* @return void
*/ */
abstract protected function resetBinary(): void; abstract protected function resetBinary(): void;
@ -174,4 +150,27 @@ trait OneVsRest
* @return mixed * @return mixed
*/ */
abstract protected function predictSampleBinary(array $sample); abstract protected function predictSampleBinary(array $sample);
/**
 * Splits the targets into two classes: the given label and "everything else".
 *
 * Every target loosely equal (==) to $label is replaced by $label itself; all
 * other targets are replaced by the string "not_" followed by the label.
 * $targets is received by value, so the caller's array is left untouched.
 *
 * @param mixed $label
 *
 * @return array Binarized targets and target's labels
 */
private function binarizeTargets(array $targets, $label): array
{
    $notLabel = "not_$label";

    // array_map() with a single input array keeps the original keys.
    $binarized = array_map(
        static function ($target) use ($label, $notLabel) {
            return $target == $label ? $label : $notLabel;
        },
        $targets
    );

    return [$binarized, [$label, $notLabel]];
}
} }

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Helper\Optimizer; namespace Phpml\Helper\Optimizer;
use Closure;
/** /**
* Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x * Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x
* See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method * See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method
@ -17,7 +19,7 @@ namespace Phpml\Helper\Optimizer;
*/ */
class ConjugateGradient extends GD class ConjugateGradient extends GD
{ {
public function runOptimization(array $samples, array $targets, \Closure $gradientCb) : array public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
{ {
$this->samples = $samples; $this->samples = $samples;
$this->targets = $targets; $this->targets = $targets;
@ -25,7 +27,7 @@ class ConjugateGradient extends GD
$this->sampleCount = count($samples); $this->sampleCount = count($samples);
$this->costValues = []; $this->costValues = [];
$d = mp::muls($this->gradient($this->theta), -1); $d = MP::muls($this->gradient($this->theta), -1);
for ($i = 0; $i < $this->maxIterations; ++$i) { for ($i = 0; $i < $this->maxIterations; ++$i) {
// Obtain α that minimizes f(θ + α.d) // Obtain α that minimizes f(θ + α.d)
@ -59,7 +61,7 @@ class ConjugateGradient extends GD
* Executes the callback function for the problem and returns * Executes the callback function for the problem and returns
* sum of the gradient for all samples & targets. * sum of the gradient for all samples & targets.
*/ */
protected function gradient(array $theta) : array protected function gradient(array $theta): array
{ {
[, $gradient] = parent::gradient($theta); [, $gradient] = parent::gradient($theta);
@ -69,7 +71,7 @@ class ConjugateGradient extends GD
/** /**
* Returns the value of f(x) for given solution * Returns the value of f(x) for given solution
*/ */
protected function cost(array $theta) : float protected function cost(array $theta): float
{ {
[$cost] = parent::gradient($theta); [$cost] = parent::gradient($theta);
@ -90,14 +92,14 @@ class ConjugateGradient extends GD
* b-1) If cost function decreases, continue enlarging alpha * b-1) If cost function decreases, continue enlarging alpha
* b-2) If cost function increases, take the midpoint and try again * b-2) If cost function increases, take the midpoint and try again
*/ */
protected function getAlpha(float $d) : float protected function getAlpha(float $d): float
{ {
$small = 0.0001 * $d; $small = 0.0001 * $d;
$large = 0.01 * $d; $large = 0.01 * $d;
// Obtain θ + α.d for two initial values, x0 and x1 // Obtain θ + α.d for two initial values, x0 and x1
$x0 = mp::adds($this->theta, $small); $x0 = MP::adds($this->theta, $small);
$x1 = mp::adds($this->theta, $large); $x1 = MP::adds($this->theta, $large);
$epsilon = 0.0001; $epsilon = 0.0001;
$iteration = 0; $iteration = 0;
@ -113,9 +115,9 @@ class ConjugateGradient extends GD
if ($fx1 < $fx0) { if ($fx1 < $fx0) {
$x0 = $x1; $x0 = $x1;
$x1 = mp::adds($x1, 0.01); // Enlarge second $x1 = MP::adds($x1, 0.01); // Enlarge second
} else { } else {
$x1 = mp::divs(mp::add($x1, $x0), 2.0); $x1 = MP::divs(MP::add($x1, $x0), 2.0);
} // Get to the midpoint } // Get to the midpoint
$error = $fx1 / $this->dimensions; $error = $fx1 / $this->dimensions;
@ -135,7 +137,7 @@ class ConjugateGradient extends GD
* *
* θ(k+1) = θ(k) + α.d * θ(k+1) = θ(k) + α.d
*/ */
protected function getNewTheta(float $alpha, array $d) : array protected function getNewTheta(float $alpha, array $d): array
{ {
$theta = $this->theta; $theta = $this->theta;
@ -164,7 +166,7 @@ class ConjugateGradient extends GD
* See: * See:
* R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154. * R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154.
*/ */
protected function getBeta(array $newTheta) : float protected function getBeta(array $newTheta): float
{ {
$dNew = array_sum($this->gradient($newTheta)); $dNew = array_sum($this->gradient($newTheta));
$dOld = array_sum($this->gradient($this->theta)) + 1e-100; $dOld = array_sum($this->gradient($this->theta)) + 1e-100;
@ -177,11 +179,11 @@ class ConjugateGradient extends GD
* *
* d(k+1) = −∇f(x(k+1)) + β(k).d(k) * d(k+1) = −∇f(x(k+1)) + β(k).d(k)
*/ */
protected function getNewDirection(array $theta, float $beta, array $d) : array protected function getNewDirection(array $theta, float $beta, array $d): array
{ {
$grad = $this->gradient($theta); $grad = $this->gradient($theta);
return mp::add(mp::muls($grad, -1), mp::muls($d, $beta)); return MP::add(MP::muls($grad, -1), MP::muls($d, $beta));
} }
} }
@ -189,12 +191,12 @@ class ConjugateGradient extends GD
* Handles element-wise vector operations between vector-vector * Handles element-wise vector operations between vector-vector
* and vector-scalar variables * and vector-scalar variables
*/ */
class mp class MP
{ {
/** /**
* Element-wise <b>multiplication</b> of two vectors of the same size * Element-wise <b>multiplication</b> of two vectors of the same size
*/ */
public static function mul(array $m1, array $m2) : array public static function mul(array $m1, array $m2): array
{ {
$res = []; $res = [];
foreach ($m1 as $i => $val) { foreach ($m1 as $i => $val) {
@ -207,7 +209,7 @@ class mp
/** /**
* Element-wise <b>division</b> of two vectors of the same size * Element-wise <b>division</b> of two vectors of the same size
*/ */
public static function div(array $m1, array $m2) : array public static function div(array $m1, array $m2): array
{ {
$res = []; $res = [];
foreach ($m1 as $i => $val) { foreach ($m1 as $i => $val) {
@ -220,7 +222,7 @@ class mp
/** /**
* Element-wise <b>addition</b> of two vectors of the same size * Element-wise <b>addition</b> of two vectors of the same size
*/ */
public static function add(array $m1, array $m2, int $mag = 1) : array public static function add(array $m1, array $m2, int $mag = 1): array
{ {
$res = []; $res = [];
foreach ($m1 as $i => $val) { foreach ($m1 as $i => $val) {
@ -233,7 +235,7 @@ class mp
/** /**
* Element-wise <b>subtraction</b> of two vectors of the same size * Element-wise <b>subtraction</b> of two vectors of the same size
*/ */
public static function sub(array $m1, array $m2) : array public static function sub(array $m1, array $m2): array
{ {
return self::add($m1, $m2, -1); return self::add($m1, $m2, -1);
} }
@ -241,7 +243,7 @@ class mp
/** /**
* Element-wise <b>multiplication</b> of a vector with a scalar * Element-wise <b>multiplication</b> of a vector with a scalar
*/ */
public static function muls(array $m1, float $m2) : array public static function muls(array $m1, float $m2): array
{ {
$res = []; $res = [];
foreach ($m1 as $val) { foreach ($m1 as $val) {
@ -254,7 +256,7 @@ class mp
/** /**
* Element-wise <b>division</b> of a vector with a scalar * Element-wise <b>division</b> of a vector with a scalar
*/ */
public static function divs(array $m1, float $m2) : array public static function divs(array $m1, float $m2): array
{ {
$res = []; $res = [];
foreach ($m1 as $val) { foreach ($m1 as $val) {
@ -267,7 +269,7 @@ class mp
/** /**
* Element-wise <b>addition</b> of a vector with a scalar * Element-wise <b>addition</b> of a vector with a scalar
*/ */
public static function adds(array $m1, float $m2, int $mag = 1) : array public static function adds(array $m1, float $m2, int $mag = 1): array
{ {
$res = []; $res = [];
foreach ($m1 as $val) { foreach ($m1 as $val) {
@ -280,7 +282,7 @@ class mp
/** /**
* Element-wise <b>subtraction</b> of a vector with a scalar * Element-wise <b>subtraction</b> of a vector with a scalar
*/ */
public static function subs(array $m1, float $m2) : array public static function subs(array $m1, float $m2): array
{ {
return self::adds($m1, $m2, -1); return self::adds($m1, $m2, -1);
} }

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Helper\Optimizer; namespace Phpml\Helper\Optimizer;
use Closure;
/** /**
* Batch version of Gradient Descent to optimize the weights * Batch version of Gradient Descent to optimize the weights
* of a classifier given samples, targets and the objective function to minimize * of a classifier given samples, targets and the objective function to minimize
@ -17,7 +19,7 @@ class GD extends StochasticGD
*/ */
protected $sampleCount = null; protected $sampleCount = null;
public function runOptimization(array $samples, array $targets, \Closure $gradientCb) : array public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
{ {
$this->samples = $samples; $this->samples = $samples;
$this->targets = $targets; $this->targets = $targets;
@ -51,7 +53,7 @@ class GD extends StochasticGD
* Calculates gradient, cost function and penalty term for each sample * Calculates gradient, cost function and penalty term for each sample
* then returns them as an array of values * then returns them as an array of values
*/ */
protected function gradient(array $theta) : array protected function gradient(array $theta): array
{ {
$costs = []; $costs = [];
$gradient = []; $gradient = [];

View File

@ -4,6 +4,9 @@ declare(strict_types=1);
namespace Phpml\Helper\Optimizer; namespace Phpml\Helper\Optimizer;
use Closure;
use Exception;
abstract class Optimizer abstract class Optimizer
{ {
/** /**
@ -11,7 +14,7 @@ abstract class Optimizer
* *
* @var array * @var array
*/ */
protected $theta; protected $theta = [];
/** /**
* Number of dimensions * Number of dimensions
@ -30,7 +33,7 @@ abstract class Optimizer
// Inits the weights randomly // Inits the weights randomly
$this->theta = []; $this->theta = [];
for ($i = 0; $i < $this->dimensions; ++$i) { for ($i = 0; $i < $this->dimensions; ++$i) {
$this->theta[] = rand() / (float) getrandmax(); $this->theta[] = random_int(0, getrandmax()) / (float) getrandmax();
} }
} }
@ -44,7 +47,7 @@ abstract class Optimizer
public function setInitialTheta(array $theta) public function setInitialTheta(array $theta)
{ {
if (count($theta) != $this->dimensions) { if (count($theta) != $this->dimensions) {
throw new \Exception("Number of values in the weights array should be $this->dimensions"); throw new Exception("Number of values in the weights array should be $this->dimensions");
} }
$this->theta = $theta; $this->theta = $theta;
@ -56,5 +59,5 @@ abstract class Optimizer
* Executes the optimization with the given samples & targets * Executes the optimization with the given samples & targets
* and returns the weights * and returns the weights
*/ */
abstract public function runOptimization(array $samples, array $targets, \Closure $gradientCb); abstract public function runOptimization(array $samples, array $targets, Closure $gradientCb);
} }

View File

@ -4,6 +4,8 @@ declare(strict_types=1);
namespace Phpml\Helper\Optimizer; namespace Phpml\Helper\Optimizer;
use Closure;
/** /**
* Stochastic Gradient Descent optimization method * Stochastic Gradient Descent optimization method
* to find a solution for the equation A.ϴ = y where * to find a solution for the equation A.ϴ = y where
@ -66,6 +68,7 @@ class StochasticGD extends Optimizer
* @var bool * @var bool
*/ */
protected $enableEarlyStop = true; protected $enableEarlyStop = true;
/** /**
* List of values obtained by evaluating the cost function at each iteration * List of values obtained by evaluating the cost function at each iteration
* of the algorithm * of the algorithm
@ -141,7 +144,7 @@ class StochasticGD extends Optimizer
* The cost function to minimize and the gradient of the function are to be * The cost function to minimize and the gradient of the function are to be
* handled by the callback function provided as the third parameter of the method. * handled by the callback function provided as the third parameter of the method.
*/ */
public function runOptimization(array $samples, array $targets, \Closure $gradientCb) : array public function runOptimization(array $samples, array $targets, Closure $gradientCb): array
{ {
$this->samples = $samples; $this->samples = $samples;
$this->targets = $targets; $this->targets = $targets;
@ -181,7 +184,16 @@ class StochasticGD extends Optimizer
return $this->theta = $bestTheta; return $this->theta = $bestTheta;
} }
protected function updateTheta() : float /**
* Returns the list of cost values for each iteration executed in
* last run of the optimization
*/
public function getCostValues(): array
{
return $this->costValues;
}
protected function updateTheta(): float
{ {
$jValue = 0.0; $jValue = 0.0;
$theta = $this->theta; $theta = $this->theta;
@ -237,15 +249,6 @@ class StochasticGD extends Optimizer
return false; return false;
} }
/**
* Returns the list of cost values, one per iteration executed in the
* last run of the optimization.
*
* @return array the costValues property, returned as stored
*/
public function getCostValues() : array
{
return $this->costValues;
}
/** /**
* Clears the optimizer internal vars after the optimization process. * Clears the optimizer internal vars after the optimization process.
*/ */

View File

@ -10,5 +10,5 @@ interface Distance
* @param array $a * @param array $a
* @param array $b * @param array $b
*/ */
public function distance(array $a, array $b) : float; public function distance(array $a, array $b): float;
} }

View File

@ -12,7 +12,7 @@ class Chebyshev implements Distance
/** /**
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public function distance(array $a, array $b) : float public function distance(array $a, array $b): float
{ {
if (count($a) !== count($b)) { if (count($a) !== count($b)) {
throw InvalidArgumentException::arraySizeNotMatch(); throw InvalidArgumentException::arraySizeNotMatch();

View File

@ -12,7 +12,7 @@ class Euclidean implements Distance
/** /**
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public function distance(array $a, array $b) : float public function distance(array $a, array $b): float
{ {
if (count($a) !== count($b)) { if (count($a) !== count($b)) {
throw InvalidArgumentException::arraySizeNotMatch(); throw InvalidArgumentException::arraySizeNotMatch();
@ -30,7 +30,7 @@ class Euclidean implements Distance
/** /**
* Square of Euclidean distance * Square of Euclidean distance
*/ */
public function sqDistance(array $a, array $b) : float public function sqDistance(array $a, array $b): float
{ {
return $this->distance($a, $b) ** 2; return $this->distance($a, $b) ** 2;
} }

View File

@ -12,7 +12,7 @@ class Manhattan implements Distance
/** /**
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public function distance(array $a, array $b) : float public function distance(array $a, array $b): float
{ {
if (count($a) !== count($b)) { if (count($a) !== count($b)) {
throw InvalidArgumentException::arraySizeNotMatch(); throw InvalidArgumentException::arraySizeNotMatch();

View File

@ -22,7 +22,7 @@ class Minkowski implements Distance
/** /**
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public function distance(array $a, array $b) : float public function distance(array $a, array $b): float
{ {
if (count($a) !== count($b)) { if (count($a) !== count($b)) {
throw InvalidArgumentException::arraySizeNotMatch(); throw InvalidArgumentException::arraySizeNotMatch();

View File

@ -7,10 +7,10 @@ namespace Phpml\Math;
interface Kernel interface Kernel
{ {
/** /**
* @param float $a * @param float|array $a
* @param float $b * @param float|array $b
* *
* @return float * @return float|array
*/ */
public function compute($a, $b); public function compute($a, $b);
} }

View File

@ -23,12 +23,11 @@ class RBF implements Kernel
* @param array $a * @param array $a
* @param array $b * @param array $b
*/ */
public function compute($a, $b) public function compute($a, $b): float
{ {
$score = 2 * Product::scalar($a, $b); $score = 2 * Product::scalar($a, $b);
$squares = Product::scalar($a, $a) + Product::scalar($b, $b); $squares = Product::scalar($a, $a) + Product::scalar($b, $b);
$result = exp(-$this->gamma * ($squares - $score));
return $result; return exp(-$this->gamma * ($squares - $score));
} }
} }

View File

@ -1,6 +1,7 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
/** /**
* Class to obtain eigenvalues and eigenvectors of a real matrix. * Class to obtain eigenvalues and eigenvectors of a real matrix.
* *
@ -54,6 +55,7 @@ class EigenvalueDecomposition
* @var array * @var array
*/ */
private $d = []; private $d = [];
private $e = []; private $e = [];
/** /**
@ -64,25 +66,26 @@ class EigenvalueDecomposition
private $V = []; private $V = [];
/** /**
* Array for internal storage of nonsymmetric Hessenberg form. * Array for internal storage of nonsymmetric Hessenberg form.
* *
* @var array * @var array
*/ */
private $H = []; private $H = [];
/** /**
* Working storage for nonsymmetric algorithm. * Working storage for nonsymmetric algorithm.
* *
* @var array * @var array
*/ */
private $ort; private $ort = [];
/** /**
* Used for complex scalar division. * Used for complex scalar division.
* *
* @var float * @var float
*/ */
private $cdivr; private $cdivr;
private $cdivi; private $cdivi;
/** /**
@ -116,6 +119,71 @@ class EigenvalueDecomposition
} }
} }
/**
 * Returns the transposed eigenvector matrix with every row scaled to unit
 * Euclidean length.
 *
 * A row whose norm is exactly zero is returned unchanged: dividing by a zero
 * norm would emit a division-by-zero warning (yielding NAN/INF) on PHP 7 and
 * throw a DivisionByZeroError on PHP 8.
 *
 * @return array rows of the transposed V, each normalized to length 1.0
 */
public function getEigenvectors(): array
{
    $rows = (new Matrix($this->V))->transpose()->toArray();

    return array_map(static function ($vect) {
        // Euclidean norm of the row.
        $sum = 0;
        foreach ($vect as $value) {
            $sum += $value ** 2;
        }

        $norm = sqrt($sum);
        if ($norm === 0.0) {
            // Nothing to scale; avoid dividing by zero.
            return $vect;
        }

        foreach ($vect as $i => $value) {
            $vect[$i] = $value / $norm;
        }

        return $vect;
    }, $rows);
}
/**
* Returns the real parts of the eigenvalues, i.e. d = real(diag(D)).
*
* @return array the internal $d array, returned as stored
*/
public function getRealEigenvalues(): array
{
return $this->d;
}
/**
 * Returns the imaginary parts of the eigenvalues, i.e. e = imag(diag(D)).
 *
 * The internal $e array is handed back as it is stored; no sorting or
 * filtering is applied in this method.
 */
public function getImagEigenvalues(): array
{
return $this->e;
}
/**
 * Builds the block diagonal eigenvalue matrix D as an n x n array.
 *
 * Each row holds the real part on the diagonal. When the imaginary part of
 * that eigenvalue is non-zero it is written next to the diagonal: one column
 * to the right for a positive value, one column to the left for a negative one.
 */
public function getDiagonalEigenvalues(): array
{
    $size = $this->n;
    $blockDiagonal = [];

    for ($row = 0; $row < $size; ++$row) {
        $blockDiagonal[$row] = array_fill(0, $size, 0.0);
        $blockDiagonal[$row][$row] = $this->d[$row];

        $imaginary = $this->e[$row];
        if ($imaginary != 0) {
            // The sign of the imaginary part selects the off-diagonal neighbour.
            $neighbour = $imaginary > 0 ? $row + 1 : $row - 1;
            $blockDiagonal[$row][$neighbour] = $imaginary;
        }
    }

    return $blockDiagonal;
}
/** /**
* Symmetric Householder reduction to tridiagonal form. * Symmetric Householder reduction to tridiagonal form.
*/ */
@ -158,6 +226,7 @@ class EigenvalueDecomposition
for ($j = 0; $j < $i; ++$j) { for ($j = 0; $j < $i; ++$j) {
$this->e[$j] = 0.0; $this->e[$j] = 0.0;
} }
// Apply similarity transformation to remaining columns. // Apply similarity transformation to remaining columns.
for ($j = 0; $j < $i; ++$j) { for ($j = 0; $j < $i; ++$j) {
$f = $this->d[$j]; $f = $this->d[$j];
@ -168,6 +237,7 @@ class EigenvalueDecomposition
$g += $this->V[$k][$j] * $this->d[$k]; $g += $this->V[$k][$j] * $this->d[$k];
$this->e[$k] += $this->V[$k][$j] * $f; $this->e[$k] += $this->V[$k][$j] * $f;
} }
$this->e[$j] = $g; $this->e[$j] = $g;
} }
@ -185,16 +255,19 @@ class EigenvalueDecomposition
for ($j = 0; $j < $i; ++$j) { for ($j = 0; $j < $i; ++$j) {
$this->e[$j] -= $hh * $this->d[$j]; $this->e[$j] -= $hh * $this->d[$j];
} }
for ($j = 0; $j < $i; ++$j) { for ($j = 0; $j < $i; ++$j) {
$f = $this->d[$j]; $f = $this->d[$j];
$g = $this->e[$j]; $g = $this->e[$j];
for ($k = $j; $k <= $i_; ++$k) { for ($k = $j; $k <= $i_; ++$k) {
$this->V[$k][$j] -= ($f * $this->e[$k] + $g * $this->d[$k]); $this->V[$k][$j] -= ($f * $this->e[$k] + $g * $this->d[$k]);
} }
$this->d[$j] = $this->V[$i - 1][$j]; $this->d[$j] = $this->V[$i - 1][$j];
$this->V[$i][$j] = 0.0; $this->V[$i][$j] = 0.0;
} }
} }
$this->d[$i] = $h; $this->d[$i] = $h;
} }
@ -207,16 +280,19 @@ class EigenvalueDecomposition
for ($k = 0; $k <= $i; ++$k) { for ($k = 0; $k <= $i; ++$k) {
$this->d[$k] = $this->V[$k][$i + 1] / $h; $this->d[$k] = $this->V[$k][$i + 1] / $h;
} }
for ($j = 0; $j <= $i; ++$j) { for ($j = 0; $j <= $i; ++$j) {
$g = 0.0; $g = 0.0;
for ($k = 0; $k <= $i; ++$k) { for ($k = 0; $k <= $i; ++$k) {
$g += $this->V[$k][$i + 1] * $this->V[$k][$j]; $g += $this->V[$k][$i + 1] * $this->V[$k][$j];
} }
for ($k = 0; $k <= $i; ++$k) { for ($k = 0; $k <= $i; ++$k) {
$this->V[$k][$j] -= $g * $this->d[$k]; $this->V[$k][$j] -= $g * $this->d[$k];
} }
} }
} }
for ($k = 0; $k <= $i; ++$k) { for ($k = 0; $k <= $i; ++$k) {
$this->V[$k][$i + 1] = 0.0; $this->V[$k][$i + 1] = 0.0;
} }
@ -241,6 +317,7 @@ class EigenvalueDecomposition
for ($i = 1; $i < $this->n; ++$i) { for ($i = 1; $i < $this->n; ++$i) {
$this->e[$i - 1] = $this->e[$i]; $this->e[$i - 1] = $this->e[$i];
} }
$this->e[$this->n - 1] = 0.0; $this->e[$this->n - 1] = 0.0;
$f = 0.0; $f = 0.0;
$tst1 = 0.0; $tst1 = 0.0;
@ -254,8 +331,10 @@ class EigenvalueDecomposition
if (abs($this->e[$m]) <= $eps * $tst1) { if (abs($this->e[$m]) <= $eps * $tst1) {
break; break;
} }
++$m; ++$m;
} }
// If m == l, $this->d[l] is an eigenvalue, // If m == l, $this->d[l] is an eigenvalue,
// otherwise, iterate. // otherwise, iterate.
if ($m > $l) { if ($m > $l) {
@ -270,6 +349,7 @@ class EigenvalueDecomposition
if ($p < 0) { if ($p < 0) {
$r *= -1; $r *= -1;
} }
$this->d[$l] = $this->e[$l] / ($p + $r); $this->d[$l] = $this->e[$l] / ($p + $r);
$this->d[$l + 1] = $this->e[$l] * ($p + $r); $this->d[$l + 1] = $this->e[$l] * ($p + $r);
$dl1 = $this->d[$l + 1]; $dl1 = $this->d[$l + 1];
@ -277,6 +357,7 @@ class EigenvalueDecomposition
for ($i = $l + 2; $i < $this->n; ++$i) { for ($i = $l + 2; $i < $this->n; ++$i) {
$this->d[$i] -= $h; $this->d[$i] -= $h;
} }
$f += $h; $f += $h;
// Implicit QL transformation. // Implicit QL transformation.
$p = $this->d[$m]; $p = $this->d[$m];
@ -303,12 +384,14 @@ class EigenvalueDecomposition
$this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h; $this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h;
} }
} }
$p = -$s * $s2 * $c3 * $el1 * $this->e[$l] / $dl1; $p = -$s * $s2 * $c3 * $el1 * $this->e[$l] / $dl1;
$this->e[$l] = $s * $p; $this->e[$l] = $s * $p;
$this->d[$l] = $c * $p; $this->d[$l] = $c * $p;
// Check for convergence. // Check for convergence.
} while (abs($this->e[$l]) > $eps * $tst1); } while (abs($this->e[$l]) > $eps * $tst1);
} }
$this->d[$l] = $this->d[$l] + $f; $this->d[$l] = $this->d[$l] + $f;
$this->e[$l] = 0.0; $this->e[$l] = 0.0;
} }
@ -323,6 +406,7 @@ class EigenvalueDecomposition
$p = $this->d[$j]; $p = $this->d[$j];
} }
} }
if ($k != $i) { if ($k != $i) {
$this->d[$k] = $this->d[$i]; $this->d[$k] = $this->d[$i];
$this->d[$i] = $p; $this->d[$i] = $p;
@ -354,6 +438,7 @@ class EigenvalueDecomposition
for ($i = $m; $i <= $high; ++$i) { for ($i = $m; $i <= $high; ++$i) {
$scale = $scale + abs($this->H[$i][$m - 1]); $scale = $scale + abs($this->H[$i][$m - 1]);
} }
if ($scale != 0.0) { if ($scale != 0.0) {
// Compute Householder transformation. // Compute Householder transformation.
$h = 0.0; $h = 0.0;
@ -361,10 +446,12 @@ class EigenvalueDecomposition
$this->ort[$i] = $this->H[$i][$m - 1] / $scale; $this->ort[$i] = $this->H[$i][$m - 1] / $scale;
$h += $this->ort[$i] * $this->ort[$i]; $h += $this->ort[$i] * $this->ort[$i];
} }
$g = sqrt($h); $g = sqrt($h);
if ($this->ort[$m] > 0) { if ($this->ort[$m] > 0) {
$g *= -1; $g *= -1;
} }
$h -= $this->ort[$m] * $g; $h -= $this->ort[$m] * $g;
$this->ort[$m] -= $g; $this->ort[$m] -= $g;
// Apply Householder similarity transformation // Apply Householder similarity transformation
@ -374,21 +461,25 @@ class EigenvalueDecomposition
for ($i = $high; $i >= $m; --$i) { for ($i = $high; $i >= $m; --$i) {
$f += $this->ort[$i] * $this->H[$i][$j]; $f += $this->ort[$i] * $this->H[$i][$j];
} }
$f /= $h; $f /= $h;
for ($i = $m; $i <= $high; ++$i) { for ($i = $m; $i <= $high; ++$i) {
$this->H[$i][$j] -= $f * $this->ort[$i]; $this->H[$i][$j] -= $f * $this->ort[$i];
} }
} }
for ($i = 0; $i <= $high; ++$i) { for ($i = 0; $i <= $high; ++$i) {
$f = 0.0; $f = 0.0;
for ($j = $high; $j >= $m; --$j) { for ($j = $high; $j >= $m; --$j) {
$f += $this->ort[$j] * $this->H[$i][$j]; $f += $this->ort[$j] * $this->H[$i][$j];
} }
$f = $f / $h; $f = $f / $h;
for ($j = $m; $j <= $high; ++$j) { for ($j = $m; $j <= $high; ++$j) {
$this->H[$i][$j] -= $f * $this->ort[$j]; $this->H[$i][$j] -= $f * $this->ort[$j];
} }
} }
$this->ort[$m] = $scale * $this->ort[$m]; $this->ort[$m] = $scale * $this->ort[$m];
$this->H[$m][$m - 1] = $scale * $g; $this->H[$m][$m - 1] = $scale * $g;
} }
@ -400,16 +491,19 @@ class EigenvalueDecomposition
$this->V[$i][$j] = ($i == $j ? 1.0 : 0.0); $this->V[$i][$j] = ($i == $j ? 1.0 : 0.0);
} }
} }
for ($m = $high - 1; $m >= $low + 1; --$m) { for ($m = $high - 1; $m >= $low + 1; --$m) {
if ($this->H[$m][$m - 1] != 0.0) { if ($this->H[$m][$m - 1] != 0.0) {
for ($i = $m + 1; $i <= $high; ++$i) { for ($i = $m + 1; $i <= $high; ++$i) {
$this->ort[$i] = $this->H[$i][$m - 1]; $this->ort[$i] = $this->H[$i][$m - 1];
} }
for ($j = $m; $j <= $high; ++$j) { for ($j = $m; $j <= $high; ++$j) {
$g = 0.0; $g = 0.0;
for ($i = $m; $i <= $high; ++$i) { for ($i = $m; $i <= $high; ++$i) {
$g += $this->ort[$i] * $this->V[$i][$j]; $g += $this->ort[$i] * $this->V[$i][$j];
} }
// Double division avoids possible underflow // Double division avoids possible underflow
$g = ($g / $this->ort[$m]) / $this->H[$m][$m - 1]; $g = ($g / $this->ort[$m]) / $this->H[$m][$m - 1];
for ($i = $m; $i <= $high; ++$i) { for ($i = $m; $i <= $high; ++$i) {
@ -469,6 +563,7 @@ class EigenvalueDecomposition
$this->d[$i] = $this->H[$i][$i]; $this->d[$i] = $this->H[$i][$i];
$this->e[$i] = 0.0; $this->e[$i] = 0.0;
} }
for ($j = max($i - 1, 0); $j < $nn; ++$j) { for ($j = max($i - 1, 0); $j < $nn; ++$j) {
$norm = $norm + abs($this->H[$i][$j]); $norm = $norm + abs($this->H[$i][$j]);
} }
@ -484,11 +579,14 @@ class EigenvalueDecomposition
if ($s == 0.0) { if ($s == 0.0) {
$s = $norm; $s = $norm;
} }
if (abs($this->H[$l][$l - 1]) < $eps * $s) { if (abs($this->H[$l][$l - 1]) < $eps * $s) {
break; break;
} }
--$l; --$l;
} }
// Check for convergence // Check for convergence
// One root found // One root found
if ($l == $n) { if ($l == $n) {
@ -513,11 +611,13 @@ class EigenvalueDecomposition
} else { } else {
$z = $p - $z; $z = $p - $z;
} }
$this->d[$n - 1] = $x + $z; $this->d[$n - 1] = $x + $z;
$this->d[$n] = $this->d[$n - 1]; $this->d[$n] = $this->d[$n - 1];
if ($z != 0.0) { if ($z != 0.0) {
$this->d[$n] = $x - $w / $z; $this->d[$n] = $x - $w / $z;
} }
$this->e[$n - 1] = 0.0; $this->e[$n - 1] = 0.0;
$this->e[$n] = 0.0; $this->e[$n] = 0.0;
$x = $this->H[$n][$n - 1]; $x = $this->H[$n][$n - 1];
@ -533,18 +633,21 @@ class EigenvalueDecomposition
$this->H[$n - 1][$j] = $q * $z + $p * $this->H[$n][$j]; $this->H[$n - 1][$j] = $q * $z + $p * $this->H[$n][$j];
$this->H[$n][$j] = $q * $this->H[$n][$j] - $p * $z; $this->H[$n][$j] = $q * $this->H[$n][$j] - $p * $z;
} }
// Column modification // Column modification
for ($i = 0; $i <= $n; ++$i) { for ($i = 0; $i <= $n; ++$i) {
$z = $this->H[$i][$n - 1]; $z = $this->H[$i][$n - 1];
$this->H[$i][$n - 1] = $q * $z + $p * $this->H[$i][$n]; $this->H[$i][$n - 1] = $q * $z + $p * $this->H[$i][$n];
$this->H[$i][$n] = $q * $this->H[$i][$n] - $p * $z; $this->H[$i][$n] = $q * $this->H[$i][$n] - $p * $z;
} }
// Accumulate transformations // Accumulate transformations
for ($i = $low; $i <= $high; ++$i) { for ($i = $low; $i <= $high; ++$i) {
$z = $this->V[$i][$n - 1]; $z = $this->V[$i][$n - 1];
$this->V[$i][$n - 1] = $q * $z + $p * $this->V[$i][$n]; $this->V[$i][$n - 1] = $q * $z + $p * $this->V[$i][$n];
$this->V[$i][$n] = $q * $this->V[$i][$n] - $p * $z; $this->V[$i][$n] = $q * $this->V[$i][$n] - $p * $z;
} }
// Complex pair // Complex pair
} else { } else {
$this->d[$n - 1] = $x + $p; $this->d[$n - 1] = $x + $p;
@ -552,6 +655,7 @@ class EigenvalueDecomposition
$this->e[$n - 1] = $z; $this->e[$n - 1] = $z;
$this->e[$n] = -$z; $this->e[$n] = -$z;
} }
$n = $n - 2; $n = $n - 2;
$iter = 0; $iter = 0;
// No convergence yet // No convergence yet
@ -564,16 +668,19 @@ class EigenvalueDecomposition
$y = $this->H[$n - 1][$n - 1]; $y = $this->H[$n - 1][$n - 1];
$w = $this->H[$n][$n - 1] * $this->H[$n - 1][$n]; $w = $this->H[$n][$n - 1] * $this->H[$n - 1][$n];
} }
// Wilkinson's original ad hoc shift // Wilkinson's original ad hoc shift
if ($iter == 10) { if ($iter == 10) {
$exshift += $x; $exshift += $x;
for ($i = $low; $i <= $n; ++$i) { for ($i = $low; $i <= $n; ++$i) {
$this->H[$i][$i] -= $x; $this->H[$i][$i] -= $x;
} }
$s = abs($this->H[$n][$n - 1]) + abs($this->H[$n - 1][$n - 2]); $s = abs($this->H[$n][$n - 1]) + abs($this->H[$n - 1][$n - 2]);
$x = $y = 0.75 * $s; $x = $y = 0.75 * $s;
$w = -0.4375 * $s * $s; $w = -0.4375 * $s * $s;
} }
// MATLAB's new ad hoc shift // MATLAB's new ad hoc shift
if ($iter == 30) { if ($iter == 30) {
$s = ($y - $x) / 2.0; $s = ($y - $x) / 2.0;
@ -583,14 +690,17 @@ class EigenvalueDecomposition
if ($y < $x) { if ($y < $x) {
$s = -$s; $s = -$s;
} }
$s = $x - $w / (($y - $x) / 2.0 + $s); $s = $x - $w / (($y - $x) / 2.0 + $s);
for ($i = $low; $i <= $n; ++$i) { for ($i = $low; $i <= $n; ++$i) {
$this->H[$i][$i] -= $s; $this->H[$i][$i] -= $s;
} }
$exshift += $s; $exshift += $s;
$x = $y = $w = 0.964; $x = $y = $w = 0.964;
} }
} }
// Could check iteration count here. // Could check iteration count here.
$iter = $iter + 1; $iter = $iter + 1;
// Look for two consecutive small sub-diagonal elements // Look for two consecutive small sub-diagonal elements
@ -609,18 +719,22 @@ class EigenvalueDecomposition
if ($m == $l) { if ($m == $l) {
break; break;
} }
if (abs($this->H[$m][$m - 1]) * (abs($q) + abs($r)) < if (abs($this->H[$m][$m - 1]) * (abs($q) + abs($r)) <
$eps * (abs($p) * (abs($this->H[$m - 1][$m - 1]) + abs($z) + abs($this->H[$m + 1][$m + 1])))) { $eps * (abs($p) * (abs($this->H[$m - 1][$m - 1]) + abs($z) + abs($this->H[$m + 1][$m + 1])))) {
break; break;
} }
--$m; --$m;
} }
for ($i = $m + 2; $i <= $n; ++$i) { for ($i = $m + 2; $i <= $n; ++$i) {
$this->H[$i][$i - 2] = 0.0; $this->H[$i][$i - 2] = 0.0;
if ($i > $m + 2) { if ($i > $m + 2) {
$this->H[$i][$i - 3] = 0.0; $this->H[$i][$i - 3] = 0.0;
} }
} }
// Double QR step involving rows l:n and columns m:n // Double QR step involving rows l:n and columns m:n
for ($k = $m; $k <= $n - 1; ++$k) { for ($k = $m; $k <= $n - 1; ++$k) {
$notlast = ($k != $n - 1); $notlast = ($k != $n - 1);
@ -635,19 +749,23 @@ class EigenvalueDecomposition
$r = $r / $x; $r = $r / $x;
} }
} }
if ($x == 0.0) { if ($x == 0.0) {
break; break;
} }
$s = sqrt($p * $p + $q * $q + $r * $r); $s = sqrt($p * $p + $q * $q + $r * $r);
if ($p < 0) { if ($p < 0) {
$s = -$s; $s = -$s;
} }
if ($s != 0) { if ($s != 0) {
if ($k != $m) { if ($k != $m) {
$this->H[$k][$k - 1] = -$s * $x; $this->H[$k][$k - 1] = -$s * $x;
} elseif ($l != $m) { } elseif ($l != $m) {
$this->H[$k][$k - 1] = -$this->H[$k][$k - 1]; $this->H[$k][$k - 1] = -$this->H[$k][$k - 1];
} }
$p = $p + $s; $p = $p + $s;
$x = $p / $s; $x = $p / $s;
$y = $q / $s; $y = $q / $s;
@ -661,9 +779,11 @@ class EigenvalueDecomposition
$p = $p + $r * $this->H[$k + 2][$j]; $p = $p + $r * $this->H[$k + 2][$j];
$this->H[$k + 2][$j] = $this->H[$k + 2][$j] - $p * $z; $this->H[$k + 2][$j] = $this->H[$k + 2][$j] - $p * $z;
} }
$this->H[$k][$j] = $this->H[$k][$j] - $p * $x; $this->H[$k][$j] = $this->H[$k][$j] - $p * $x;
$this->H[$k + 1][$j] = $this->H[$k + 1][$j] - $p * $y; $this->H[$k + 1][$j] = $this->H[$k + 1][$j] - $p * $y;
} }
// Column modification // Column modification
for ($i = 0; $i <= min($n, $k + 3); ++$i) { for ($i = 0; $i <= min($n, $k + 3); ++$i) {
$p = $x * $this->H[$i][$k] + $y * $this->H[$i][$k + 1]; $p = $x * $this->H[$i][$k] + $y * $this->H[$i][$k + 1];
@ -671,9 +791,11 @@ class EigenvalueDecomposition
$p = $p + $z * $this->H[$i][$k + 2]; $p = $p + $z * $this->H[$i][$k + 2];
$this->H[$i][$k + 2] = $this->H[$i][$k + 2] - $p * $r; $this->H[$i][$k + 2] = $this->H[$i][$k + 2] - $p * $r;
} }
$this->H[$i][$k] = $this->H[$i][$k] - $p; $this->H[$i][$k] = $this->H[$i][$k] - $p;
$this->H[$i][$k + 1] = $this->H[$i][$k + 1] - $p * $q; $this->H[$i][$k + 1] = $this->H[$i][$k + 1] - $p * $q;
} }
// Accumulate transformations // Accumulate transformations
for ($i = $low; $i <= $high; ++$i) { for ($i = $low; $i <= $high; ++$i) {
$p = $x * $this->V[$i][$k] + $y * $this->V[$i][$k + 1]; $p = $x * $this->V[$i][$k] + $y * $this->V[$i][$k + 1];
@ -681,6 +803,7 @@ class EigenvalueDecomposition
$p = $p + $z * $this->V[$i][$k + 2]; $p = $p + $z * $this->V[$i][$k + 2];
$this->V[$i][$k + 2] = $this->V[$i][$k + 2] - $p * $r; $this->V[$i][$k + 2] = $this->V[$i][$k + 2] - $p * $r;
} }
$this->V[$i][$k] = $this->V[$i][$k] - $p; $this->V[$i][$k] = $this->V[$i][$k] - $p;
$this->V[$i][$k + 1] = $this->V[$i][$k + 1] - $p * $q; $this->V[$i][$k + 1] = $this->V[$i][$k + 1] - $p * $q;
} }
@ -719,6 +842,7 @@ class EigenvalueDecomposition
} else { } else {
$this->H[$i][$n] = -$r / ($eps * $norm); $this->H[$i][$n] = -$r / ($eps * $norm);
} }
// Solve real equations // Solve real equations
} else { } else {
$x = $this->H[$i][$i + 1]; $x = $this->H[$i][$i + 1];
@ -732,6 +856,7 @@ class EigenvalueDecomposition
$this->H[$i + 1][$n] = (-$s - $y * $t) / $z; $this->H[$i + 1][$n] = (-$s - $y * $t) / $z;
} }
} }
// Overflow control // Overflow control
$t = abs($this->H[$i][$n]); $t = abs($this->H[$i][$n]);
if (($eps * $t) * $t > 1) { if (($eps * $t) * $t > 1) {
@ -741,6 +866,7 @@ class EigenvalueDecomposition
} }
} }
} }
// Complex vector // Complex vector
} elseif ($q < 0) { } elseif ($q < 0) {
$l = $n - 1; $l = $n - 1;
@ -753,6 +879,7 @@ class EigenvalueDecomposition
$this->H[$n - 1][$n - 1] = $this->cdivr; $this->H[$n - 1][$n - 1] = $this->cdivr;
$this->H[$n - 1][$n] = $this->cdivi; $this->H[$n - 1][$n] = $this->cdivi;
} }
$this->H[$n][$n - 1] = 0.0; $this->H[$n][$n - 1] = 0.0;
$this->H[$n][$n] = 1.0; $this->H[$n][$n] = 1.0;
for ($i = $n - 2; $i >= 0; --$i) { for ($i = $n - 2; $i >= 0; --$i) {
@ -763,6 +890,7 @@ class EigenvalueDecomposition
$ra = $ra + $this->H[$i][$j] * $this->H[$j][$n - 1]; $ra = $ra + $this->H[$i][$j] * $this->H[$j][$n - 1];
$sa = $sa + $this->H[$i][$j] * $this->H[$j][$n]; $sa = $sa + $this->H[$i][$j] * $this->H[$j][$n];
} }
$w = $this->H[$i][$i] - $p; $w = $this->H[$i][$i] - $p;
if ($this->e[$i] < 0.0) { if ($this->e[$i] < 0.0) {
$z = $w; $z = $w;
@ -783,6 +911,7 @@ class EigenvalueDecomposition
if ($vr == 0.0 & $vi == 0.0) { if ($vr == 0.0 & $vi == 0.0) {
$vr = $eps * $norm * (abs($w) + abs($q) + abs($x) + abs($y) + abs($z)); $vr = $eps * $norm * (abs($w) + abs($q) + abs($x) + abs($y) + abs($z));
} }
$this->cdiv($x * $r - $z * $ra + $q * $sa, $x * $s - $z * $sa - $q * $ra, $vr, $vi); $this->cdiv($x * $r - $z * $ra + $q * $sa, $x * $s - $z * $sa - $q * $ra, $vr, $vi);
$this->H[$i][$n - 1] = $this->cdivr; $this->H[$i][$n - 1] = $this->cdivr;
$this->H[$i][$n] = $this->cdivi; $this->H[$i][$n] = $this->cdivi;
@ -795,6 +924,7 @@ class EigenvalueDecomposition
$this->H[$i + 1][$n] = $this->cdivi; $this->H[$i + 1][$n] = $this->cdivi;
} }
} }
// Overflow control // Overflow control
$t = max(abs($this->H[$i][$n - 1]), abs($this->H[$i][$n])); $t = max(abs($this->H[$i][$n - 1]), abs($this->H[$i][$n]));
if (($eps * $t) * $t > 1) { if (($eps * $t) * $t > 1) {
@ -824,81 +954,9 @@ class EigenvalueDecomposition
for ($k = $low; $k <= min($j, $high); ++$k) { for ($k = $low; $k <= min($j, $high); ++$k) {
$z = $z + $this->V[$i][$k] * $this->H[$k][$j]; $z = $z + $this->V[$i][$k] * $this->H[$k][$j];
} }
$this->V[$i][$j] = $z; $this->V[$i][$j] = $z;
} }
} }
} }
/**
* Return the eigenvector matrix
*
* @return array
*/
public function getEigenvectors()
{
$vectors = $this->V;
// Always return the eigenvectors of length 1.0
$vectors = new Matrix($vectors);
$vectors = array_map(function ($vect) {
$sum = 0;
for ($i = 0; $i < count($vect); ++$i) {
$sum += $vect[$i] ** 2;
}
$sum = sqrt($sum);
for ($i = 0; $i < count($vect); ++$i) {
$vect[$i] /= $sum;
}
return $vect;
}, $vectors->transpose()->toArray());
return $vectors;
}
/**
* Return the real parts of the eigenvalues<br>
* d = real(diag(D));
*
* @return array
*/
public function getRealEigenvalues()
{
return $this->d;
}
/**
* Return the imaginary parts of the eigenvalues <br>
* d = imag(diag(D))
*
* @return array
*/
public function getImagEigenvalues()
{
return $this->e;
}
/**
* Return the block diagonal eigenvalue matrix
*
* @return array
*/
public function getDiagonalEigenvalues()
{
$D = [];
for ($i = 0; $i < $this->n; ++$i) {
$D[$i] = array_fill(0, $this->n, 0.0);
$D[$i][$i] = $this->d[$i];
if ($this->e[$i] == 0) {
continue;
}
$o = ($this->e[$i] > 0) ? $i + 1 : $i - 1;
$D[$i][$o] = $this->e[$i];
}
return $D;
}
} }

View File

@ -1,6 +1,7 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
/** /**
* @package JAMA * @package JAMA
* *
@ -90,6 +91,7 @@ class LUDecomposition
for ($i = 0; $i < $this->m; ++$i) { for ($i = 0; $i < $this->m; ++$i) {
$this->piv[$i] = $i; $this->piv[$i] = $i;
} }
$this->pivsign = 1; $this->pivsign = 1;
$LUcolj = []; $LUcolj = [];
@ -99,6 +101,7 @@ class LUDecomposition
for ($i = 0; $i < $this->m; ++$i) { for ($i = 0; $i < $this->m; ++$i) {
$LUcolj[$i] = &$this->LU[$i][$j]; $LUcolj[$i] = &$this->LU[$i][$j];
} }
// Apply previous transformations. // Apply previous transformations.
for ($i = 0; $i < $this->m; ++$i) { for ($i = 0; $i < $this->m; ++$i) {
$LUrowi = $this->LU[$i]; $LUrowi = $this->LU[$i];
@ -108,8 +111,10 @@ class LUDecomposition
for ($k = 0; $k < $kmax; ++$k) { for ($k = 0; $k < $kmax; ++$k) {
$s += $LUrowi[$k] * $LUcolj[$k]; $s += $LUrowi[$k] * $LUcolj[$k];
} }
$LUrowi[$j] = $LUcolj[$i] -= $s; $LUrowi[$j] = $LUcolj[$i] -= $s;
} }
// Find pivot and exchange if necessary. // Find pivot and exchange if necessary.
$p = $j; $p = $j;
for ($i = $j + 1; $i < $this->m; ++$i) { for ($i = $j + 1; $i < $this->m; ++$i) {
@ -117,17 +122,20 @@ class LUDecomposition
$p = $i; $p = $i;
} }
} }
if ($p != $j) { if ($p != $j) {
for ($k = 0; $k < $this->n; ++$k) { for ($k = 0; $k < $this->n; ++$k) {
$t = $this->LU[$p][$k]; $t = $this->LU[$p][$k];
$this->LU[$p][$k] = $this->LU[$j][$k]; $this->LU[$p][$k] = $this->LU[$j][$k];
$this->LU[$j][$k] = $t; $this->LU[$j][$k] = $t;
} }
$k = $this->piv[$p]; $k = $this->piv[$p];
$this->piv[$p] = $this->piv[$j]; $this->piv[$p] = $this->piv[$j];
$this->piv[$j] = $k; $this->piv[$j] = $k;
$this->pivsign = $this->pivsign * -1; $this->pivsign = $this->pivsign * -1;
} }
// Compute multipliers. // Compute multipliers.
if (($j < $this->m) && ($this->LU[$j][$j] != 0.0)) { if (($j < $this->m) && ($this->LU[$j][$j] != 0.0)) {
for ($i = $j + 1; $i < $this->m; ++$i) { for ($i = $j + 1; $i < $this->m; ++$i) {
@ -142,7 +150,7 @@ class LUDecomposition
* *
* @return Matrix Lower triangular factor * @return Matrix Lower triangular factor
*/ */
public function getL() : Matrix public function getL(): Matrix
{ {
$L = []; $L = [];
for ($i = 0; $i < $this->m; ++$i) { for ($i = 0; $i < $this->m; ++$i) {
@ -165,7 +173,7 @@ class LUDecomposition
* *
* @return Matrix Upper triangular factor * @return Matrix Upper triangular factor
*/ */
public function getU() : Matrix public function getU(): Matrix
{ {
$U = []; $U = [];
for ($i = 0; $i < $this->n; ++$i) { for ($i = 0; $i < $this->n; ++$i) {
@ -186,7 +194,7 @@ class LUDecomposition
* *
* @return array Pivot vector * @return array Pivot vector
*/ */
public function getPivot() : array public function getPivot(): array
{ {
return $this->piv; return $this->piv;
} }
@ -247,7 +255,7 @@ class LUDecomposition
* *
* @throws MatrixException * @throws MatrixException
*/ */
public function solve(Matrix $B) : array public function solve(Matrix $B): array
{ {
if ($B->getRows() != $this->m) { if ($B->getRows() != $this->m) {
throw MatrixException::notSquareMatrix(); throw MatrixException::notSquareMatrix();
@ -268,11 +276,13 @@ class LUDecomposition
} }
} }
} }
// Solve U*X = Y; // Solve U*X = Y;
for ($k = $this->n - 1; $k >= 0; --$k) { for ($k = $this->n - 1; $k >= 0; --$k) {
for ($j = 0; $j < $nx; ++$j) { for ($j = 0; $j < $nx; ++$j) {
$X[$k][$j] /= $this->LU[$k][$k]; $X[$k][$j] /= $this->LU[$k][$k];
} }
for ($i = 0; $i < $k; ++$i) { for ($i = 0; $i < $k; ++$i) {
for ($j = 0; $j < $nx; ++$j) { for ($j = 0; $j < $nx; ++$j) {
$X[$i][$j] -= $X[$k][$j] * $this->LU[$i][$k]; $X[$i][$j] -= $X[$k][$j] * $this->LU[$i][$k];
@ -283,7 +293,7 @@ class LUDecomposition
return $X; return $X;
} }
protected function getSubMatrix(array $matrix, array $RL, int $j0, int $jF) : array protected function getSubMatrix(array $matrix, array $RL, int $j0, int $jF): array
{ {
$m = count($RL); $m = count($RL);
$n = $jF - $j0; $n = $jF - $j0;

View File

@ -13,7 +13,7 @@ class Matrix
/** /**
* @var array * @var array
*/ */
private $matrix; private $matrix = [];
/** /**
* @var int * @var int
@ -56,7 +56,7 @@ class Matrix
$this->matrix = $matrix; $this->matrix = $matrix;
} }
public static function fromFlatArray(array $array) : Matrix public static function fromFlatArray(array $array): self
{ {
$matrix = []; $matrix = [];
foreach ($array as $value) { foreach ($array as $value) {
@ -66,12 +66,12 @@ class Matrix
return new self($matrix); return new self($matrix);
} }
public function toArray() : array public function toArray(): array
{ {
return $this->matrix; return $this->matrix;
} }
public function toScalar() : float public function toScalar(): float
{ {
return $this->matrix[0][0]; return $this->matrix[0][0];
} }
@ -89,7 +89,7 @@ class Matrix
/** /**
* @throws MatrixException * @throws MatrixException
*/ */
public function getColumnValues($column) : array public function getColumnValues($column): array
{ {
if ($column >= $this->columns) { if ($column >= $this->columns) {
throw MatrixException::columnOutOfRange(); throw MatrixException::columnOutOfRange();
@ -123,7 +123,7 @@ class Matrix
return $this->columns === $this->rows; return $this->columns === $this->rows;
} }
public function transpose() : Matrix public function transpose(): self
{ {
if ($this->rows == 1) { if ($this->rows == 1) {
$matrix = array_map(function ($el) { $matrix = array_map(function ($el) {
@ -136,7 +136,7 @@ class Matrix
return new self($matrix, false); return new self($matrix, false);
} }
public function multiply(Matrix $matrix) : Matrix public function multiply(self $matrix): self
{ {
if ($this->columns != $matrix->getRows()) { if ($this->columns != $matrix->getRows()) {
throw InvalidArgumentException::inconsistentMatrixSupplied(); throw InvalidArgumentException::inconsistentMatrixSupplied();
@ -157,7 +157,7 @@ class Matrix
return new self($product, false); return new self($product, false);
} }
public function divideByScalar($value) : Matrix public function divideByScalar($value): self
{ {
$newMatrix = []; $newMatrix = [];
for ($i = 0; $i < $this->rows; ++$i) { for ($i = 0; $i < $this->rows; ++$i) {
@ -169,7 +169,7 @@ class Matrix
return new self($newMatrix, false); return new self($newMatrix, false);
} }
public function multiplyByScalar($value) : Matrix public function multiplyByScalar($value): self
{ {
$newMatrix = []; $newMatrix = [];
for ($i = 0; $i < $this->rows; ++$i) { for ($i = 0; $i < $this->rows; ++$i) {
@ -184,7 +184,7 @@ class Matrix
/** /**
* Element-wise addition of the matrix with another one * Element-wise addition of the matrix with another one
*/ */
public function add(Matrix $other) : Matrix public function add(self $other): self
{ {
return $this->_add($other); return $this->_add($other);
} }
@ -192,15 +192,74 @@ class Matrix
/** /**
* Element-wise subtracting of another matrix from this one * Element-wise subtracting of another matrix from this one
*/ */
public function subtract(Matrix $other) : Matrix public function subtract(self $other): self
{ {
return $this->_add($other, -1); return $this->_add($other, -1);
} }
/**
 * Returns the inverse of this matrix, obtained by handing the identity
 * matrix of the same size to the solver of an LU decomposition of $this.
 *
 * @throws MatrixException when the matrix is not square
 */
public function inverse(): self
{
    if ($this->isSquare()) {
        $decomposition = new LUDecomposition($this);

        return new self($decomposition->solve($this->getIdentity()), false);
    }

    throw MatrixException::notSquareMatrix();
}
/**
 * Returns a new matrix made of this one with the given row and the given
 * column left out; the remaining entries keep their relative order.
 */
public function crossOut(int $row, int $column): self
{
    $remaining = [];
    $targetRow = 0;

    for ($i = 0; $i < $this->rows; ++$i) {
        if ($row == $i) {
            // Skipped row: the target row counter does not advance.
            continue;
        }

        $targetColumn = 0;
        for ($j = 0; $j < $this->columns; ++$j) {
            if ($column == $j) {
                continue;
            }

            $remaining[$targetRow][$targetColumn] = $this->matrix[$i][$j];
            ++$targetColumn;
        }

        ++$targetRow;
    }

    return new self($remaining, false);
}
/**
 * Tells whether the determinant of this matrix compares equal to zero.
 *
 * The check is an exact (loose) comparison against 0, without tolerance.
 */
public function isSingular(): bool
{
    $determinant = $this->getDeterminant();

    return $determinant == 0;
}
/**
 * Transposes a plain array by wrapping it in a matrix (built with the
 * second constructor argument set to false) and unwrapping the result.
 */
public static function transposeArray(array $array): array
{
    $wrapped = new self($array, false);

    return $wrapped->transpose()->toArray();
}
/**
 * Multiplies $array1 by the transpose of $array2 and returns the first
 * row of the product: Matrix::dot(x, y) ==> x.y'
 */
public static function dot(array $array1, array $array2): array
{
    $left = new self($array1, false);
    $rightTransposed = (new self($array2, false))->transpose();

    return $left->multiply($rightTransposed)->toArray()[0];
}
/** /**
* Element-wise addition or subtraction depending on the given sign parameter * Element-wise addition or subtraction depending on the given sign parameter
*/ */
protected function _add(Matrix $other, int $sign = 1) : Matrix protected function _add(self $other, int $sign = 1): self
{ {
$a1 = $this->toArray(); $a1 = $this->toArray();
$a2 = $other->toArray(); $a2 = $other->toArray();
@ -215,23 +274,10 @@ class Matrix
return new self($newMatrix, false); return new self($newMatrix, false);
} }
public function inverse() : Matrix
{
if (!$this->isSquare()) {
throw MatrixException::notSquareMatrix();
}
$LU = new LUDecomposition($this);
$identity = $this->getIdentity();
$inverse = $LU->solve($identity);
return new self($inverse, false);
}
/** /**
* Returns diagonal identity matrix of the same size of this matrix * Returns diagonal identity matrix of the same size of this matrix
*/ */
protected function getIdentity() : Matrix protected function getIdentity(): self
{ {
$array = array_fill(0, $this->rows, array_fill(0, $this->columns, 0)); $array = array_fill(0, $this->rows, array_fill(0, $this->columns, 0));
for ($i = 0; $i < $this->rows; ++$i) { for ($i = 0; $i < $this->rows; ++$i) {
@ -240,49 +286,4 @@ class Matrix
return new self($array, false); return new self($array, false);
} }
public function crossOut(int $row, int $column) : Matrix
{
$newMatrix = [];
$r = 0;
for ($i = 0; $i < $this->rows; ++$i) {
$c = 0;
if ($row != $i) {
for ($j = 0; $j < $this->columns; ++$j) {
if ($column != $j) {
$newMatrix[$r][$c] = $this->matrix[$i][$j];
++$c;
}
}
++$r;
}
}
return new self($newMatrix, false);
}
public function isSingular() : bool
{
return 0 == $this->getDeterminant();
}
/**
* Returns the transpose of given array
*/
public static function transposeArray(array $array) : array
{
return (new self($array, false))->transpose()->toArray();
}
/**
* Returns the dot product of two arrays<br>
* Matrix::dot(x, y) ==> x.y'
*/
public static function dot(array $array1, array $array2) : array
{
$m1 = new self($array1, false);
$m2 = new self($array2, false);
return $m1->multiply($m2->transpose())->toArray()[0];
}
} }

View File

@ -4,12 +4,15 @@ declare(strict_types=1);
namespace Phpml\Math; namespace Phpml\Math;
class Set implements \IteratorAggregate use ArrayIterator;
use IteratorAggregate;
class Set implements IteratorAggregate
{ {
/** /**
* @var string[]|int[]|float[] * @var string[]|int[]|float[]
*/ */
private $elements; private $elements = [];
/** /**
* @param string[]|int[]|float[] $elements * @param string[]|int[]|float[] $elements
@ -22,7 +25,7 @@ class Set implements \IteratorAggregate
/** /**
* Creates the union of A and B. * Creates the union of A and B.
*/ */
public static function union(Set $a, Set $b) : Set public static function union(self $a, self $b): self
{ {
return new self(array_merge($a->toArray(), $b->toArray())); return new self(array_merge($a->toArray(), $b->toArray()));
} }
@ -30,7 +33,7 @@ class Set implements \IteratorAggregate
/** /**
* Creates the intersection of A and B. * Creates the intersection of A and B.
*/ */
public static function intersection(Set $a, Set $b) : Set public static function intersection(self $a, self $b): self
{ {
return new self(array_intersect($a->toArray(), $b->toArray())); return new self(array_intersect($a->toArray(), $b->toArray()));
} }
@ -38,7 +41,7 @@ class Set implements \IteratorAggregate
/** /**
* Creates the difference of A and B. * Creates the difference of A and B.
*/ */
public static function difference(Set $a, Set $b) : Set public static function difference(self $a, self $b): self
{ {
return new self(array_diff($a->toArray(), $b->toArray())); return new self(array_diff($a->toArray(), $b->toArray()));
} }
@ -48,7 +51,7 @@ class Set implements \IteratorAggregate
* *
* @return Set[] * @return Set[]
*/ */
public static function cartesian(Set $a, Set $b) : array public static function cartesian(self $a, self $b): array
{ {
$cartesian = []; $cartesian = [];
@ -66,7 +69,7 @@ class Set implements \IteratorAggregate
* *
* @return Set[] * @return Set[]
*/ */
public static function power(Set $a) : array public static function power(self $a): array
{ {
$power = [new self()]; $power = [new self()];
@ -79,24 +82,10 @@ class Set implements \IteratorAggregate
return $power; return $power;
} }
/**
 * Sorts the elements in ascending order, removes duplicates and rewrites
 * the index so the result is keyed 0..n-1.
 *
 * SORT_REGULAR is spelled out because SORT_ASC is an ordering constant of
 * array_multisort(); it is not one of the documented comparison flags of
 * sort() or array_unique().
 *
 * @param string[]|int[]|float[] $elements
 *
 * @return string[]|int[]|float[]
 */
private static function sanitize(array $elements): array
{
    sort($elements, SORT_REGULAR);

    return array_values(array_unique($elements, SORT_REGULAR));
}
/** /**
* @param string|int|float $element * @param string|int|float $element
*/ */
public function add($element) : Set public function add($element): self
{ {
return $this->addAll([$element]); return $this->addAll([$element]);
} }
@ -104,7 +93,7 @@ class Set implements \IteratorAggregate
/** /**
* @param string[]|int[]|float[] $elements * @param string[]|int[]|float[] $elements
*/ */
public function addAll(array $elements) : Set public function addAll(array $elements): self
{ {
$this->elements = self::sanitize(array_merge($this->elements, $elements)); $this->elements = self::sanitize(array_merge($this->elements, $elements));
@ -114,7 +103,7 @@ class Set implements \IteratorAggregate
/** /**
* @param string|int|float $element * @param string|int|float $element
*/ */
public function remove($element) : Set public function remove($element): self
{ {
return $this->removeAll([$element]); return $this->removeAll([$element]);
} }
@ -122,7 +111,7 @@ class Set implements \IteratorAggregate
/** /**
* @param string[]|int[]|float[] $elements * @param string[]|int[]|float[] $elements
*/ */
public function removeAll(array $elements) : Set public function removeAll(array $elements): self
{ {
$this->elements = self::sanitize(array_diff($this->elements, $elements)); $this->elements = self::sanitize(array_diff($this->elements, $elements));
@ -132,7 +121,7 @@ class Set implements \IteratorAggregate
/** /**
* @param string|int|float $element * @param string|int|float $element
*/ */
public function contains($element) : bool public function contains($element): bool
{ {
return $this->containsAll([$element]); return $this->containsAll([$element]);
} }
@ -140,7 +129,7 @@ class Set implements \IteratorAggregate
/** /**
* @param string[]|int[]|float[] $elements * @param string[]|int[]|float[] $elements
*/ */
public function containsAll(array $elements) : bool public function containsAll(array $elements): bool
{ {
return !array_diff($elements, $this->elements); return !array_diff($elements, $this->elements);
} }
@ -148,23 +137,37 @@ class Set implements \IteratorAggregate
/** /**
* @return string[]|int[]|float[] * @return string[]|int[]|float[]
*/ */
public function toArray() : array public function toArray(): array
{ {
return $this->elements; return $this->elements;
} }
public function getIterator() : \ArrayIterator public function getIterator(): ArrayIterator
{ {
return new \ArrayIterator($this->elements); return new ArrayIterator($this->elements);
} }
public function isEmpty() : bool public function isEmpty(): bool
{ {
return $this->cardinality() == 0; return $this->cardinality() == 0;
} }
public function cardinality() : int public function cardinality(): int
{ {
return count($this->elements); return count($this->elements);
} }
/**
* Removes duplicates and rewrites index.
*
* @param string[]|int[]|float[] $elements
*
* @return string[]|int[]|float[]
*/
private static function sanitize(array $elements): array
{
sort($elements, SORT_ASC);
return array_values(array_unique($elements, SORT_ASC));
}
} }

View File

@ -14,7 +14,7 @@ class Correlation
* *
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public static function pearson(array $x, array $y) : float public static function pearson(array $x, array $y): float
{ {
if (count($x) !== count($y)) { if (count($x) !== count($y)) {
throw InvalidArgumentException::arraySizeNotMatch(); throw InvalidArgumentException::arraySizeNotMatch();

View File

@ -4,6 +4,7 @@ declare(strict_types=1);
namespace Phpml\Math\Statistic; namespace Phpml\Math\Statistic;
use Exception;
use Phpml\Exception\InvalidArgumentException; use Phpml\Exception\InvalidArgumentException;
class Covariance class Covariance
@ -13,7 +14,7 @@ class Covariance
* *
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null) : float public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
{ {
if (empty($x) || empty($y)) { if (empty($x) || empty($y)) {
throw InvalidArgumentException::arrayCantBeEmpty(); throw InvalidArgumentException::arrayCantBeEmpty();
@ -51,7 +52,7 @@ class Covariance
* @throws InvalidArgumentException * @throws InvalidArgumentException
* @throws \Exception * @throws \Exception
*/ */
public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null) : float public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
{ {
if (empty($data)) { if (empty($data)) {
throw InvalidArgumentException::arrayCantBeEmpty(); throw InvalidArgumentException::arrayCantBeEmpty();
@ -63,7 +64,7 @@ class Covariance
} }
if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) { if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) {
throw new \Exception('Given indices i and k do not match with the dimensionality of data'); throw new Exception('Given indices i and k do not match with the dimensionality of data');
} }
if ($meanX === null || $meanY === null) { if ($meanX === null || $meanY === null) {
@ -92,10 +93,12 @@ class Covariance
if ($index == $i) { if ($index == $i) {
$val[0] = $col - $meanX; $val[0] = $col - $meanX;
} }
if ($index == $k) { if ($index == $k) {
$val[1] = $col - $meanY; $val[1] = $col - $meanY;
} }
} }
$sum += $val[0] * $val[1]; $sum += $val[0] * $val[1];
} }
} }
@ -112,7 +115,7 @@ class Covariance
* *
* @param array|null $means * @param array|null $means
*/ */
public static function covarianceMatrix(array $data, ?array $means = null) : array public static function covarianceMatrix(array $data, ?array $means = null): array
{ {
$n = count($data[0]); $n = count($data[0]);

View File

@ -41,7 +41,7 @@ class Gaussian
* Returns probability density value of the given <i>$value</i> based on * Returns probability density value of the given <i>$value</i> based on
* given standard deviation and the mean * given standard deviation and the mean
*/ */
public static function distributionPdf(float $mean, float $std, float $value) : float public static function distributionPdf(float $mean, float $std, float $value): float
{ {
$normal = new self($mean, $std); $normal = new self($mean, $std);

View File

@ -11,7 +11,7 @@ class Mean
/** /**
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public static function arithmetic(array $numbers) : float public static function arithmetic(array $numbers): float
{ {
self::checkArrayLength($numbers); self::checkArrayLength($numbers);
@ -32,7 +32,7 @@ class Mean
sort($numbers, SORT_NUMERIC); sort($numbers, SORT_NUMERIC);
$median = $numbers[$middleIndex]; $median = $numbers[$middleIndex];
if (0 === $count % 2) { if ($count % 2 === 0) {
$median = ($median + $numbers[$middleIndex - 1]) / 2; $median = ($median + $numbers[$middleIndex - 1]) / 2;
} }

View File

@ -13,7 +13,7 @@ class StandardDeviation
* *
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
public static function population(array $a, bool $sample = true) : float public static function population(array $a, bool $sample = true): float
{ {
if (empty($a)) { if (empty($a)) {
throw InvalidArgumentException::arrayCantBeEmpty(); throw InvalidArgumentException::arrayCantBeEmpty();

View File

@ -51,27 +51,27 @@ class ClassificationReport
$this->computeAverage(); $this->computeAverage();
} }
public function getPrecision() : array public function getPrecision(): array
{ {
return $this->precision; return $this->precision;
} }
public function getRecall() : array public function getRecall(): array
{ {
return $this->recall; return $this->recall;
} }
public function getF1score() : array public function getF1score(): array
{ {
return $this->f1score; return $this->f1score;
} }
public function getSupport() : array public function getSupport(): array
{ {
return $this->support; return $this->support;
} }
public function getAverage() : array public function getAverage(): array
{ {
return $this->average; return $this->average;
} }
@ -93,6 +93,7 @@ class ClassificationReport
$this->average[$metric] = 0.0; $this->average[$metric] = 0.0;
continue; continue;
} }
$this->average[$metric] = array_sum($values) / count($values); $this->average[$metric] = array_sum($values) / count($values);
} }
} }
@ -102,7 +103,8 @@ class ClassificationReport
*/ */
private function computePrecision(int $truePositive, int $falsePositive) private function computePrecision(int $truePositive, int $falsePositive)
{ {
if (0 == ($divider = $truePositive + $falsePositive)) { $divider = $truePositive + $falsePositive;
if ($divider == 0) {
return 0.0; return 0.0;
} }
@ -114,23 +116,25 @@ class ClassificationReport
*/ */
private function computeRecall(int $truePositive, int $falseNegative) private function computeRecall(int $truePositive, int $falseNegative)
{ {
if (0 == ($divider = $truePositive + $falseNegative)) { $divider = $truePositive + $falseNegative;
if ($divider == 0) {
return 0.0; return 0.0;
} }
return $truePositive / $divider; return $truePositive / $divider;
} }
private function computeF1Score(float $precision, float $recall) : float private function computeF1Score(float $precision, float $recall): float
{ {
if (0 == ($divider = $precision + $recall)) { $divider = $precision + $recall;
if ($divider == 0) {
return 0.0; return 0.0;
} }
return 2.0 * (($precision * $recall) / $divider); return 2.0 * (($precision * $recall) / $divider);
} }
private static function getLabelIndexedArray(array $actualLabels, array $predictedLabels) : array private static function getLabelIndexedArray(array $actualLabels, array $predictedLabels): array
{ {
$labels = array_values(array_unique(array_merge($actualLabels, $predictedLabels))); $labels = array_values(array_unique(array_merge($actualLabels, $predictedLabels)));
sort($labels); sort($labels);

View File

@ -6,7 +6,7 @@ namespace Phpml\Metric;
class ConfusionMatrix class ConfusionMatrix
{ {
public static function compute(array $actualLabels, array $predictedLabels, ?array $labels = null) : array public static function compute(array $actualLabels, array $predictedLabels, ?array $labels = null): array
{ {
$labels = $labels ? array_flip($labels) : self::getUniqueLabels($actualLabels); $labels = $labels ? array_flip($labels) : self::getUniqueLabels($actualLabels);
$matrix = self::generateMatrixWithZeros($labels); $matrix = self::generateMatrixWithZeros($labels);
@ -31,7 +31,7 @@ class ConfusionMatrix
return $matrix; return $matrix;
} }
private static function generateMatrixWithZeros(array $labels) : array private static function generateMatrixWithZeros(array $labels): array
{ {
$count = count($labels); $count = count($labels);
$matrix = []; $matrix = [];
@ -43,7 +43,7 @@ class ConfusionMatrix
return $matrix; return $matrix;
} }
private static function getUniqueLabels(array $labels) : array private static function getUniqueLabels(array $labels): array
{ {
$labels = array_values(array_unique($labels)); $labels = array_values(array_unique($labels));
sort($labels); sort($labels);

View File

@ -26,7 +26,7 @@ class ModelManager
} }
} }
public function restoreFromFile(string $filepath) : Estimator public function restoreFromFile(string $filepath): Estimator
{ {
if (!file_exists($filepath) || !is_readable($filepath)) { if (!file_exists($filepath) || !is_readable($filepath)) {
throw FileException::cantOpenFile(basename($filepath)); throw FileException::cantOpenFile(basename($filepath));

View File

@ -9,5 +9,5 @@ interface ActivationFunction
/** /**
* @param float|int $value * @param float|int $value
*/ */
public function compute($value) : float; public function compute($value): float;
} }

View File

@ -11,7 +11,7 @@ class BinaryStep implements ActivationFunction
/** /**
* @param float|int $value * @param float|int $value
*/ */
public function compute($value) : float public function compute($value): float
{ {
return $value >= 0 ? 1.0 : 0.0; return $value >= 0 ? 1.0 : 0.0;
} }

View File

@ -11,7 +11,7 @@ class Gaussian implements ActivationFunction
/** /**
* @param float|int $value * @param float|int $value
*/ */
public function compute($value) : float public function compute($value): float
{ {
return exp(-pow($value, 2)); return exp(-pow($value, 2));
} }

View File

@ -21,7 +21,7 @@ class HyperbolicTangent implements ActivationFunction
/** /**
* @param float|int $value * @param float|int $value
*/ */
public function compute($value) : float public function compute($value): float
{ {
return tanh($this->beta * $value); return tanh($this->beta * $value);
} }

View File

@ -21,7 +21,7 @@ class PReLU implements ActivationFunction
/** /**
* @param float|int $value * @param float|int $value
*/ */
public function compute($value) : float public function compute($value): float
{ {
return $value >= 0 ? $value : $this->beta * $value; return $value >= 0 ? $value : $this->beta * $value;
} }

View File

@ -21,7 +21,7 @@ class Sigmoid implements ActivationFunction
/** /**
* @param float|int $value * @param float|int $value
*/ */
public function compute($value) : float public function compute($value): float
{ {
return 1 / (1 + exp(-$this->beta * $value)); return 1 / (1 + exp(-$this->beta * $value));
} }

View File

@ -21,7 +21,7 @@ class ThresholdedReLU implements ActivationFunction
/** /**
* @param float|int $value * @param float|int $value
*/ */
public function compute($value) : float public function compute($value): float
{ {
return $value > $this->theta ? $value : 0.0; return $value > $this->theta ? $value : 0.0;
} }

View File

@ -28,20 +28,6 @@ class Layer
} }
} }
/**
* @param ActivationFunction|null $activationFunction
*
* @return Neuron
*/
private function createNode(string $nodeClass, ?ActivationFunction $activationFunction = null)
{
if (Neuron::class == $nodeClass) {
return new Neuron($activationFunction);
}
return new $nodeClass();
}
public function addNode(Node $node): void public function addNode(Node $node): void
{ {
$this->nodes[] = $node; $this->nodes[] = $node;
@ -50,8 +36,20 @@ class Layer
/** /**
* @return Node[] * @return Node[]
*/ */
public function getNodes() : array public function getNodes(): array
{ {
return $this->nodes; return $this->nodes;
} }
/**
* @return Neuron
*/
private function createNode(string $nodeClass, ?ActivationFunction $activationFunction = null): Node
{
if ($nodeClass == Neuron::class) {
return new Neuron($activationFunction);
}
return new $nodeClass();
}
} }

View File

@ -8,20 +8,15 @@ interface Network
{ {
/** /**
* @param mixed $input * @param mixed $input
*
* @return self
*/ */
public function setInput($input); public function setInput($input): self;
/** public function getOutput(): array;
* @return array
*/
public function getOutput() : array;
public function addLayer(Layer $layer); public function addLayer(Layer $layer);
/** /**
* @return Layer[] * @return Layer[]
*/ */
public function getLayers() : array; public function getLayers(): array;
} }

View File

@ -14,7 +14,7 @@ abstract class LayeredNetwork implements Network
/** /**
* @var Layer[] * @var Layer[]
*/ */
protected $layers; protected $layers = [];
public function addLayer(Layer $layer): void public function addLayer(Layer $layer): void
{ {
@ -24,7 +24,7 @@ abstract class LayeredNetwork implements Network
/** /**
* @return Layer[] * @return Layer[]
*/ */
public function getLayers() : array public function getLayers(): array
{ {
return $this->layers; return $this->layers;
} }
@ -39,7 +39,7 @@ abstract class LayeredNetwork implements Network
return $this->layers[count($this->layers) - 1]; return $this->layers[count($this->layers) - 1];
} }
public function getOutput() : array public function getOutput(): array
{ {
$result = []; $result = [];
foreach ($this->getOutputLayer()->getNodes() as $neuron) { foreach ($this->getOutputLayer()->getNodes() as $neuron) {
@ -54,7 +54,7 @@ abstract class LayeredNetwork implements Network
* *
* @return $this * @return $this
*/ */
public function setInput($input) public function setInput($input): Network
{ {
$firstLayer = $this->layers[0]; $firstLayer = $this->layers[0];

View File

@ -20,41 +20,36 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
{ {
use Predictable; use Predictable;
/**
* @var int
*/
private $inputLayerFeatures;
/**
* @var array
*/
private $hiddenLayers;
/** /**
* @var array * @var array
*/ */
protected $classes = []; protected $classes = [];
/**
* @var int
*/
private $iterations;
/** /**
* @var ActivationFunction * @var ActivationFunction
*/ */
protected $activationFunction; protected $activationFunction;
/**
* @var float
*/
private $learningRate;
/** /**
* @var Backpropagation * @var Backpropagation
*/ */
protected $backpropagation = null; protected $backpropagation = null;
/**
* @var int
*/
private $inputLayerFeatures;
/**
* @var array
*/
private $hiddenLayers = [];
/**
* @var float
*/
private $learningRate;
/** /**
* @throws InvalidArgumentException * @throws InvalidArgumentException
*/ */
@ -78,18 +73,6 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
$this->initNetwork(); $this->initNetwork();
} }
private function initNetwork(): void
{
$this->addInputLayer($this->inputLayerFeatures);
$this->addNeuronLayers($this->hiddenLayers, $this->activationFunction);
$this->addNeuronLayers([count($this->classes)], $this->activationFunction);
$this->addBiasNodes();
$this->generateSynapses();
$this->backpropagation = new Backpropagation($this->learningRate);
}
public function train(array $samples, array $targets): void public function train(array $samples, array $targets): void
{ {
$this->reset(); $this->reset();
@ -127,6 +110,18 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator,
$this->removeLayers(); $this->removeLayers();
} }
private function initNetwork(): void
{
$this->addInputLayer($this->inputLayerFeatures);
$this->addNeuronLayers($this->hiddenLayers, $this->activationFunction);
$this->addNeuronLayers([count($this->classes)], $this->activationFunction);
$this->addBiasNodes();
$this->generateSynapses();
$this->backpropagation = new Backpropagation($this->learningRate);
}
private function addInputLayer(int $nodes): void private function addInputLayer(int $nodes): void
{ {
$this->addLayer(new Layer($nodes, Input::class)); $this->addLayer(new Layer($nodes, Input::class));

View File

@ -6,5 +6,5 @@ namespace Phpml\NeuralNetwork;
interface Node interface Node
{ {
public function getOutput() : float; public function getOutput(): float;
} }

View File

@ -8,7 +8,7 @@ use Phpml\NeuralNetwork\Node;
class Bias implements Node class Bias implements Node
{ {
public function getOutput() : float public function getOutput(): float
{ {
return 1.0; return 1.0;
} }

View File

@ -18,7 +18,7 @@ class Input implements Node
$this->input = $input; $this->input = $input;
} }
public function getOutput() : float public function getOutput(): float
{ {
return $this->input; return $this->input;
} }

View File

@ -5,6 +5,7 @@ declare(strict_types=1);
namespace Phpml\NeuralNetwork\Node; namespace Phpml\NeuralNetwork\Node;
use Phpml\NeuralNetwork\ActivationFunction; use Phpml\NeuralNetwork\ActivationFunction;
use Phpml\NeuralNetwork\ActivationFunction\Sigmoid;
use Phpml\NeuralNetwork\Node; use Phpml\NeuralNetwork\Node;
use Phpml\NeuralNetwork\Node\Neuron\Synapse; use Phpml\NeuralNetwork\Node\Neuron\Synapse;
@ -13,7 +14,7 @@ class Neuron implements Node
/** /**
* @var Synapse[] * @var Synapse[]
*/ */
protected $synapses; protected $synapses = [];
/** /**
* @var ActivationFunction * @var ActivationFunction
@ -27,7 +28,7 @@ class Neuron implements Node
public function __construct(?ActivationFunction $activationFunction = null) public function __construct(?ActivationFunction $activationFunction = null)
{ {
$this->activationFunction = $activationFunction ?: new ActivationFunction\Sigmoid(); $this->activationFunction = $activationFunction ?: new Sigmoid();
$this->synapses = []; $this->synapses = [];
$this->output = 0; $this->output = 0;
} }
@ -45,9 +46,9 @@ class Neuron implements Node
return $this->synapses; return $this->synapses;
} }
public function getOutput() : float public function getOutput(): float
{ {
if (0 === $this->output) { if ($this->output === 0) {
$sum = 0; $sum = 0;
foreach ($this->synapses as $synapse) { foreach ($this->synapses as $synapse) {
$sum += $synapse->getOutput(); $sum += $synapse->getOutput();

View File

@ -27,12 +27,7 @@ class Synapse
$this->weight = $weight ?: $this->generateRandomWeight(); $this->weight = $weight ?: $this->generateRandomWeight();
} }
protected function generateRandomWeight() : float public function getOutput(): float
{
return 1 / random_int(5, 25) * (random_int(0, 1) ? -1 : 1);
}
public function getOutput() : float
{ {
return $this->weight * $this->node->getOutput(); return $this->weight * $this->node->getOutput();
} }
@ -42,7 +37,7 @@ class Synapse
$this->weight += $delta; $this->weight += $delta;
} }
public function getWeight() : float public function getWeight(): float
{ {
return $this->weight; return $this->weight;
} }
@ -51,4 +46,9 @@ class Synapse
{ {
return $this->node; return $this->node;
} }
protected function generateRandomWeight(): float
{
return 1 / random_int(5, 25) * (random_int(0, 1) ? -1 : 1);
}
} }

View File

@ -47,6 +47,7 @@ class Backpropagation
} }
} }
} }
$this->prevSigmas = $this->sigmas; $this->prevSigmas = $this->sigmas;
} }
@ -55,7 +56,7 @@ class Backpropagation
$this->prevSigmas = null; $this->prevSigmas = null;
} }
private function getSigma(Neuron $neuron, int $targetClass, int $key, bool $lastLayer) : float private function getSigma(Neuron $neuron, int $targetClass, int $key, bool $lastLayer): float
{ {
$neuronOutput = $neuron->getOutput(); $neuronOutput = $neuron->getOutput();
$sigma = $neuronOutput * (1 - $neuronOutput); $sigma = $neuronOutput * (1 - $neuronOutput);
@ -65,6 +66,7 @@ class Backpropagation
if ($targetClass === $key) { if ($targetClass === $key) {
$value = 1; $value = 1;
} }
$sigma *= ($value - $neuronOutput); $sigma *= ($value - $neuronOutput);
} else { } else {
$sigma *= $this->getPrevSigma($neuron); $sigma *= $this->getPrevSigma($neuron);
@ -75,7 +77,7 @@ class Backpropagation
return $sigma; return $sigma;
} }
private function getPrevSigma(Neuron $neuron) : float private function getPrevSigma(Neuron $neuron): float
{ {
$sigma = 0.0; $sigma = 0.0;

View File

@ -29,12 +29,12 @@ class Sigma
return $this->neuron; return $this->neuron;
} }
public function getSigma() : float public function getSigma(): float
{ {
return $this->sigma; return $this->sigma;
} }
public function getSigmaForNeuron(Neuron $neuron) : float public function getSigmaForNeuron(Neuron $neuron): float
{ {
$sigma = 0.0; $sigma = 0.0;

View File

@ -9,7 +9,7 @@ class Pipeline implements Estimator
/** /**
* @var array|Transformer[] * @var array|Transformer[]
*/ */
private $transformers; private $transformers = [];
/** /**
* @var Estimator * @var Estimator
@ -41,7 +41,7 @@ class Pipeline implements Estimator
/** /**
* @return array|Transformer[] * @return array|Transformer[]
*/ */
public function getTransformers() : array public function getTransformers(): array
{ {
return $this->transformers; return $this->transformers;
} }

View File

@ -9,6 +9,7 @@ use Phpml\Preprocessing\Imputer\Strategy;
class Imputer implements Preprocessor class Imputer implements Preprocessor
{ {
public const AXIS_COLUMN = 0; public const AXIS_COLUMN = 0;
public const AXIS_ROW = 1; public const AXIS_ROW = 1;
/** /**
@ -64,9 +65,9 @@ class Imputer implements Preprocessor
} }
} }
private function getAxis(int $column, array $currentSample) : array private function getAxis(int $column, array $currentSample): array
{ {
if (self::AXIS_ROW === $this->axis) { if ($this->axis === self::AXIS_ROW) {
return array_diff($currentSample, [$this->missingValue]); return array_diff($currentSample, [$this->missingValue]);
} }

View File

@ -9,7 +9,7 @@ use Phpml\Preprocessing\Imputer\Strategy;
class MeanStrategy implements Strategy class MeanStrategy implements Strategy
{ {
public function replaceValue(array $currentAxis) : float public function replaceValue(array $currentAxis): float
{ {
return Mean::arithmetic($currentAxis); return Mean::arithmetic($currentAxis);
} }

View File

@ -9,7 +9,7 @@ use Phpml\Preprocessing\Imputer\Strategy;
class MedianStrategy implements Strategy class MedianStrategy implements Strategy
{ {
public function replaceValue(array $currentAxis) : float public function replaceValue(array $currentAxis): float
{ {
return Mean::median($currentAxis); return Mean::median($currentAxis);
} }

View File

@ -11,7 +11,9 @@ use Phpml\Math\Statistic\StandardDeviation;
class Normalizer implements Preprocessor class Normalizer implements Preprocessor
{ {
public const NORM_L1 = 1; public const NORM_L1 = 1;
public const NORM_L2 = 2; public const NORM_L2 = 2;
public const NORM_STD = 3; public const NORM_STD = 3;
/** /**
@ -27,12 +29,12 @@ class Normalizer implements Preprocessor
/** /**
* @var array * @var array
*/ */
private $std; private $std = [];
/** /**
* @var array * @var array
*/ */
private $mean; private $mean = [];
/** /**
* @throws NormalizerException * @throws NormalizerException
@ -69,7 +71,7 @@ class Normalizer implements Preprocessor
$methods = [ $methods = [
self::NORM_L1 => 'normalizeL1', self::NORM_L1 => 'normalizeL1',
self::NORM_L2 => 'normalizeL2', self::NORM_L2 => 'normalizeL2',
self::NORM_STD => 'normalizeSTD' self::NORM_STD => 'normalizeSTD',
]; ];
$method = $methods[$this->norm]; $method = $methods[$this->norm];
@ -87,7 +89,7 @@ class Normalizer implements Preprocessor
$norm1 += abs($feature); $norm1 += abs($feature);
} }
if (0 == $norm1) { if ($norm1 == 0) {
$count = count($sample); $count = count($sample);
$sample = array_fill(0, $count, 1.0 / $count); $sample = array_fill(0, $count, 1.0 / $count);
} else { } else {
@ -103,9 +105,10 @@ class Normalizer implements Preprocessor
foreach ($sample as $feature) { foreach ($sample as $feature) {
$norm2 += $feature * $feature; $norm2 += $feature * $feature;
} }
$norm2 = sqrt((float) $norm2); $norm2 = sqrt((float) $norm2);
if (0 == $norm2) { if ($norm2 == 0) {
$sample = array_fill(0, count($sample), 1); $sample = array_fill(0, count($sample), 1);
} else { } else {
foreach ($sample as &$feature) { foreach ($sample as &$feature) {

View File

@ -28,7 +28,7 @@ class LeastSquares implements Regression
/** /**
* @var array * @var array
*/ */
private $coefficients; private $coefficients = [];
public function train(array $samples, array $targets): void public function train(array $samples, array $targets): void
{ {
@ -51,12 +51,12 @@ class LeastSquares implements Regression
return $result; return $result;
} }
public function getCoefficients() : array public function getCoefficients(): array
{ {
return $this->coefficients; return $this->coefficients;
} }
public function getIntercept() : float public function getIntercept(): float
{ {
return $this->intercept; return $this->intercept;
} }
@ -79,7 +79,7 @@ class LeastSquares implements Regression
/** /**
* Add one dimension for intercept calculation. * Add one dimension for intercept calculation.
*/ */
private function getSamplesMatrix() : Matrix private function getSamplesMatrix(): Matrix
{ {
$samples = []; $samples = [];
foreach ($this->samples as $sample) { foreach ($this->samples as $sample) {
@ -90,7 +90,7 @@ class LeastSquares implements Regression
return new Matrix($samples); return new Matrix($samples);
} }
private function getTargetsMatrix() : Matrix private function getTargetsMatrix(): Matrix
{ {
if (is_array($this->targets[0])) { if (is_array($this->targets[0])) {
return new Matrix($this->targets); return new Matrix($this->targets);

View File

@ -34,7 +34,7 @@ class DataTransformer
return $set; return $set;
} }
public static function predictions(string $rawPredictions, array $labels) : array public static function predictions(string $rawPredictions, array $labels): array
{ {
$numericLabels = self::numericLabels($labels); $numericLabels = self::numericLabels($labels);
$results = []; $results = [];
@ -47,7 +47,7 @@ class DataTransformer
return $results; return $results;
} }
public static function numericLabels(array $labels) : array public static function numericLabels(array $labels): array
{ {
$numericLabels = []; $numericLabels = [];
foreach ($labels as $label) { foreach ($labels as $label) {

View File

@ -167,7 +167,7 @@ class SupportVectorMachine
} }
/** /**
* @return array * @return array|string
*/ */
public function predict(array $samples) public function predict(array $samples)
{ {

View File

@ -6,5 +6,5 @@ namespace Phpml\Tokenization;
interface Tokenizer interface Tokenizer
{ {
public function tokenize(string $text) : array; public function tokenize(string $text): array;
} }

View File

@ -6,7 +6,7 @@ namespace Phpml\Tokenization;
class WhitespaceTokenizer implements Tokenizer class WhitespaceTokenizer implements Tokenizer
{ {
public function tokenize(string $text) : array public function tokenize(string $text): array
{ {
return preg_split('/[\pZ\pC]+/u', $text, -1, PREG_SPLIT_NO_EMPTY); return preg_split('/[\pZ\pC]+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
} }

View File

@ -6,7 +6,7 @@ namespace Phpml\Tokenization;
class WordTokenizer implements Tokenizer class WordTokenizer implements Tokenizer
{ {
public function tokenize(string $text) : array public function tokenize(string $text): array
{ {
$tokens = []; $tokens = [];
preg_match_all('/\w\w+/u', $text, $tokens); preg_match_all('/\w\w+/u', $text, $tokens);

View File

@ -7,6 +7,7 @@ namespace tests\Phpml\Classification;
use Phpml\Association\Apriori; use Phpml\Association\Apriori;
use Phpml\ModelManager; use Phpml\ModelManager;
use PHPUnit\Framework\TestCase; use PHPUnit\Framework\TestCase;
use ReflectionClass;
class AprioriTest extends TestCase class AprioriTest extends TestCase
{ {
@ -172,7 +173,6 @@ class AprioriTest extends TestCase
/** /**
* Invokes objects method. Private/protected will be set accessible. * Invokes objects method. Private/protected will be set accessible.
* *
* @param object &$object Instantiated object to be called on
* @param string $method Method name to be called * @param string $method Method name to be called
* @param array $params Array of params to be passed * @param array $params Array of params to be passed
* *
@ -180,7 +180,7 @@ class AprioriTest extends TestCase
*/ */
public function invoke(&$object, $method, array $params = []) public function invoke(&$object, $method, array $params = [])
{ {
$reflection = new \ReflectionClass(get_class($object)); $reflection = new ReflectionClass(get_class($object));
$method = $reflection->getMethod($method); $method = $reflection->getMethod($method);
$method->setAccessible(true); $method->setAccessible(true);
@ -195,7 +195,7 @@ class AprioriTest extends TestCase
$testSamples = [['alpha', 'epsilon'], ['beta', 'theta']]; $testSamples = [['alpha', 'epsilon'], ['beta', 'theta']];
$predicted = $classifier->predict($testSamples); $predicted = $classifier->predict($testSamples);
$filename = 'apriori-test-'.rand(100, 999).'-'.uniqid(); $filename = 'apriori-test-'.random_int(100, 999).'-'.uniqid();
$filepath = tempnam(sys_get_temp_dir(), $filename); $filepath = tempnam(sys_get_temp_dir(), $filename);
$modelManager = new ModelManager(); $modelManager = new ModelManager();
$modelManager->saveToFile($classifier, $filepath); $modelManager->saveToFile($classifier, $filepath);

View File

@ -24,7 +24,7 @@ class DecisionTreeTest extends TestCase
['sunny', 75, 70, 'true', 'Play'], ['sunny', 75, 70, 'true', 'Play'],
['overcast', 72, 90, 'true', 'Play'], ['overcast', 72, 90, 'true', 'Play'],
['overcast', 81, 75, 'false', 'Play'], ['overcast', 81, 75, 'false', 'Play'],
['rain', 71, 80, 'true', 'Dont_play'] ['rain', 71, 80, 'true', 'Dont_play'],
]; ];
private $extraData = [ private $extraData = [
@ -32,16 +32,6 @@ class DecisionTreeTest extends TestCase
['scorching', 100, 93, 'true', 'Dont_play'], ['scorching', 100, 93, 'true', 'Dont_play'],
]; ];
private function getData($input)
{
$targets = array_column($input, 4);
array_walk($input, function (&$v): void {
array_splice($v, 4, 1);
});
return [$input, $targets];
}
public function testPredictSingleSample() public function testPredictSingleSample()
{ {
[$data, $targets] = $this->getData($this->data); [$data, $targets] = $this->getData($this->data);
@ -68,7 +58,7 @@ class DecisionTreeTest extends TestCase
$testSamples = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false']]; $testSamples = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false']];
$predicted = $classifier->predict($testSamples); $predicted = $classifier->predict($testSamples);
$filename = 'decision-tree-test-'.rand(100, 999).'-'.uniqid(); $filename = 'decision-tree-test-'.random_int(100, 999).'-'.uniqid();
$filepath = tempnam(sys_get_temp_dir(), $filename); $filepath = tempnam(sys_get_temp_dir(), $filename);
$modelManager = new ModelManager(); $modelManager = new ModelManager();
$modelManager->saveToFile($classifier, $filepath); $modelManager->saveToFile($classifier, $filepath);
@ -83,6 +73,16 @@ class DecisionTreeTest extends TestCase
[$data, $targets] = $this->getData($this->data); [$data, $targets] = $this->getData($this->data);
$classifier = new DecisionTree(5); $classifier = new DecisionTree(5);
$classifier->train($data, $targets); $classifier->train($data, $targets);
$this->assertTrue(5 >= $classifier->actualDepth); $this->assertTrue($classifier->actualDepth <= 5);
}
private function getData($input)
{
$targets = array_column($input, 4);
array_walk($input, function (&$v): void {
array_splice($v, 4, 1);
});
return [$input, $targets];
} }
} }

Some files were not shown because too many files have changed in this diff Show More