diff --git a/.php_cs b/.php_cs index 0ea0c2a..9fb8baa 100644 --- a/.php_cs +++ b/.php_cs @@ -3,11 +3,25 @@ return PhpCsFixer\Config::create() ->setRules([ '@PSR2' => true, - 'declare_strict_types' => true, 'array_syntax' => ['syntax' => 'short'], + 'binary_operator_spaces' => ['align_double_arrow' => false, 'align_equals' => false], 'blank_line_after_opening_tag' => true, + 'blank_line_before_return' => true, + 'cast_spaces' => true, + 'concat_space' => ['spacing' => 'none'], + 'declare_strict_types' => true, + 'method_separation' => true, + 'no_blank_lines_after_class_opening' => true, + 'no_spaces_around_offset' => ['positions' => ['inside', 'outside']], + 'no_unneeded_control_parentheses' => true, + 'no_unused_imports' => true, + 'phpdoc_align' => true, + 'phpdoc_no_access' => true, + 'phpdoc_separation' => true, + 'pre_increment' => true, + 'single_quote' => true, + 'trim_array_spaces' => true, 'single_blank_line_before_namespace' => true, - 'no_unused_imports' => true ]) ->setFinder( PhpCsFixer\Finder::create() diff --git a/src/Phpml/Classification/DecisionTree.php b/src/Phpml/Classification/DecisionTree.php index c0c71f3..6cf6870 100644 --- a/src/Phpml/Classification/DecisionTree.php +++ b/src/Phpml/Classification/DecisionTree.php @@ -144,7 +144,7 @@ class DecisionTree implements Classifier // otherwise group the records so that we can classify the leaf // in case maximum depth is reached $leftRecords = []; - $rightRecords= []; + $rightRecords = []; $remainingTargets = []; $prevRecord = null; $allSame = true; @@ -162,7 +162,7 @@ class DecisionTree implements Classifier if ($split->evaluate($record)) { $leftRecords[] = $recordNo; } else { - $rightRecords[]= $recordNo; + $rightRecords[] = $recordNo; } // Group remaining targets @@ -183,7 +183,7 @@ class DecisionTree implements Classifier $split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1); } if ($rightRecords) { - $split->rightLeaf= $this->getSplitLeaf($rightRecords, $depth + 1); + $split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1); } } diff --git a/src/Phpml/Classification/DecisionTree/DecisionTreeLeaf.php b/src/Phpml/Classification/DecisionTree/DecisionTreeLeaf.php index 787108f..53c3386 100644 --- a/src/Phpml/Classification/DecisionTree/DecisionTreeLeaf.php +++ b/src/Phpml/Classification/DecisionTree/DecisionTreeLeaf.php @@ -34,7 +34,7 @@ class DecisionTreeLeaf /** * @var DecisionTreeLeaf */ - public $rightLeaf= null; + public $rightLeaf = null; /** * @var array @@ -71,6 +71,7 @@ class DecisionTreeLeaf /** * @param array $record + * * @return bool */ public function evaluate($record) @@ -79,9 +80,10 @@ class DecisionTreeLeaf if ($this->isContinuous) { $op = $this->operator; - $value= $this->numericValue; + $value = $this->numericValue; $recordField = strval($recordField); eval("\$result = $recordField $op $value;"); + return $result; } @@ -102,16 +104,16 @@ class DecisionTreeLeaf return 0.0; } - $nodeSampleCount = (float)count($this->records); + $nodeSampleCount = (float) count($this->records); $iT = $this->giniIndex; if ($this->leftLeaf) { - $pL = count($this->leftLeaf->records)/$nodeSampleCount; + $pL = count($this->leftLeaf->records) / $nodeSampleCount; $iT -= $pL * $this->leftLeaf->giniIndex; } if ($this->rightLeaf) { - $pR = count($this->rightLeaf->records)/$nodeSampleCount; + $pR = count($this->rightLeaf->records) / $nodeSampleCount; $iT -= $pR * $this->rightLeaf->giniIndex; } @@ -122,6 +124,7 @@ class DecisionTreeLeaf * Returns HTML representation of the node including children nodes * * @param $columnNames + * * @return string */ public function getHTML($columnNames = null) @@ -135,29 +138,34 @@ class DecisionTreeLeaf } else { $col = "col_$this->columnIndex"; } - if (!preg_match("/^[<>=]{1,2}/", $value)) { + if (!preg_match('/^[<>=]{1,2}/', $value)) { $value = "=$value"; } - $value = "$col $value
Gini: ". number_format($this->giniIndex, 2); + $value = "$col $value
Gini: ".number_format($this->giniIndex, 2); } - $str = ""; + + $str = "
- $value
"; + if ($this->leftLeaf || $this->rightLeaf) { - $str .=''; + $str .= ''; if ($this->leftLeaf) { - $str .=""; + $str .= ''; } else { - $str .=''; + $str .= ''; } - $str .=''; + + $str .= ''; if ($this->rightLeaf) { - $str .=""; + $str .= ''; } else { - $str .=''; + $str .= ''; } + $str .= ''; } + $str .= '
$value
| Yes
" . $this->leftLeaf->getHTML($columnNames) . "
| Yes
'.$this->leftLeaf->getHTML($columnNames).'
  No |
" . $this->rightLeaf->getHTML($columnNames) . "
No |
'.$this->rightLeaf->getHTML($columnNames).'
'; + return $str; } diff --git a/src/Phpml/Classification/Ensemble/AdaBoost.php b/src/Phpml/Classification/Ensemble/AdaBoost.php index 38571da..95daf49 100644 --- a/src/Phpml/Classification/Ensemble/AdaBoost.php +++ b/src/Phpml/Classification/Ensemble/AdaBoost.php @@ -18,6 +18,7 @@ class AdaBoost implements Classifier /** * Actual labels given in the targets array + * * @var array */ protected $labels = []; @@ -86,7 +87,7 @@ class AdaBoost implements Classifier * Sets the base classifier that will be used for boosting (default = DecisionStump) * * @param string $baseClassifier - * @param array $classifierOptions + * @param array $classifierOptions */ public function setBaseClassifier(string $baseClassifier = DecisionStump::class, array $classifierOptions = []) { @@ -105,7 +106,7 @@ class AdaBoost implements Classifier // Initialize usual variables $this->labels = array_keys(array_count_values($targets)); if (count($this->labels) != 2) { - throw new \Exception("AdaBoost is a binary classifier and can classify between two classes only"); + throw new \Exception('AdaBoost is a binary classifier and can classify between two classes only'); } // Set all target values to either -1 or 1 @@ -175,14 +176,14 @@ class AdaBoost implements Classifier { $weights = $this->weights; $std = StandardDeviation::population($weights); - $mean= Mean::arithmetic($weights); + $mean = Mean::arithmetic($weights); $min = min($weights); - $minZ= (int)round(($min - $mean) / $std); + $minZ = (int) round(($min - $mean) / $std); $samples = []; $targets = []; foreach ($weights as $index => $weight) { - $z = (int)round(($weight - $mean) / $std) - $minZ + 1; + $z = (int) round(($weight - $mean) / $std) - $minZ + 1; for ($i = 0; $i < $z; ++$i) { if (rand(0, 1) == 0) { continue; @@ -220,6 +221,7 @@ class AdaBoost implements Classifier * Calculates alpha of a classifier * * @param float $errorRate + * * @return float */ protected function calculateAlpha(float $errorRate) @@ -227,6 +229,7 @@ class AdaBoost implements Classifier if ($errorRate == 0) { $errorRate = 1e-10; } + return 0.5 * log((1 - $errorRate) / $errorRate); } @@ -234,7 +237,7 @@ class AdaBoost implements Classifier * Updates the sample weights * * @param Classifier $classifier - * @param float $alpha + * @param float $alpha */ protected function updateWeights(Classifier $classifier, float $alpha) { @@ -254,6 +257,7 @@ class AdaBoost implements Classifier /** * @param array $sample + * * @return mixed */ public function predictSample(array $sample) @@ -264,6 +268,6 @@ class AdaBoost implements Classifier $sum += $h * $alpha; } - return $this->labels[ $sum > 0 ? 1 : -1]; + return $this->labels[$sum > 0 ? 1 : -1]; } } diff --git a/src/Phpml/Classification/Ensemble/Bagging.php b/src/Phpml/Classification/Ensemble/Bagging.php index 1af155d..716a6bc 100644 --- a/src/Phpml/Classification/Ensemble/Bagging.php +++ b/src/Phpml/Classification/Ensemble/Bagging.php @@ -84,10 +84,11 @@ class Bagging implements Classifier public function setSubsetRatio(float $ratio) { if ($ratio < 0.1 || $ratio > 1.0) { - throw new \Exception("Subset ratio should be between 0.1 and 1.0"); + throw new \Exception('Subset ratio should be between 0.1 and 1.0'); } $this->subsetRatio = $ratio; + return $this; } @@ -100,7 +101,7 @@ class Bagging implements Classifier * names are neglected. * * @param string $classifier - * @param array $classifierOptions + * @param array $classifierOptions * * @return $this */ @@ -135,6 +136,7 @@ class Bagging implements Classifier /** * @param int $index + * * @return array */ protected function getRandomSubset(int $index) @@ -168,6 +170,7 @@ class Bagging implements Classifier $classifiers[] = $this->initSingleClassifier($obj); } + return $classifiers; } @@ -183,6 +186,7 @@ class Bagging implements Classifier /** * @param array $sample + * * @return mixed */ protected function predictSample(array $sample) @@ -196,6 +200,7 @@ class Bagging implements Classifier $counts = array_count_values($predictions); arsort($counts); reset($counts); + return key($counts); } } diff --git a/src/Phpml/Classification/Ensemble/RandomForest.php b/src/Phpml/Classification/Ensemble/RandomForest.php index 7849cd8..e6677cb 100644 --- a/src/Phpml/Classification/Ensemble/RandomForest.php +++ b/src/Phpml/Classification/Ensemble/RandomForest.php @@ -50,7 +50,7 @@ class RandomForest extends Bagging public function setFeatureSubsetRatio($ratio) { if (is_float($ratio) && ($ratio < 0.1 || $ratio > 1.0)) { - throw new \Exception("When a float given, feature subset ratio should be between 0.1 and 1.0"); + throw new \Exception('When a float given, feature subset ratio should be between 0.1 and 1.0'); } if (is_string($ratio) && $ratio != 'sqrt' && $ratio != 'log') { @@ -58,6 +58,7 @@ class RandomForest extends Bagging } $this->featureSubsetRatio = $ratio; + return $this; } @@ -74,7 +75,7 @@ class RandomForest extends Bagging public function setClassifer(string $classifier, array $classifierOptions = []) { if ($classifier != DecisionTree::class) { - throw new \Exception("RandomForest can only use DecisionTree as base classifier"); + throw new \Exception('RandomForest can only use DecisionTree as base classifier'); } return parent::setClassifer($classifier, $classifierOptions); @@ -120,6 +121,7 @@ class RandomForest extends Bagging * when trying to print some information about the trees such as feature importances * * @param array $names + * * @return $this */ public function setColumnNames(array $names) @@ -137,11 +139,11 @@ class RandomForest extends Bagging protected function initSingleClassifier($classifier) { if (is_float($this->featureSubsetRatio)) { - $featureCount = (int)($this->featureSubsetRatio * $this->featureCount); + $featureCount = (int) ($this->featureSubsetRatio * $this->featureCount); } elseif ($this->featureCount == 'sqrt') { - $featureCount = (int)sqrt($this->featureCount) + 1; + $featureCount = (int) sqrt($this->featureCount) + 1; } else { - $featureCount = (int)log($this->featureCount, 2) + 1; + $featureCount = (int) log($this->featureCount, 2) + 1; } if ($featureCount >= $this->featureCount) { diff --git a/src/Phpml/Classification/Linear/Adaline.php b/src/Phpml/Classification/Linear/Adaline.php index df648e8..d10fff4 100644 --- a/src/Phpml/Classification/Linear/Adaline.php +++ b/src/Phpml/Classification/Linear/Adaline.php @@ -9,12 +9,12 @@ class Adaline extends Perceptron /** * Batch training is the default Adaline training algorithm */ - const BATCH_TRAINING = 1; + const BATCH_TRAINING = 1; /** * Online training: Stochastic gradient descent learning */ - const ONLINE_TRAINING = 2; + const ONLINE_TRAINING = 2; /** * Training type may be either 'Batch' or 'Online' learning @@ -46,7 +46,7 @@ class Adaline extends Perceptron int $trainingType = self::BATCH_TRAINING ) { if (!in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING])) { - throw new \Exception("Adaline can only be trained with batch and online/stochastic gradient descent algorithm"); + throw new \Exception('Adaline can only be trained with batch and online/stochastic gradient descent algorithm'); } $this->trainingType = $trainingType; diff --git a/src/Phpml/Classification/Linear/DecisionStump.php b/src/Phpml/Classification/Linear/DecisionStump.php index 5a3247f..776a6a2 100644 --- a/src/Phpml/Classification/Linear/DecisionStump.php +++ b/src/Phpml/Classification/Linear/DecisionStump.php @@ -106,7 +106,7 @@ class DecisionStump extends WeightedClassifier if ($this->weights) { $numWeights = count($this->weights); if ($numWeights != count($samples)) { - throw new \Exception("Number of sample weights does not match with number of samples"); + throw new \Exception('Number of sample weights does not match with number of samples'); } } else { $this->weights = array_fill(0, count($samples), 1); @@ -163,7 +163,7 @@ class DecisionStump extends WeightedClassifier * * @param array $samples * @param array $targets - * @param int $col + * @param int $col * * @return array */ @@ -192,8 +192,8 @@ class DecisionStump extends WeightedClassifier } // Try other possible points one by one - for ($step = $minValue; $step <= $maxValue; $step+= $stepSize) { - $threshold = (float)$step; + for ($step = $minValue; $step <= $maxValue; $step += $stepSize) { + $threshold = (float) $step; list($errorRate, $prob) = $this->calculateErrorRate($targets, $threshold, $operator, $values); if ($errorRate < $split['trainingErrorRate']) { $split = ['value' => $threshold, 'operator' => $operator, @@ -209,7 +209,7 @@ class DecisionStump extends WeightedClassifier /** * @param array $samples * @param array $targets - * @param int $col + * @param int $col * * @return array */ @@ -217,7 +217,7 @@ class DecisionStump extends WeightedClassifier { $values = array_column($samples, $col); $valueCounts = array_count_values($values); - $distinctVals= array_keys($valueCounts); + $distinctVals = array_keys($valueCounts); $split = null; @@ -236,7 +236,6 @@ class DecisionStump extends WeightedClassifier return $split; } - /** * * @param mixed $leftValue @@ -264,10 +263,10 @@ class DecisionStump extends WeightedClassifier * Calculates the ratio of wrong predictions based on the new threshold * value given as the parameter * - * @param array $targets - * @param float $threshold + * @param array $targets + * @param float $threshold * @param string $operator - * @param array $values + * @param array $values * * @return array */ @@ -276,7 +275,7 @@ class DecisionStump extends WeightedClassifier $wrong = 0.0; $prob = []; $leftLabel = $this->binaryLabels[0]; - $rightLabel= $this->binaryLabels[1]; + $rightLabel = $this->binaryLabels[1]; foreach ($values as $index => $value) { if ($this->evaluate($value, $operator, $threshold)) { @@ -299,7 +298,7 @@ class DecisionStump extends WeightedClassifier // Calculate probabilities: Proportion of labels in each leaf $dist = array_combine($this->binaryLabels, array_fill(0, 2, 0.0)); foreach ($prob as $leaf => $counts) { - $leafTotal = (float)array_sum($prob[$leaf]); + $leafTotal = (float) array_sum($prob[$leaf]); foreach ($counts as $label => $count) { if (strval($leaf) == strval($label)) { $dist[$leaf] = $count / $leafTotal; @@ -357,8 +356,8 @@ class DecisionStump extends WeightedClassifier */ public function __toString() { - return "IF $this->column $this->operator $this->value " . - "THEN " . $this->binaryLabels[0] . " ". - "ELSE " . $this->binaryLabels[1]; + return "IF $this->column $this->operator $this->value ". + 'THEN '.$this->binaryLabels[0].' '. + 'ELSE '.$this->binaryLabels[1]; } } diff --git a/src/Phpml/Classification/Linear/LogisticRegression.php b/src/Phpml/Classification/Linear/LogisticRegression.php index 90ef4d1..0447ef8 100644 --- a/src/Phpml/Classification/Linear/LogisticRegression.php +++ b/src/Phpml/Classification/Linear/LogisticRegression.php @@ -59,9 +59,9 @@ class LogisticRegression extends Adaline * * Penalty (Regularization term) can be 'L2' or empty string to cancel penalty term * - * @param int $maxIterations - * @param bool $normalizeInputs - * @param int $trainingType + * @param int $maxIterations + * @param bool $normalizeInputs + * @param int $trainingType * @param string $cost * @param string $penalty * @@ -76,13 +76,13 @@ class LogisticRegression extends Adaline ) { $trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING); if (!in_array($trainingType, $trainingTypes)) { - throw new \Exception("Logistic regression can only be trained with " . - "batch (gradient descent), online (stochastic gradient descent) " . - "or conjugate batch (conjugate gradients) algorithms"); + throw new \Exception('Logistic regression can only be trained with '. + 'batch (gradient descent), online (stochastic gradient descent) '. + 'or conjugate batch (conjugate gradients) algorithms'); } if (!in_array($cost, ['log', 'sse'])) { - throw new \Exception("Logistic regression cost function can be one of the following: \n" . + throw new \Exception("Logistic regression cost function can be one of the following: \n". "'log' for log-likelihood and 'sse' for sum of squared errors"); } @@ -290,6 +290,7 @@ class LogisticRegression extends Adaline if (strval($predicted) == strval($label)) { $sample = $this->checkNormalizedSample($sample); + return abs($this->output($sample) - 0.5); } diff --git a/src/Phpml/Classification/Linear/Perceptron.php b/src/Phpml/Classification/Linear/Perceptron.php index 145a992..000059f 100644 --- a/src/Phpml/Classification/Linear/Perceptron.php +++ b/src/Phpml/Classification/Linear/Perceptron.php @@ -74,11 +74,11 @@ class Perceptron implements Classifier, IncrementalEstimator public function __construct(float $learningRate = 0.001, int $maxIterations = 1000, bool $normalizeInputs = true) { if ($learningRate <= 0.0 || $learningRate > 1.0) { - throw new \Exception("Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)"); + throw new \Exception('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)'); } if ($maxIterations <= 0) { - throw new \Exception("Maximum number of iterations must be an integer greater than 0"); + throw new \Exception('Maximum number of iterations must be an integer greater than 0'); } if ($normalizeInputs) { @@ -175,7 +175,7 @@ class Perceptron implements Classifier, IncrementalEstimator $prediction = $this->outputClass($sample); $gradient = $prediction - $target; - $error = $gradient**2; + $error = $gradient ** 2; return [$error, $gradient]; }; @@ -231,6 +231,7 @@ class Perceptron implements Classifier, IncrementalEstimator * Calculates net output of the network as a float value for the given input * * @param array $sample + * * @return int */ protected function output(array $sample) @@ -251,6 +252,7 @@ class Perceptron implements Classifier, IncrementalEstimator * Returns the class value (either -1 or 1) for the given input * * @param array $sample + * * @return int */ protected function outputClass(array $sample) @@ -275,6 +277,7 @@ class Perceptron implements Classifier, IncrementalEstimator if (strval($predicted) == strval($label)) { $sample = $this->checkNormalizedSample($sample); + return abs($this->output($sample)); } diff --git a/src/Phpml/Classification/MLPClassifier.php b/src/Phpml/Classification/MLPClassifier.php index 7f9043b..dfb5394 100644 --- a/src/Phpml/Classification/MLPClassifier.php +++ b/src/Phpml/Classification/MLPClassifier.php @@ -9,7 +9,6 @@ use Phpml\NeuralNetwork\Network\MultilayerPerceptron; class MLPClassifier extends MultilayerPerceptron implements Classifier { - /** * @param mixed $target * @@ -22,6 +21,7 @@ class MLPClassifier extends MultilayerPerceptron implements Classifier if (!in_array($target, $this->classes)) { throw InvalidArgumentException::invalidTarget($target); } + return array_search($target, $this->classes); } @@ -42,6 +42,7 @@ class MLPClassifier extends MultilayerPerceptron implements Classifier $max = $value; } } + return $this->classes[$predictedClass]; } diff --git a/src/Phpml/Classification/NaiveBayes.php b/src/Phpml/Classification/NaiveBayes.php index 1a634da..8daaf86 100644 --- a/src/Phpml/Classification/NaiveBayes.php +++ b/src/Phpml/Classification/NaiveBayes.php @@ -13,8 +13,8 @@ class NaiveBayes implements Classifier { use Trainable, Predictable; - const CONTINUOS = 1; - const NOMINAL = 2; + const CONTINUOS = 1; + const NOMINAL = 2; const EPSILON = 1e-10; /** @@ -25,7 +25,7 @@ class NaiveBayes implements Classifier /** * @var array */ - private $mean= []; + private $mean = []; /** * @var array @@ -80,13 +80,14 @@ class NaiveBayes implements Classifier /** * Calculates vital statistics for each label & feature. Stores these * values in private array in order to avoid repeated calculation + * * @param string $label - * @param array $samples + * @param array $samples */ private function calculateStatistics($label, $samples) { $this->std[$label] = array_fill(0, $this->featureCount, 0); - $this->mean[$label]= array_fill(0, $this->featureCount, 0); + $this->mean[$label] = array_fill(0, $this->featureCount, 0); $this->dataType[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); $this->discreteProb[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); for ($i = 0; $i < $this->featureCount; ++$i) { @@ -128,10 +129,11 @@ class NaiveBayes implements Classifier $this->discreteProb[$label][$feature][$value] == 0) { return self::EPSILON; } + return $this->discreteProb[$label][$feature][$value]; } $std = $this->std[$label][$feature] ; - $mean= $this->mean[$label][$feature]; + $mean = $this->mean[$label][$feature]; // Calculate the probability density by use of normal/Gaussian distribution // Ref: https://en.wikipedia.org/wiki/Normal_distribution // @@ -139,8 +141,9 @@ class NaiveBayes implements Classifier // some libraries adopt taking log of calculations such as // scikit-learn did. // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) - $pdf = -0.5 * log(2.0 * pi() * $std * $std); + $pdf = -0.5 * log(2.0 * pi() * $std * $std); $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); + return $pdf; } @@ -159,11 +162,13 @@ class NaiveBayes implements Classifier $samples[] = $this->samples[$i]; } } + return $samples; } /** * @param array $sample + * * @return mixed */ protected function predictSample(array $sample) @@ -174,7 +179,7 @@ class NaiveBayes implements Classifier $predictions = []; foreach ($this->labels as $label) { $p = $this->p[$label]; - for ($i = 0; $i<$this->featureCount; ++$i) { + for ($i = 0; $i < $this->featureCount; ++$i) { $Plf = $this->sampleProbability($sample, $i, $label); $p += $Plf; } @@ -183,6 +188,7 @@ class NaiveBayes implements Classifier arsort($predictions, SORT_NUMERIC); reset($predictions); + return key($predictions); } } diff --git a/src/Phpml/Clustering/FuzzyCMeans.php b/src/Phpml/Clustering/FuzzyCMeans.php index c6a3c46..da1398e 100644 --- a/src/Phpml/Clustering/FuzzyCMeans.php +++ b/src/Phpml/Clustering/FuzzyCMeans.php @@ -58,10 +58,10 @@ class FuzzyCMeans implements Clusterer private $samples; /** - * @param int $clustersNumber + * @param int $clustersNumber * @param float $fuzziness * @param float $epsilon - * @param int $maxIterations + * @param int $maxIterations * * @throws InvalidArgumentException */ @@ -159,6 +159,7 @@ class FuzzyCMeans implements Clusterer * * @param int $row * @param int $col + * * @return float */ protected function getDistanceCalc(int $row, int $col) @@ -179,6 +180,7 @@ class FuzzyCMeans implements Clusterer $val = pow($dist1 / $dist2, 2.0 / ($this->fuzziness - 1)); $sum += $val; } + return $sum; } @@ -212,13 +214,14 @@ class FuzzyCMeans implements Clusterer /** * @param array|Point[] $samples + * * @return array */ public function cluster(array $samples) { // Initialize variables, clusters and membership matrix $this->sampleCount = count($samples); - $this->samples =& $samples; + $this->samples = &$samples; $this->space = new Space(count($samples[0])); $this->initClusters(); diff --git a/src/Phpml/Clustering/KMeans/Space.php b/src/Phpml/Clustering/KMeans/Space.php index 0276880..14c1760 100644 --- a/src/Phpml/Clustering/KMeans/Space.php +++ b/src/Phpml/Clustering/KMeans/Space.php @@ -66,7 +66,7 @@ class Space extends SplObjectStorage /** * @param Point $point - * @param null $data + * @param null $data */ public function attach($point, $data = null) { diff --git a/src/Phpml/DimensionReduction/EigenTransformerBase.php b/src/Phpml/DimensionReduction/EigenTransformerBase.php index 6c0ef05..5e27a13 100644 --- a/src/Phpml/DimensionReduction/EigenTransformerBase.php +++ b/src/Phpml/DimensionReduction/EigenTransformerBase.php @@ -54,7 +54,7 @@ abstract class EigenTransformerBase { $eig = new EigenvalueDecomposition($matrix); $eigVals = $eig->getRealEigenvalues(); - $eigVects= $eig->getEigenvectors(); + $eigVects = $eig->getEigenvectors(); $totalEigVal = array_sum($eigVals); // Sort eigenvalues in descending order diff --git a/src/Phpml/DimensionReduction/KernelPCA.php b/src/Phpml/DimensionReduction/KernelPCA.php index 94e18c9..908c441 100644 --- a/src/Phpml/DimensionReduction/KernelPCA.php +++ b/src/Phpml/DimensionReduction/KernelPCA.php @@ -44,10 +44,10 @@ class KernelPCA extends PCA * will initialize the algorithm with an RBF kernel having the gamma parameter as 15,0.
* This transformation will return the same number of rows with only 2 columns. * - * @param int $kernel + * @param int $kernel * @param float $totalVariance Total variance to be preserved if numFeatures is not given - * @param int $numFeatures Number of columns to be returned - * @param float $gamma Gamma parameter is used with RBF and Sigmoid kernels + * @param int $numFeatures Number of columns to be returned + * @param float $gamma Gamma parameter is used with RBF and Sigmoid kernels * * @throws \Exception */ @@ -55,7 +55,7 @@ class KernelPCA extends PCA { $availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR]; if (!in_array($kernel, $availableKernels)) { - throw new \Exception("KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian"); + throw new \Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian'); } parent::__construct($totalVariance, $numFeatures); @@ -133,7 +133,7 @@ class KernelPCA extends PCA */ protected function centerMatrix(array $matrix, int $n) { - $N = array_fill(0, $n, array_fill(0, $n, 1.0/$n)); + $N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n)); $N = new Matrix($N, false); $K = new Matrix($matrix, false); @@ -168,6 +168,7 @@ class KernelPCA extends PCA case self::KERNEL_RBF: // k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance $dist = new Euclidean(); + return function ($x, $y) use ($dist) { return exp(-$this->gamma * $dist->sqDistance($x, $y)); }; @@ -176,12 +177,14 @@ class KernelPCA extends PCA // k(x,y)=tanh(γ.xT.y+c0) where c0=1 return function ($x, $y) { $res = Matrix::dot($x, $y)[0] + 1.0; + return tanh($this->gamma * $res); }; case self::KERNEL_LAPLACIAN: // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance $dist = new Manhattan(); + return function ($x, $y) use ($dist) { return exp(-$this->gamma * $dist->distance($x, $y)); }; @@ -241,11 +244,11 @@ class KernelPCA extends PCA public function transform(array $sample) { if (!$this->fit) { - throw new \Exception("KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first"); + throw new \Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first'); } if (is_array($sample[0])) { - throw new \Exception("KernelPCA::transform() accepts only one-dimensional arrays"); + throw new \Exception('KernelPCA::transform() accepts only one-dimensional arrays'); } $pairs = $this->getDistancePairs($sample); diff --git a/src/Phpml/DimensionReduction/LDA.php b/src/Phpml/DimensionReduction/LDA.php index e094c35..a2df627 100644 --- a/src/Phpml/DimensionReduction/LDA.php +++ b/src/Phpml/DimensionReduction/LDA.php @@ -43,20 +43,20 @@ class LDA extends EigenTransformerBase * or numFeatures (number of features in the dataset) to be preserved. * * @param float|null $totalVariance Total explained variance to be preserved - * @param int|null $numFeatures Number of features to be preserved + * @param int|null $numFeatures Number of features to be preserved * * @throws \Exception */ public function __construct($totalVariance = null, $numFeatures = null) { if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) { - throw new \Exception("Total variance can be a value between 0.1 and 0.99"); + throw new \Exception('Total variance can be a value between 0.1 and 0.99'); } if ($numFeatures !== null && $numFeatures <= 0) { - throw new \Exception("Number of features to be preserved should be greater than 0"); + throw new \Exception('Number of features to be preserved should be greater than 0'); } if ($totalVariance !== null && $numFeatures !== null) { - throw new \Exception("Either totalVariance or numFeatures should be specified in order to run the algorithm"); + throw new \Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm'); } if ($numFeatures !== null) { @@ -78,7 +78,7 @@ class LDA extends EigenTransformerBase public function fit(array $data, array $classes) : array { $this->labels = $this->getLabels($classes); - $this->means = $this->calculateMeans($data, $classes); + $this->means = $this->calculateMeans($data, $classes); $sW = $this->calculateClassVar($data, $classes); $sB = $this->calculateClassCov(); @@ -105,7 +105,6 @@ class LDA extends EigenTransformerBase return array_keys($counts); } - /** * Calculates mean of each column for each class and returns * n by m matrix where n is number of labels and m is number of columns @@ -118,7 +117,7 @@ class LDA extends EigenTransformerBase protected function calculateMeans(array $data, array $classes) : array { $means = []; - $counts= []; + $counts = []; $overallMean = array_fill(0, count($data[0]), 0.0); foreach ($data as $index => $row) { @@ -156,7 +155,6 @@ class LDA extends EigenTransformerBase return $means; } - /** * Returns in-class scatter matrix for each class, which * is a n by m matrix where n is number of classes and @@ -237,7 +235,7 @@ class LDA extends EigenTransformerBase public function transform(array $sample) { if (!$this->fit) { - throw new \Exception("LDA has not been fitted with respect to original dataset, please run LDA::fit() first"); + throw new \Exception('LDA has not been fitted with respect to original dataset, please run LDA::fit() first'); } if (!is_array($sample[0])) { diff --git a/src/Phpml/DimensionReduction/PCA.php b/src/Phpml/DimensionReduction/PCA.php index acaa8e0..7d3fd4f 100644 --- a/src/Phpml/DimensionReduction/PCA.php +++ b/src/Phpml/DimensionReduction/PCA.php @@ -28,20 +28,20 @@ class PCA extends EigenTransformerBase * within the data. It is a lossy data compression technique.
* * @param float $totalVariance Total explained variance to be preserved - * @param int $numFeatures Number of features to be preserved + * @param int $numFeatures Number of features to be preserved * * @throws \Exception */ public function __construct($totalVariance = null, $numFeatures = null) { if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) { - throw new \Exception("Total variance can be a value between 0.1 and 0.99"); + throw new \Exception('Total variance can be a value between 0.1 and 0.99'); } if ($numFeatures !== null && $numFeatures <= 0) { - throw new \Exception("Number of features to be preserved should be greater than 0"); + throw new \Exception('Number of features to be preserved should be greater than 0'); } if ($totalVariance !== null && $numFeatures !== null) { - throw new \Exception("Either totalVariance or numFeatures should be specified in order to run the algorithm"); + throw new \Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm'); } if ($numFeatures !== null) { @@ -79,7 +79,7 @@ class PCA extends EigenTransformerBase /** * @param array $data - * @param int $n + * @param int $n */ protected function calculateMeans(array $data, int $n) { @@ -129,7 +129,7 @@ class PCA extends EigenTransformerBase public function transform(array $sample) { if (!$this->fit) { - throw new \Exception("PCA has not been fitted with respect to original dataset, please run PCA::fit() first"); + throw new \Exception('PCA has not been fitted with respect to original dataset, please run PCA::fit() first'); } if (!is_array($sample[0])) { diff --git a/src/Phpml/Exception/InvalidArgumentException.php b/src/Phpml/Exception/InvalidArgumentException.php index b618d00..313ca79 100644 --- a/src/Phpml/Exception/InvalidArgumentException.php +++ b/src/Phpml/Exception/InvalidArgumentException.php @@ -73,7 +73,7 @@ class InvalidArgumentException extends \Exception */ public static function invalidTarget($target) { - return new self('Target with value ' . $target . ' is not part of the accepted classes'); + return new self('Target with value '.$target.' is not part of the accepted classes'); } /** diff --git a/src/Phpml/FeatureExtraction/TfIdfTransformer.php b/src/Phpml/FeatureExtraction/TfIdfTransformer.php index 9335775..61f7e65 100644 --- a/src/Phpml/FeatureExtraction/TfIdfTransformer.php +++ b/src/Phpml/FeatureExtraction/TfIdfTransformer.php @@ -32,7 +32,7 @@ class TfIdfTransformer implements Transformer $count = count($samples); foreach ($this->idf as &$value) { - $value = log((float)($count / $value), 10.0); + $value = log((float) ($count / $value), 10.0); } } diff --git a/src/Phpml/Helper/OneVsRest.php b/src/Phpml/Helper/OneVsRest.php index 8d71fbc..72757df 100644 --- a/src/Phpml/Helper/OneVsRest.php +++ b/src/Phpml/Helper/OneVsRest.php @@ -109,6 +109,7 @@ trait OneVsRest // multiple instances of this classifier $classifier = clone $this; $classifier->reset(); + return $classifier; } @@ -121,6 +122,7 @@ trait OneVsRest * * @param array $targets * @param mixed $label + * * @return array Binarized targets and target's labels */ private function binarizeTargets($targets, $label) @@ -131,10 +133,10 @@ trait OneVsRest } $labels = [$label, $notLabel]; + return [$targets, $labels]; } - /** * @param array $sample * @@ -153,6 +155,7 @@ trait OneVsRest } arsort($probs, SORT_NUMERIC); + return key($probs); } diff --git a/src/Phpml/Helper/Optimizer/GD.php b/src/Phpml/Helper/Optimizer/GD.php index b88b0c7..8babc7d 100644 --- a/src/Phpml/Helper/Optimizer/GD.php +++ b/src/Phpml/Helper/Optimizer/GD.php @@ -42,7 +42,7 @@ class GD extends StochasticGD $this->updateWeightsWithUpdates($updates, $totalPenalty); - $this->costValues[] = array_sum($errors)/$this->sampleCount; + $this->costValues[] = array_sum($errors) / $this->sampleCount; if ($this->earlyStop($theta)) { break; @@ -65,7 +65,7 @@ class GD extends StochasticGD protected function gradient(array $theta) { $costs = []; - $gradient= []; + $gradient = []; $totalPenalty = 0; foreach ($this->samples as $index => $sample) { diff --git a/src/Phpml/Helper/Optimizer/StochasticGD.php b/src/Phpml/Helper/Optimizer/StochasticGD.php index 82e860a..df29261 100644 --- a/src/Phpml/Helper/Optimizer/StochasticGD.php +++ b/src/Phpml/Helper/Optimizer/StochasticGD.php @@ -72,7 +72,7 @@ class StochasticGD extends Optimizer * * @var array */ - protected $costValues= []; + protected $costValues = []; /** * Initializes the SGD optimizer for the given number of dimensions @@ -151,8 +151,8 @@ class StochasticGD extends Optimizer * The cost function to minimize and the gradient of the function are to be * handled by the callback function provided as the third parameter of the method. * - * @param array $samples - * @param array $targets + * @param array $samples + * @param array $targets * @param \Closure $gradientCb * * @return array diff --git a/src/Phpml/Math/Distance/Minkowski.php b/src/Phpml/Math/Distance/Minkowski.php index 0788193..2af835e 100644 --- a/src/Phpml/Math/Distance/Minkowski.php +++ b/src/Phpml/Math/Distance/Minkowski.php @@ -43,6 +43,6 @@ class Minkowski implements Distance $distance += pow(abs($a[$i] - $b[$i]), $this->lambda); } - return (float)pow($distance, 1 / $this->lambda); + return (float) pow($distance, 1 / $this->lambda); } } diff --git a/src/Phpml/Math/LinearAlgebra/EigenvalueDecomposition.php b/src/Phpml/Math/LinearAlgebra/EigenvalueDecomposition.php index 642e8b3..c67673b 100644 --- a/src/Phpml/Math/LinearAlgebra/EigenvalueDecomposition.php +++ b/src/Phpml/Math/LinearAlgebra/EigenvalueDecomposition.php @@ -20,10 +20,12 @@ declare(strict_types=1); * * @author Paul Meagher * @license PHP v3.0 + * * @version 1.1 * * Slightly changed to adapt the original code to PHP-ML library * @date 2017/04/11 + * * @author Mustafa Karabulut */ @@ -35,18 +37,21 @@ class EigenvalueDecomposition { /** * Row and column dimension (square matrix). + * * @var int */ private $n; /** * Internal symmetry flag. + * * @var bool */ private $symmetric; /** * Arrays for internal storage of eigenvalues. + * * @var array */ private $d = []; @@ -54,24 +59,28 @@ class EigenvalueDecomposition /** * Array for internal storage of eigenvectors. + * * @var array */ private $V = []; /** * Array for internal storage of nonsymmetric Hessenberg form. + * * @var array */ private $H = []; /** * Working storage for nonsymmetric algorithm. + * * @var array */ private $ort; /** * Used for complex scalar division. + * * @var float */ private $cdivr; @@ -222,7 +231,6 @@ class EigenvalueDecomposition $this->e[0] = 0.0; } - /** * Symmetric tridiagonal QL algorithm. * @@ -239,7 +247,7 @@ class EigenvalueDecomposition $this->e[$this->n - 1] = 0.0; $f = 0.0; $tst1 = 0.0; - $eps = pow(2.0, -52.0); + $eps = pow(2.0, -52.0); for ($l = 0; $l < $this->n; ++$l) { // Find small subdiagonal element @@ -283,9 +291,9 @@ class EigenvalueDecomposition $c3 = $c2; $c2 = $c; $s2 = $s; - $g = $c * $this->e[$i]; - $h = $c * $p; - $r = hypot($p, $this->e[$i]); + $g = $c * $this->e[$i]; + $h = $c * $p; + $r = hypot($p, $this->e[$i]); $this->e[$i + 1] = $s * $r; $s = $this->e[$i] / $r; $c = $p / $r; @@ -295,7 +303,7 @@ class EigenvalueDecomposition for ($k = 0; $k < $this->n; ++$k) { $h = $this->V[$k][$i + 1]; $this->V[$k][$i + 1] = $s * $this->V[$k][$i] + $c * $h; - $this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h; + $this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h; } } $p = -$s * $s2 * $c3 * $el1 * $this->e[$l] / $dl1; @@ -330,7 +338,6 @@ class EigenvalueDecomposition } } - /** * Nonsymmetric reduction to Hessenberg form. * @@ -341,7 +348,7 @@ class EigenvalueDecomposition */ private function orthes() { - $low = 0; + $low = 0; $high = $this->n - 1; for ($m = $low + 1; $m <= $high - 1; ++$m) { @@ -451,7 +458,7 @@ class EigenvalueDecomposition { // Initialize $nn = $this->n; - $n = $nn - 1; + $n = $nn - 1; $low = 0; $high = $nn - 1; $eps = pow(2.0, -52.0); @@ -544,9 +551,9 @@ class EigenvalueDecomposition // Complex pair } else { $this->d[$n - 1] = $x + $p; - $this->d[$n] = $x + $p; + $this->d[$n] = $x + $p; $this->e[$n - 1] = $z; - $this->e[$n] = -$z; + $this->e[$n] = -$z; } $n = $n - 2; $iter = 0; @@ -747,10 +754,10 @@ class EigenvalueDecomposition } else { $this->cdiv(0.0, -$this->H[$n - 1][$n], $this->H[$n - 1][$n - 1] - $p, $q); $this->H[$n - 1][$n - 1] = $this->cdivr; - $this->H[$n - 1][$n] = $this->cdivi; + $this->H[$n - 1][$n] = $this->cdivi; } $this->H[$n][$n - 1] = 0.0; - $this->H[$n][$n] = 1.0; + $this->H[$n][$n] = 1.0; for ($i = $n - 2; $i >= 0; --$i) { // double ra,sa,vr,vi; $ra = 0.0; @@ -769,7 +776,7 @@ class EigenvalueDecomposition if ($this->e[$i] == 0) { $this->cdiv(-$ra, -$sa, $w, $q); $this->H[$i][$n - 1] = $this->cdivr; - $this->H[$i][$n] = $this->cdivi; + $this->H[$i][$n] = $this->cdivi; } else { // Solve complex equations $x = $this->H[$i][$i + 1]; @@ -781,14 +788,14 @@ class EigenvalueDecomposition } $this->cdiv($x * $r - $z * $ra + $q * $sa, $x * $s - $z * $sa - $q * $ra, $vr, $vi); $this->H[$i][$n - 1] = $this->cdivr; - $this->H[$i][$n] = $this->cdivi; + $this->H[$i][$n] = $this->cdivi; if (abs($x) > (abs($z) + abs($q))) { $this->H[$i + 1][$n - 1] = (-$ra - $w * $this->H[$i][$n - 1] + $q * $this->H[$i][$n]) / $x; - $this->H[$i + 1][$n] = (-$sa - $w * $this->H[$i][$n] - $q * $this->H[$i][$n - 1]) / $x; + $this->H[$i + 1][$n] = (-$sa - $w * $this->H[$i][$n] - $q * $this->H[$i][$n - 1]) / $x; } else { $this->cdiv(-$r - $y * $this->H[$i][$n - 1], -$s - $y * $this->H[$i][$n], $z, $q); $this->H[$i + 1][$n - 1] = $this->cdivr; - $this->H[$i + 1][$n] = $this->cdivi; + $this->H[$i + 1][$n] = $this->cdivi; } } // Overflow control @@ -796,7 +803,7 @@ class EigenvalueDecomposition if (($eps * $t) * $t > 1) { for ($j = $i; $j <= $n; ++$j) { $this->H[$j][$n - 1] = $this->H[$j][$n - 1] / $t; - $this->H[$j][$n] = $this->H[$j][$n] / $t; + $this->H[$j][$n] = $this->H[$j][$n] / $t; } } } // end else @@ -823,12 +830,11 @@ class EigenvalueDecomposition $this->V[$i][$j] = $z; } } - } // end hqr2 + } /** * Return the eigenvector matrix * - * @access public * * @return array */ @@ -899,4 +905,4 @@ class EigenvalueDecomposition return $D; } -} // class EigenvalueDecomposition +} diff --git a/src/Phpml/Math/LinearAlgebra/LUDecomposition.php b/src/Phpml/Math/LinearAlgebra/LUDecomposition.php index de6a15d..7a143f1 100644 --- a/src/Phpml/Math/LinearAlgebra/LUDecomposition.php +++ b/src/Phpml/Math/LinearAlgebra/LUDecomposition.php @@ -17,11 +17,14 @@ declare(strict_types=1); * @author Paul Meagher * @author Bartosz Matosiuk * @author Michael Bommarito + * * @version 1.1 + * * @license PHP v3.0 * * Slightly changed to adapt the original code to PHP-ML library * @date 2017/04/24 + * * @author Mustafa Karabulut */ @@ -34,35 +37,39 @@ class LUDecomposition { /** * Decomposition storage + * * @var array */ private $LU = []; /** * Row dimension. + * * @var int */ private $m; /** * Column dimension. + * * @var int */ private $n; /** * Pivot sign. + * * @var int */ private $pivsign; /** * Internal storage of pivot vector. + * * @var array */ private $piv = []; - /** * Constructs Structure to access L, U and piv. * @@ -78,8 +85,8 @@ class LUDecomposition // Use a "left-looking", dot-product, Crout/Doolittle algorithm. $this->LU = $A->toArray(); - $this->m = $A->getRows(); - $this->n = $A->getColumns(); + $this->m = $A->getRows(); + $this->n = $A->getColumns(); for ($i = 0; $i < $this->m; ++$i) { $this->piv[$i] = $i; } @@ -128,8 +135,7 @@ class LUDecomposition } } } - } // function __construct() - + } /** * Get lower triangular factor. @@ -150,9 +156,9 @@ class LUDecomposition } } } - return new Matrix($L); - } // function getL() + return new Matrix($L); + } /** * Get upper triangular factor. @@ -171,9 +177,9 @@ class LUDecomposition } } } - return new Matrix($U); - } // function getU() + return new Matrix($U); + } /** * Return pivot permutation vector. @@ -183,8 +189,7 @@ class LUDecomposition public function getPivot() { return $this->piv; - } // function getPivot() - + } /** * Alias for getPivot @@ -194,8 +199,7 @@ class LUDecomposition public function getDoublePivot() { return $this->getPivot(); - } // function getDoublePivot() - + } /** * Is the matrix nonsingular? @@ -211,8 +215,7 @@ class LUDecomposition } return true; - } // function isNonsingular() - + } /** * Count determinants @@ -233,8 +236,7 @@ class LUDecomposition } return $d; - } // function det() - + } /** * Solve A*X = B @@ -257,7 +259,7 @@ class LUDecomposition // Copy right hand side with pivoting $nx = $B->getColumns(); - $X = $this->getSubMatrix($B->toArray(), $this->piv, 0, $nx - 1); + $X = $this->getSubMatrix($B->toArray(), $this->piv, 0, $nx - 1); // Solve L*Y = B(piv,:) for ($k = 0; $k < $this->n; ++$k) { for ($i = $k + 1; $i < $this->n; ++$i) { @@ -277,8 +279,9 @@ class LUDecomposition } } } + return $X; - } // function solve() + } /** * @param array $matrix @@ -302,4 +305,4 @@ class LUDecomposition return $R; } -} // class LUDecomposition +} diff --git a/src/Phpml/Math/Matrix.php b/src/Phpml/Math/Matrix.php index 3c31052..fd91234 100644 --- a/src/Phpml/Math/Matrix.php +++ b/src/Phpml/Math/Matrix.php @@ -122,7 +122,6 @@ class Matrix return array_column($this->matrix, $column); } - /** * @return float|int * diff --git a/src/Phpml/Math/Statistic/Covariance.php b/src/Phpml/Math/Statistic/Covariance.php index 779b895..e0a239d 100644 --- a/src/Phpml/Math/Statistic/Covariance.php +++ b/src/Phpml/Math/Statistic/Covariance.php @@ -80,7 +80,7 @@ class Covariance } if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) { - throw new \Exception("Given indices i and k do not match with the dimensionality of data"); + throw new \Exception('Given indices i and k do not match with the dimensionality of data'); } if ($meanX === null || $meanY === null) { diff --git a/src/Phpml/Math/Statistic/Gaussian.php b/src/Phpml/Math/Statistic/Gaussian.php index d09edba..ae4c9a6 100644 --- a/src/Phpml/Math/Statistic/Gaussian.php +++ b/src/Phpml/Math/Statistic/Gaussian.php @@ -39,7 +39,8 @@ class Gaussian // Ref: https://en.wikipedia.org/wiki/Normal_distribution $std2 = $this->std ** 2; $mean = $this->mean; - return exp(- (($value - $mean) ** 2) / (2 * $std2)) / sqrt(2 * $std2 * pi()); + + return exp(-(($value - $mean) ** 2) / (2 * $std2)) / sqrt(2 * $std2 * pi()); } /** @@ -55,6 +56,7 @@ class Gaussian public static function distributionPdf(float $mean, float $std, float $value) { $normal = new self($mean, $std); + return $normal->pdf($value); } } diff --git a/src/Phpml/Math/Statistic/Mean.php b/src/Phpml/Math/Statistic/Mean.php index bd9657e..6dd9853 100644 --- a/src/Phpml/Math/Statistic/Mean.php +++ b/src/Phpml/Math/Statistic/Mean.php @@ -34,7 +34,7 @@ class Mean self::checkArrayLength($numbers); $count = count($numbers); - $middleIndex = (int)floor($count / 2); + $middleIndex = (int) floor($count / 2); sort($numbers, SORT_NUMERIC); $median = $numbers[$middleIndex]; diff --git a/src/Phpml/NeuralNetwork/Network/MultilayerPerceptron.php b/src/Phpml/NeuralNetwork/Network/MultilayerPerceptron.php index b7364b2..21510c4 100644 --- a/src/Phpml/NeuralNetwork/Network/MultilayerPerceptron.php +++ b/src/Phpml/NeuralNetwork/Network/MultilayerPerceptron.php @@ -138,6 +138,7 @@ abstract class MultilayerPerceptron extends LayeredNetwork implements Estimator, /** * @param array $sample + * * @return mixed */ abstract protected function predictSample(array $sample); diff --git a/src/Phpml/Preprocessing/Normalizer.php b/src/Phpml/Preprocessing/Normalizer.php index c61b447..fc00030 100644 --- a/src/Phpml/Preprocessing/Normalizer.php +++ b/src/Phpml/Preprocessing/Normalizer.php @@ -12,7 +12,7 @@ class Normalizer implements Preprocessor { const NORM_L1 = 1; const NORM_L2 = 2; - const NORM_STD= 3; + const NORM_STD = 3; /** * @var int @@ -77,7 +77,7 @@ class Normalizer implements Preprocessor $methods = [ self::NORM_L1 => 'normalizeL1', self::NORM_L2 => 'normalizeL2', - self::NORM_STD=> 'normalizeSTD' + self::NORM_STD => 'normalizeSTD' ]; $method = $methods[$this->norm]; @@ -117,7 +117,7 @@ class Normalizer implements Preprocessor foreach ($sample as $feature) { $norm2 += $feature * $feature; } - $norm2 = sqrt((float)$norm2); + $norm2 = sqrt((float) $norm2); if (0 == $norm2) { $sample = array_fill(0, count($sample), 1); diff --git a/tests/Phpml/Classification/DecisionTreeTest.php b/tests/Phpml/Classification/DecisionTreeTest.php index db2d810..3b61166 100644 --- a/tests/Phpml/Classification/DecisionTreeTest.php +++ b/tests/Phpml/Classification/DecisionTreeTest.php @@ -11,20 +11,20 @@ use PHPUnit\Framework\TestCase; class DecisionTreeTest extends TestCase { private $data = [ - ['sunny', 85, 85, 'false', 'Dont_play' ], - ['sunny', 80, 90, 'true', 'Dont_play' ], - ['overcast', 83, 78, 'false', 'Play' ], - ['rain', 70, 96, 'false', 'Play' ], - ['rain', 68, 80, 'false', 'Play' ], - ['rain', 65, 70, 'true', 'Dont_play' ], - ['overcast', 64, 65, 'true', 'Play' ], - ['sunny', 72, 95, 'false', 'Dont_play' ], - ['sunny', 69, 70, 'false', 'Play' ], - ['rain', 75, 80, 'false', 'Play' ], - ['sunny', 75, 70, 'true', 'Play' ], - ['overcast', 72, 90, 'true', 'Play' ], - ['overcast', 81, 75, 'false', 'Play' ], - ['rain', 71, 80, 'true', 'Dont_play' ] + ['sunny', 85, 85, 'false', 'Dont_play'], + ['sunny', 80, 90, 'true', 'Dont_play'], + ['overcast', 83, 78, 'false', 'Play'], + ['rain', 70, 96, 'false', 'Play'], + ['rain', 68, 80, 'false', 'Play'], + ['rain', 65, 70, 'true', 'Dont_play'], + ['overcast', 64, 65, 'true', 'Play'], + ['sunny', 72, 95, 'false', 'Dont_play'], + ['sunny', 69, 70, 'false', 'Play'], + ['rain', 75, 80, 'false', 'Play'], + ['sunny', 75, 70, 'true', 'Play'], + ['overcast', 72, 90, 'true', 'Play'], + ['overcast', 81, 75, 'false', 'Play'], + ['rain', 71, 80, 'true', 'Dont_play'] ]; private $extraData = [ @@ -38,6 +38,7 @@ class DecisionTreeTest extends TestCase array_walk($input, function (&$v) { array_splice($v, 4, 1); }); + return [$input, $targets]; } @@ -54,6 +55,7 @@ class DecisionTreeTest extends TestCase $classifier->train($data, $targets); $this->assertEquals('Dont_play', $classifier->predict(['scorching', 95, 90, 'true'])); $this->assertEquals('Play', $classifier->predict(['overcast', 60, 60, 'false'])); + return $classifier; } diff --git a/tests/Phpml/Classification/Ensemble/BaggingTest.php b/tests/Phpml/Classification/Ensemble/BaggingTest.php index 002697e..a00b176 100644 --- a/tests/Phpml/Classification/Ensemble/BaggingTest.php +++ b/tests/Phpml/Classification/Ensemble/BaggingTest.php @@ -13,25 +13,25 @@ use PHPUnit\Framework\TestCase; class BaggingTest extends TestCase { private $data = [ - ['sunny', 85, 85, 'false', 'Dont_play' ], - ['sunny', 80, 90, 'true', 'Dont_play' ], - ['overcast', 83, 78, 'false', 'Play' ], - ['rain', 70, 96, 'false', 'Play' ], - ['rain', 68, 80, 'false', 'Play' ], - ['rain', 65, 70, 'true', 'Dont_play' ], - ['overcast', 64, 65, 'true', 'Play' ], - ['sunny', 72, 95, 'false', 'Dont_play' ], - ['sunny', 69, 70, 'false', 'Play' ], - ['rain', 75, 80, 'false', 'Play' ], - ['sunny', 75, 70, 'true', 'Play' ], - ['overcast', 72, 90, 'true', 'Play' ], - ['overcast', 81, 75, 'false', 'Play' ], - ['rain', 71, 80, 'true', 'Dont_play' ] + ['sunny', 85, 85, 'false', 'Dont_play'], + ['sunny', 80, 90, 'true', 'Dont_play'], + ['overcast', 83, 78, 'false', 'Play'], + ['rain', 70, 96, 'false', 'Play'], + ['rain', 68, 80, 'false', 'Play'], + ['rain', 65, 70, 'true', 'Dont_play'], + ['overcast', 64, 65, 'true', 'Play'], + ['sunny', 72, 95, 'false', 'Dont_play'], + ['sunny', 69, 70, 'false', 'Play'], + ['rain', 75, 80, 'false', 'Play'], + ['sunny', 75, 70, 'true', 'Play'], + ['overcast', 72, 90, 'true', 'Play'], + ['overcast', 81, 75, 'false', 'Play'], + ['rain', 71, 80, 'true', 'Dont_play'] ]; private $extraData = [ - ['scorching', 90, 95, 'false', 'Dont_play'], - ['scorching', 0, 0, 'false', 'Dont_play'], + ['scorching', 90, 95, 'false', 'Dont_play'], + ['scorching', 0, 0, 'false', 'Dont_play'], ]; public function testPredictSingleSample() @@ -97,6 +97,7 @@ class BaggingTest extends TestCase $classifier = new Bagging($numBaseClassifiers); $classifier->setSubsetRatio(1.0); $classifier->setClassifer(DecisionTree::class, ['depth' => 10]); + return $classifier; } @@ -104,7 +105,7 @@ class BaggingTest extends TestCase { return [ DecisionTree::class => ['depth' => 5], - NaiveBayes::class => [] + NaiveBayes::class => [] ]; } @@ -113,7 +114,7 @@ class BaggingTest extends TestCase // Populating input data to a size large enough // for base classifiers that they can work with a subset of it $populated = []; - for ($i=0; $i<20; $i++) { + for ($i = 0; $i < 20; ++$i) { $populated = array_merge($populated, $input); } shuffle($populated); @@ -121,6 +122,7 @@ class BaggingTest extends TestCase array_walk($populated, function (&$v) { array_splice($v, 4, 1); }); + return [$populated, $targets]; } } diff --git a/tests/Phpml/Classification/Ensemble/RandomForestTest.php b/tests/Phpml/Classification/Ensemble/RandomForestTest.php index be587ef..8468893 100644 --- a/tests/Phpml/Classification/Ensemble/RandomForestTest.php +++ b/tests/Phpml/Classification/Ensemble/RandomForestTest.php @@ -14,12 +14,13 @@ class RandomForestTest extends BaggingTest { $classifier = new RandomForest($numBaseClassifiers); $classifier->setFeatureSubsetRatio('log'); + return $classifier; } protected function getAvailableBaseClassifiers() { - return [ DecisionTree::class => ['depth' => 5] ]; + return [DecisionTree::class => ['depth' => 5]]; } public function testOtherBaseClassifier() diff --git a/tests/Phpml/Classification/MLPClassifierTest.php b/tests/Phpml/Classification/MLPClassifierTest.php index 3a009c3..db30afd 100644 --- a/tests/Phpml/Classification/MLPClassifierTest.php +++ b/tests/Phpml/Classification/MLPClassifierTest.php @@ -180,6 +180,7 @@ class MLPClassifierTest extends TestCase [0, 1, 2] ); } + /** * @expectedException \Phpml\Exception\InvalidArgumentException */ diff --git a/tests/Phpml/Clustering/FuzzyCMeansTest.php b/tests/Phpml/Clustering/FuzzyCMeansTest.php index 85285b2..68cc0db 100644 --- a/tests/Phpml/Clustering/FuzzyCMeansTest.php +++ b/tests/Phpml/Clustering/FuzzyCMeansTest.php @@ -21,6 +21,7 @@ class FuzzyCMeansTest extends TestCase } } $this->assertCount(0, $samples); + return $fcm; } diff --git a/tests/Phpml/DimensionReduction/LDATest.php b/tests/Phpml/DimensionReduction/LDATest.php index 5ebe018..713e205 100644 --- a/tests/Phpml/DimensionReduction/LDATest.php +++ b/tests/Phpml/DimensionReduction/LDATest.php @@ -57,7 +57,7 @@ class LDATest extends TestCase // for each projected row foreach ($data as $i => $row) { $newRow = [$transformed2[$i]]; - $newRow2= $lda->transform($row); + $newRow2 = $lda->transform($row); array_map($check, $newRow, $newRow2); } diff --git a/tests/Phpml/DimensionReduction/PCATest.php b/tests/Phpml/DimensionReduction/PCATest.php index 8f65e98..a4784f9 100644 --- a/tests/Phpml/DimensionReduction/PCATest.php +++ b/tests/Phpml/DimensionReduction/PCATest.php @@ -47,7 +47,7 @@ class PCATest extends TestCase // same dimensionality with the original dataset foreach ($data as $i => $row) { $newRow = [[$transformed[$i]]]; - $newRow2= $pca->transform($row); + $newRow2 = $pca->transform($row); array_map(function ($val1, $val2) use ($epsilon) { $this->assertEquals(abs($val1), abs($val2), '', $epsilon); diff --git a/tests/Phpml/Math/LinearAlgebra/EigenDecompositionTest.php b/tests/Phpml/Math/LinearAlgebra/EigenDecompositionTest.php index 4bca1bd..e2c615e 100644 --- a/tests/Phpml/Math/LinearAlgebra/EigenDecompositionTest.php +++ b/tests/Phpml/Math/LinearAlgebra/EigenDecompositionTest.php @@ -22,7 +22,7 @@ class EigenDecompositionTest extends TestCase [0.614444444, 0.716555556] ]; $knownEigvalues = [0.0490833989, 1.28402771]; - $knownEigvectors= [[-0.735178656, 0.677873399], [-0.677873399, -0.735178656]]; + $knownEigvectors = [[-0.735178656, 0.677873399], [-0.677873399, -0.735178656]]; $decomp = new EigenvalueDecomposition($matrix); $eigVectors = $decomp->getEigenvectors(); @@ -37,8 +37,8 @@ class EigenDecompositionTest extends TestCase $len = 3; $A = array_fill(0, $len, array_fill(0, $len, 0.0)); srand(intval(microtime(true) * 1000)); - for ($i=0; $i < $len; $i++) { - for ($k=0; $k < $len; $k++) { + for ($i = 0; $i < $len; ++$i) { + for ($k = 0; $k < $len; ++$k) { if ($i > $k) { $A[$i][$k] = $A[$k][$i]; } else { @@ -49,7 +49,7 @@ class EigenDecompositionTest extends TestCase $decomp = new EigenvalueDecomposition($A); $eigValues = $decomp->getRealEigenvalues(); - $eigVectors= $decomp->getEigenvectors(); + $eigVectors = $decomp->getEigenvectors(); foreach ($eigValues as $index => $lambda) { $m1 = new Matrix($A); @@ -57,7 +57,7 @@ class EigenDecompositionTest extends TestCase // A.v=λ.v $leftSide = $m1->multiply($m2)->toArray(); - $rightSide= $m2->multiplyByScalar($lambda)->toArray(); + $rightSide = $m2->multiplyByScalar($lambda)->toArray(); $this->assertEquals($leftSide, $rightSide, '', $epsilon); } diff --git a/tests/Phpml/Math/Statistic/GaussianTest.php b/tests/Phpml/Math/Statistic/GaussianTest.php index 6bbf63b..a0c9700 100644 --- a/tests/Phpml/Math/Statistic/GaussianTest.php +++ b/tests/Phpml/Math/Statistic/GaussianTest.php @@ -12,12 +12,12 @@ class GaussianTest extends TestCase public function testPdf() { $std = 1.0; - $mean= 0.0; + $mean = 0.0; $g = new Gaussian($mean, $std); // Allowable error $delta = 0.001; - $x = [0, 0.1, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]; + $x = [0, 0.1, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]; $pdf = [0.3989, 0.3969, 0.3520, 0.2419, 0.1295, 0.0539, 0.0175, 0.0044]; foreach ($x as $i => $v) { $this->assertEquals($pdf[$i], $g->pdf($v), '', $delta); diff --git a/tests/Phpml/ModelManagerTest.php b/tests/Phpml/ModelManagerTest.php index 066aad1..400b6d1 100644 --- a/tests/Phpml/ModelManagerTest.php +++ b/tests/Phpml/ModelManagerTest.php @@ -13,7 +13,7 @@ class ModelManagerTest extends TestCase public function testSaveAndRestore() { $filename = uniqid(); - $filepath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . $filename; + $filepath = sys_get_temp_dir().DIRECTORY_SEPARATOR.$filename; $estimator = new LeastSquares(); $modelManager = new ModelManager(); @@ -28,7 +28,7 @@ class ModelManagerTest extends TestCase */ public function testRestoreWrongFile() { - $filepath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'unexisting'; + $filepath = sys_get_temp_dir().DIRECTORY_SEPARATOR.'unexisting'; $modelManager = new ModelManager(); $modelManager->restoreFromFile($filepath); } diff --git a/tests/Phpml/Preprocessing/NormalizerTest.php b/tests/Phpml/Preprocessing/NormalizerTest.php index a8a8826..5c8efb1 100644 --- a/tests/Phpml/Preprocessing/NormalizerTest.php +++ b/tests/Phpml/Preprocessing/NormalizerTest.php @@ -106,9 +106,9 @@ class NormalizerTest extends TestCase // Generate 10 random vectors of length 3 $samples = []; srand(time()); - for ($i=0; $i<10; $i++) { + for ($i = 0; $i < 10; ++$i) { $sample = array_fill(0, 3, 0); - for ($k=0; $k<3; $k++) { + for ($k = 0; $k < 3; ++$k) { $sample[$k] = rand(1, 100); } // Last feature's value shared across samples.