From a87859dd971ee14ba42b7d851a36ebd1028c318a Mon Sep 17 00:00:00 2001 From: Mustafa Karabulut Date: Sun, 23 Apr 2017 10:03:30 +0300 Subject: [PATCH] Linear algebra operations, Dimensionality reduction and some other minor changes (#81) * Lineer Algebra operations * Covariance * PCA and KernelPCA * Tests for PCA, Eigenvalues and Covariance * KernelPCA update * KernelPCA and its test * KernelPCA and its test * MatrixTest, KernelPCA and PCA tests * Readme update * Readme update --- README.md | 10 +- src/Phpml/DimensionReduction/KernelPCA.php | 246 +++++ src/Phpml/DimensionReduction/PCA.php | 228 +++++ .../Exception/InvalidArgumentException.php | 2 +- src/Phpml/Math/Distance/Euclidean.php | 13 + .../LinearAlgebra/EigenvalueDecomposition.php | 890 ++++++++++++++++++ src/Phpml/Math/Matrix.php | 127 ++- src/Phpml/Math/Statistic/Covariance.php | 155 +++ .../DimensionReduction/KernelPCATest.php | 51 + tests/Phpml/DimensionReduction/PCATest.php | 57 ++ .../LinearAlgebra/EigenDecompositionTest.php | 65 ++ tests/Phpml/Math/MatrixTest.php | 76 ++ tests/Phpml/Math/Statistic/CovarianceTest.php | 62 ++ 13 files changed, 1965 insertions(+), 17 deletions(-) create mode 100644 src/Phpml/DimensionReduction/KernelPCA.php create mode 100644 src/Phpml/DimensionReduction/PCA.php create mode 100644 src/Phpml/Math/LinearAlgebra/EigenvalueDecomposition.php create mode 100644 src/Phpml/Math/Statistic/Covariance.php create mode 100644 tests/Phpml/DimensionReduction/KernelPCATest.php create mode 100644 tests/Phpml/DimensionReduction/PCATest.php create mode 100644 tests/Phpml/Math/LinearAlgebra/EigenDecompositionTest.php create mode 100644 tests/Phpml/Math/Statistic/CovarianceTest.php diff --git a/README.md b/README.md index c362a2c..bab758c 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ $labels = ['a', 'a', 'a', 'b', 'b', 'b']; $classifier = new KNearestNeighbors(); $classifier->train($samples, $labels); -$classifier->predict([3, 2]); +$classifier->predict([3, 2]); // return 'b' ``` @@ -61,12 +61,14 @@ Example scripts are available in a separate repository [php-ai/php-ml-examples]( * Adaline * Decision Stump * Perceptron + * LogisticRegression * Regression * [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/) * [SVR](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/svr/) * Clustering * [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means/) * [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan/) + * Fuzzy C-Means * Metric * [Accuracy](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/accuracy/) * [Confusion Matrix](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/confusion-matrix/) @@ -85,6 +87,9 @@ Example scripts are available in a separate repository [php-ai/php-ml-examples]( * Feature Extraction * [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/) * [Tf-idf Transformer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/tf-idf-transformer/) +* Dimensionality Reduction + * PCA + * Kernel PCA * Datasets * [Array](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/array-dataset/) * [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/) @@ -100,7 +105,8 @@ Example scripts are available in a separate repository [php-ai/php-ml-examples]( * [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/) * 
[Set](http://php-ml.readthedocs.io/en/latest/math/set/) * [Statistic](http://php-ml.readthedocs.io/en/latest/math/statistic/) - + * Linear Algebra + ## Contribute - [Issue Tracker: github.com/php-ai/php-ml](https://github.com/php-ai/php-ml/issues) diff --git a/src/Phpml/DimensionReduction/KernelPCA.php b/src/Phpml/DimensionReduction/KernelPCA.php new file mode 100644 index 0000000..86070c7 --- /dev/null +++ b/src/Phpml/DimensionReduction/KernelPCA.php @@ -0,0 +1,246 @@ +
+     * Example: $kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);
+     * will initialize the algorithm with an RBF kernel whose gamma parameter is 15.0.
+ * This transformation will return the same number of rows with only 2 columns. + * + * @param int $kernel + * @param float $totalVariance Total variance to be preserved if numFeatures is not given + * @param int $numFeatures Number of columns to be returned + * @param float $gamma Gamma parameter is used with RBF and Sigmoid kernels + * + * @throws \Exception + */ + public function __construct(int $kernel = self::KERNEL_RBF, $totalVariance = null, $numFeatures = null, $gamma = null) + { + $availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR]; + if (! in_array($kernel, $availableKernels)) { + throw new \Exception("KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian"); + } + + parent::__construct($totalVariance, $numFeatures); + + $this->kernel = $kernel; + $this->gamma = $gamma; + } + + /** + * Takes a data and returns a lower dimensional version + * of this data while preserving $totalVariance or $numFeatures.
+ * $data is an n-by-m matrix and returned array is + * n-by-k matrix where k <= m + * + * @param array $data + * + * @return array + */ + public function fit(array $data) + { + $numRows = count($data); + $this->data = $data; + + if ($this->gamma === null) { + $this->gamma = 1.0 / $numRows; + } + + $matrix = $this->calculateKernelMatrix($this->data, $numRows); + $matrix = $this->centerMatrix($matrix, $numRows); + + list($this->eigValues, $this->eigVectors) = $this->eigenDecomposition($matrix, $numRows); + + $this->fit = true; + + return Matrix::transposeArray($this->eigVectors); + } + + /** + * Calculates similarity matrix by use of selected kernel function
+ * An n-by-m matrix is given and an n-by-n matrix is returned + * + * @param array $data + * @param int $numRows + * + * @return array + */ + protected function calculateKernelMatrix(array $data, int $numRows) + { + $kernelFunc = $this->getKernel(); + + $matrix = []; + for ($i=0; $i < $numRows; $i++) { + for ($k=0; $k < $numRows; $k++) { + if ($i <= $k) { + $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]); + } else { + $matrix[$i][$k] = $matrix[$k][$i]; + } + } + } + + return $matrix; + } + + /** + * Kernel matrix is centered in its original space by using the following + * conversion: + * + * K′ = K − N.K − K.N + N.K.N where N is n-by-n matrix filled with 1/n + * + * @param array $matrix + * @param int $n + */ + protected function centerMatrix(array $matrix, int $n) + { + $N = array_fill(0, $n, array_fill(0, $n, 1.0/$n)); + $N = new Matrix($N, false); + $K = new Matrix($matrix, false); + + // K.N (This term is repeated so we cache it once) + $K_N = $K->multiply($N); + // N.K + $N_K = $N->multiply($K); + // N.K.N + $N_K_N = $N->multiply($K_N); + + return $K->subtract($N_K) + ->subtract($K_N) + ->add($N_K_N) + ->toArray(); + } + + /** + * Returns the callable kernel function + * + * @return \Closure + */ + protected function getKernel() + { + switch ($this->kernel) { + case self::KERNEL_LINEAR: + // k(x,y) = xT.y + return function ($x, $y) { + return Matrix::dot($x, $y)[0]; + }; + case self::KERNEL_RBF: + // k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance + $dist = new Euclidean(); + return function ($x, $y) use ($dist) { + return exp(-$this->gamma * $dist->sqDistance($x, $y)); + }; + + case self::KERNEL_SIGMOID: + // k(x,y)=tanh(γ.xT.y+c0) where c0=1 + return function ($x, $y) { + $res = Matrix::dot($x, $y)[0] + 1.0; + return tanh($this->gamma * $res); + }; + + case self::KERNEL_LAPLACIAN: + // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance + $dist = new Manhattan(); + return function ($x, $y) use ($dist) { + return exp(-$this->gamma * $dist->distance($x, $y)); + }; + } + } + + /** + * @param array $sample + * + * @return array + */ + protected function getDistancePairs(array $sample) + { + $kernel = $this->getKernel(); + + $pairs = []; + foreach ($this->data as $row) { + $pairs[] = $kernel($row, $sample); + } + + return $pairs; + } + + /** + * @param array $pairs + * + * @return array + */ + protected function projectSample(array $pairs) + { + // Normalize eigenvectors by eig = eigVectors / eigValues + $func = function ($eigVal, $eigVect) { + $m = new Matrix($eigVect, false); + $a = $m->divideByScalar($eigVal)->toArray(); + + return $a[0]; + }; + $eig = array_map($func, $this->eigValues, $this->eigVectors); + + // return k.dot(eig) + return Matrix::dot($pairs, $eig); + } + + /** + * Transforms the given sample to a lower dimensional vector by using + * the variables obtained during the last run of fit. 
+ * + * @param array $sample + * + * @return array + */ + public function transform(array $sample) + { + if (!$this->fit) { + throw new \Exception("KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first"); + } + + if (is_array($sample[0])) { + throw new \Exception("KernelPCA::transform() accepts only one-dimensional arrays"); + } + + $pairs = $this->getDistancePairs($sample); + + return $this->projectSample($pairs); + } +} diff --git a/src/Phpml/DimensionReduction/PCA.php b/src/Phpml/DimensionReduction/PCA.php new file mode 100644 index 0000000..422dae4 --- /dev/null +++ b/src/Phpml/DimensionReduction/PCA.php @@ -0,0 +1,228 @@ + + * + * @param float $totalVariance Total explained variance to be preserved + * @param int $numFeatures Number of features to be preserved + * + * @throws \Exception + */ + public function __construct($totalVariance = null, $numFeatures = null) + { + if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) { + throw new \Exception("Total variance can be a value between 0.1 and 0.99"); + } + if ($numFeatures !== null && $numFeatures <= 0) { + throw new \Exception("Number of features to be preserved should be greater than 0"); + } + if ($totalVariance !== null && $numFeatures !== null) { + throw new \Exception("Either totalVariance or numFeatures should be specified in order to run the algorithm"); + } + + if ($numFeatures !== null) { + $this->numFeatures = $numFeatures; + } + if ($totalVariance !== null) { + $this->totalVariance = $totalVariance; + } + } + + /** + * Takes a data and returns a lower dimensional version + * of this data while preserving $totalVariance or $numFeatures.
+ * $data is an n-by-m matrix and returned array is + * n-by-k matrix where k <= m + * + * @param array $data + * + * @return array + */ + public function fit(array $data) + { + $n = count($data[0]); + + $data = $this->normalize($data, $n); + + $covMatrix = Covariance::covarianceMatrix($data, array_fill(0, $n, 0)); + + list($this->eigValues, $this->eigVectors) = $this->eigenDecomposition($covMatrix, $n); + + $this->fit = true; + + return $this->reduce($data); + } + + /** + * @param array $data + * @param int $n + */ + protected function calculateMeans(array $data, int $n) + { + // Calculate means for each dimension + $this->means = []; + for ($i=0; $i < $n; $i++) { + $column = array_column($data, $i); + $this->means[] = Mean::arithmetic($column); + } + } + + /** + * Normalization of the data includes subtracting mean from + * each dimension therefore dimensions will be centered to zero + * + * @param array $data + * @param int $n + * + * @return array + */ + protected function normalize(array $data, int $n) + { + if (empty($this->means)) { + $this->calculateMeans($data, $n); + } + + // Normalize data + foreach ($data as $i => $row) { + for ($k=0; $k < $n; $k++) { + $data[$i][$k] -= $this->means[$k]; + } + } + + return $data; + } + + /** + * Calculates eigenValues and eigenVectors of the given matrix. Returns + * top eigenVectors along with the largest eigenValues. The total explained variance + * of these eigenVectors will be no less than desired $totalVariance value + * + * @param array $matrix + * @param int $n + * + * @return array + */ + protected function eigenDecomposition(array $matrix, int $n) + { + $eig = new EigenvalueDecomposition($matrix); + $eigVals = $eig->getRealEigenvalues(); + $eigVects= $eig->getEigenvectors(); + + $totalEigVal = array_sum($eigVals); + // Sort eigenvalues in descending order + arsort($eigVals); + + $explainedVar = 0.0; + $vectors = []; + $values = []; + foreach ($eigVals as $i => $eigVal) { + $explainedVar += $eigVal / $totalEigVal; + $vectors[] = $eigVects[$i]; + $values[] = $eigVal; + + if ($this->numFeatures !== null) { + if (count($vectors) == $this->numFeatures) { + break; + } + } else { + if ($explainedVar >= $this->totalVariance) { + break; + } + } + } + + return [$values, $vectors]; + } + + /** + * Returns the reduced data + * + * @param array $data + * + * @return array + */ + protected function reduce(array $data) + { + $m1 = new Matrix($data); + $m2 = new Matrix($this->eigVectors); + + return $m1->multiply($m2->transpose())->toArray(); + } + + /** + * Transforms the given sample to a lower dimensional vector by using + * the eigenVectors obtained in the last run of fit. + * + * @param array $sample + * + * @return array + */ + public function transform(array $sample) + { + if (!$this->fit) { + throw new \Exception("PCA has not been fitted with respect to original dataset, please run PCA::fit() first"); + } + + if (! 
is_array($sample[0])) { + $sample = [$sample]; + } + + $sample = $this->normalize($sample, count($sample[0])); + + return $this->reduce($sample); + } +} diff --git a/src/Phpml/Exception/InvalidArgumentException.php b/src/Phpml/Exception/InvalidArgumentException.php index 42063f1..f6b0031 100644 --- a/src/Phpml/Exception/InvalidArgumentException.php +++ b/src/Phpml/Exception/InvalidArgumentException.php @@ -55,7 +55,7 @@ class InvalidArgumentException extends \Exception */ public static function inconsistentMatrixSupplied() { - return new self('Inconsistent matrix aupplied'); + return new self('Inconsistent matrix applied'); } /** diff --git a/src/Phpml/Math/Distance/Euclidean.php b/src/Phpml/Math/Distance/Euclidean.php index ad60e87..1158f5d 100644 --- a/src/Phpml/Math/Distance/Euclidean.php +++ b/src/Phpml/Math/Distance/Euclidean.php @@ -31,4 +31,17 @@ class Euclidean implements Distance return sqrt((float) $distance); } + + /** + * Square of Euclidean distance + * + * @param array $a + * @param array $b + * + * @return float + */ + public function sqDistance(array $a, array $b): float + { + return $this->distance($a, $b) ** 2; + } } diff --git a/src/Phpml/Math/LinearAlgebra/EigenvalueDecomposition.php b/src/Phpml/Math/LinearAlgebra/EigenvalueDecomposition.php new file mode 100644 index 0000000..27557bb --- /dev/null +++ b/src/Phpml/Math/LinearAlgebra/EigenvalueDecomposition.php @@ -0,0 +1,890 @@ +d = $this->V[$this->n-1]; + // Householder reduction to tridiagonal form. + for ($i = $this->n-1; $i > 0; --$i) { + $i_ = $i -1; + // Scale to avoid under/overflow. + $h = $scale = 0.0; + $scale += array_sum(array_map('abs', $this->d)); + if ($scale == 0.0) { + $this->e[$i] = $this->d[$i_]; + $this->d = array_slice($this->V[$i_], 0, $i_); + for ($j = 0; $j < $i; ++$j) { + $this->V[$j][$i] = $this->V[$i][$j] = 0.0; + } + } else { + // Generate Householder vector. + for ($k = 0; $k < $i; ++$k) { + $this->d[$k] /= $scale; + $h += pow($this->d[$k], 2); + } + $f = $this->d[$i_]; + $g = sqrt($h); + if ($f > 0) { + $g = -$g; + } + $this->e[$i] = $scale * $g; + $h = $h - $f * $g; + $this->d[$i_] = $f - $g; + for ($j = 0; $j < $i; ++$j) { + $this->e[$j] = 0.0; + } + // Apply similarity transformation to remaining columns. + for ($j = 0; $j < $i; ++$j) { + $f = $this->d[$j]; + $this->V[$j][$i] = $f; + $g = $this->e[$j] + $this->V[$j][$j] * $f; + for ($k = $j+1; $k <= $i_; ++$k) { + $g += $this->V[$k][$j] * $this->d[$k]; + $this->e[$k] += $this->V[$k][$j] * $f; + } + $this->e[$j] = $g; + } + $f = 0.0; + for ($j = 0; $j < $i; ++$j) { + if ($h === 0) { + $h = 1e-20; + } + $this->e[$j] /= $h; + $f += $this->e[$j] * $this->d[$j]; + } + $hh = $f / (2 * $h); + for ($j=0; $j < $i; ++$j) { + $this->e[$j] -= $hh * $this->d[$j]; + } + for ($j = 0; $j < $i; ++$j) { + $f = $this->d[$j]; + $g = $this->e[$j]; + for ($k = $j; $k <= $i_; ++$k) { + $this->V[$k][$j] -= ($f * $this->e[$k] + $g * $this->d[$k]); + } + $this->d[$j] = $this->V[$i-1][$j]; + $this->V[$i][$j] = 0.0; + } + } + $this->d[$i] = $h; + } + + // Accumulate transformations. 
+ for ($i = 0; $i < $this->n-1; ++$i) { + $this->V[$this->n-1][$i] = $this->V[$i][$i]; + $this->V[$i][$i] = 1.0; + $h = $this->d[$i+1]; + if ($h != 0.0) { + for ($k = 0; $k <= $i; ++$k) { + $this->d[$k] = $this->V[$k][$i+1] / $h; + } + for ($j = 0; $j <= $i; ++$j) { + $g = 0.0; + for ($k = 0; $k <= $i; ++$k) { + $g += $this->V[$k][$i+1] * $this->V[$k][$j]; + } + for ($k = 0; $k <= $i; ++$k) { + $this->V[$k][$j] -= $g * $this->d[$k]; + } + } + } + for ($k = 0; $k <= $i; ++$k) { + $this->V[$k][$i+1] = 0.0; + } + } + + $this->d = $this->V[$this->n-1]; + $this->V[$this->n-1] = array_fill(0, $j, 0.0); + $this->V[$this->n-1][$this->n-1] = 1.0; + $this->e[0] = 0.0; + } + + + /** + * Symmetric tridiagonal QL algorithm. + * + * This is derived from the Algol procedures tql2, by + * Bowdler, Martin, Reinsch, and Wilkinson, Handbook for + * Auto. Comp., Vol.ii-Linear Algebra, and the corresponding + * Fortran subroutine in EISPACK. + */ + private function tql2() + { + for ($i = 1; $i < $this->n; ++$i) { + $this->e[$i-1] = $this->e[$i]; + } + $this->e[$this->n-1] = 0.0; + $f = 0.0; + $tst1 = 0.0; + $eps = pow(2.0, -52.0); + + for ($l = 0; $l < $this->n; ++$l) { + // Find small subdiagonal element + $tst1 = max($tst1, abs($this->d[$l]) + abs($this->e[$l])); + $m = $l; + while ($m < $this->n) { + if (abs($this->e[$m]) <= $eps * $tst1) { + break; + } + ++$m; + } + // If m == l, $this->d[l] is an eigenvalue, + // otherwise, iterate. + if ($m > $l) { + $iter = 0; + do { + // Could check iteration count here. + $iter += 1; + // Compute implicit shift + $g = $this->d[$l]; + $p = ($this->d[$l+1] - $g) / (2.0 * $this->e[$l]); + $r = hypot($p, 1.0); + if ($p < 0) { + $r *= -1; + } + $this->d[$l] = $this->e[$l] / ($p + $r); + $this->d[$l+1] = $this->e[$l] * ($p + $r); + $dl1 = $this->d[$l+1]; + $h = $g - $this->d[$l]; + for ($i = $l + 2; $i < $this->n; ++$i) { + $this->d[$i] -= $h; + } + $f += $h; + // Implicit QL transformation. + $p = $this->d[$m]; + $c = 1.0; + $c2 = $c3 = $c; + $el1 = $this->e[$l + 1]; + $s = $s2 = 0.0; + for ($i = $m-1; $i >= $l; --$i) { + $c3 = $c2; + $c2 = $c; + $s2 = $s; + $g = $c * $this->e[$i]; + $h = $c * $p; + $r = hypot($p, $this->e[$i]); + $this->e[$i+1] = $s * $r; + $s = $this->e[$i] / $r; + $c = $p / $r; + $p = $c * $this->d[$i] - $s * $g; + $this->d[$i+1] = $h + $s * ($c * $g + $s * $this->d[$i]); + // Accumulate transformation. + for ($k = 0; $k < $this->n; ++$k) { + $h = $this->V[$k][$i+1]; + $this->V[$k][$i+1] = $s * $this->V[$k][$i] + $c * $h; + $this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h; + } + } + $p = -$s * $s2 * $c3 * $el1 * $this->e[$l] / $dl1; + $this->e[$l] = $s * $p; + $this->d[$l] = $c * $p; + // Check for convergence. + } while (abs($this->e[$l]) > $eps * $tst1); + } + $this->d[$l] = $this->d[$l] + $f; + $this->e[$l] = 0.0; + } + + // Sort eigenvalues and corresponding vectors. + for ($i = 0; $i < $this->n - 1; ++$i) { + $k = $i; + $p = $this->d[$i]; + for ($j = $i+1; $j < $this->n; ++$j) { + if ($this->d[$j] < $p) { + $k = $j; + $p = $this->d[$j]; + } + } + if ($k != $i) { + $this->d[$k] = $this->d[$i]; + $this->d[$i] = $p; + for ($j = 0; $j < $this->n; ++$j) { + $p = $this->V[$j][$i]; + $this->V[$j][$i] = $this->V[$j][$k]; + $this->V[$j][$k] = $p; + } + } + } + } + + + /** + * Nonsymmetric reduction to Hessenberg form. + * + * This is derived from the Algol procedures orthes and ortran, + * by Martin and Wilkinson, Handbook for Auto. Comp., + * Vol.ii-Linear Algebra, and the corresponding + * Fortran subroutines in EISPACK. 
+ */ + private function orthes() + { + $low = 0; + $high = $this->n-1; + + for ($m = $low+1; $m <= $high-1; ++$m) { + // Scale column. + $scale = 0.0; + for ($i = $m; $i <= $high; ++$i) { + $scale = $scale + abs($this->H[$i][$m-1]); + } + if ($scale != 0.0) { + // Compute Householder transformation. + $h = 0.0; + for ($i = $high; $i >= $m; --$i) { + $this->ort[$i] = $this->H[$i][$m-1] / $scale; + $h += $this->ort[$i] * $this->ort[$i]; + } + $g = sqrt($h); + if ($this->ort[$m] > 0) { + $g *= -1; + } + $h -= $this->ort[$m] * $g; + $this->ort[$m] -= $g; + // Apply Householder similarity transformation + // H = (I -u * u' / h) * H * (I -u * u') / h) + for ($j = $m; $j < $this->n; ++$j) { + $f = 0.0; + for ($i = $high; $i >= $m; --$i) { + $f += $this->ort[$i] * $this->H[$i][$j]; + } + $f /= $h; + for ($i = $m; $i <= $high; ++$i) { + $this->H[$i][$j] -= $f * $this->ort[$i]; + } + } + for ($i = 0; $i <= $high; ++$i) { + $f = 0.0; + for ($j = $high; $j >= $m; --$j) { + $f += $this->ort[$j] * $this->H[$i][$j]; + } + $f = $f / $h; + for ($j = $m; $j <= $high; ++$j) { + $this->H[$i][$j] -= $f * $this->ort[$j]; + } + } + $this->ort[$m] = $scale * $this->ort[$m]; + $this->H[$m][$m-1] = $scale * $g; + } + } + + // Accumulate transformations (Algol's ortran). + for ($i = 0; $i < $this->n; ++$i) { + for ($j = 0; $j < $this->n; ++$j) { + $this->V[$i][$j] = ($i == $j ? 1.0 : 0.0); + } + } + for ($m = $high-1; $m >= $low+1; --$m) { + if ($this->H[$m][$m-1] != 0.0) { + for ($i = $m+1; $i <= $high; ++$i) { + $this->ort[$i] = $this->H[$i][$m-1]; + } + for ($j = $m; $j <= $high; ++$j) { + $g = 0.0; + for ($i = $m; $i <= $high; ++$i) { + $g += $this->ort[$i] * $this->V[$i][$j]; + } + // Double division avoids possible underflow + $g = ($g / $this->ort[$m]) / $this->H[$m][$m-1]; + for ($i = $m; $i <= $high; ++$i) { + $this->V[$i][$j] += $g * $this->ort[$i]; + } + } + } + } + } + + + /** + * Performs complex division. + */ + private function cdiv($xr, $xi, $yr, $yi) + { + if (abs($yr) > abs($yi)) { + $r = $yi / $yr; + $d = $yr + $r * $yi; + $this->cdivr = ($xr + $r * $xi) / $d; + $this->cdivi = ($xi - $r * $xr) / $d; + } else { + $r = $yr / $yi; + $d = $yi + $r * $yr; + $this->cdivr = ($r * $xr + $xi) / $d; + $this->cdivi = ($r * $xi - $xr) / $d; + } + } + + + /** + * Nonsymmetric reduction from Hessenberg to real Schur form. + * + * Code is derived from the Algol procedure hqr2, + * by Martin and Wilkinson, Handbook for Auto. Comp., + * Vol.ii-Linear Algebra, and the corresponding + * Fortran subroutine in EISPACK. 
+ */ + private function hqr2() + { + // Initialize + $nn = $this->n; + $n = $nn - 1; + $low = 0; + $high = $nn - 1; + $eps = pow(2.0, -52.0); + $exshift = 0.0; + $p = $q = $r = $s = $z = 0; + // Store roots isolated by balanc and compute matrix norm + $norm = 0.0; + + for ($i = 0; $i < $nn; ++$i) { + if (($i < $low) or ($i > $high)) { + $this->d[$i] = $this->H[$i][$i]; + $this->e[$i] = 0.0; + } + for ($j = max($i-1, 0); $j < $nn; ++$j) { + $norm = $norm + abs($this->H[$i][$j]); + } + } + + // Outer loop over eigenvalue index + $iter = 0; + while ($n >= $low) { + // Look for single small sub-diagonal element + $l = $n; + while ($l > $low) { + $s = abs($this->H[$l-1][$l-1]) + abs($this->H[$l][$l]); + if ($s == 0.0) { + $s = $norm; + } + if (abs($this->H[$l][$l-1]) < $eps * $s) { + break; + } + --$l; + } + // Check for convergence + // One root found + if ($l == $n) { + $this->H[$n][$n] = $this->H[$n][$n] + $exshift; + $this->d[$n] = $this->H[$n][$n]; + $this->e[$n] = 0.0; + --$n; + $iter = 0; + // Two roots found + } elseif ($l == $n-1) { + $w = $this->H[$n][$n-1] * $this->H[$n-1][$n]; + $p = ($this->H[$n-1][$n-1] - $this->H[$n][$n]) / 2.0; + $q = $p * $p + $w; + $z = sqrt(abs($q)); + $this->H[$n][$n] = $this->H[$n][$n] + $exshift; + $this->H[$n-1][$n-1] = $this->H[$n-1][$n-1] + $exshift; + $x = $this->H[$n][$n]; + // Real pair + if ($q >= 0) { + if ($p >= 0) { + $z = $p + $z; + } else { + $z = $p - $z; + } + $this->d[$n-1] = $x + $z; + $this->d[$n] = $this->d[$n-1]; + if ($z != 0.0) { + $this->d[$n] = $x - $w / $z; + } + $this->e[$n-1] = 0.0; + $this->e[$n] = 0.0; + $x = $this->H[$n][$n-1]; + $s = abs($x) + abs($z); + $p = $x / $s; + $q = $z / $s; + $r = sqrt($p * $p + $q * $q); + $p = $p / $r; + $q = $q / $r; + // Row modification + for ($j = $n-1; $j < $nn; ++$j) { + $z = $this->H[$n-1][$j]; + $this->H[$n-1][$j] = $q * $z + $p * $this->H[$n][$j]; + $this->H[$n][$j] = $q * $this->H[$n][$j] - $p * $z; + } + // Column modification + for ($i = 0; $i <= $n; ++$i) { + $z = $this->H[$i][$n-1]; + $this->H[$i][$n-1] = $q * $z + $p * $this->H[$i][$n]; + $this->H[$i][$n] = $q * $this->H[$i][$n] - $p * $z; + } + // Accumulate transformations + for ($i = $low; $i <= $high; ++$i) { + $z = $this->V[$i][$n-1]; + $this->V[$i][$n-1] = $q * $z + $p * $this->V[$i][$n]; + $this->V[$i][$n] = $q * $this->V[$i][$n] - $p * $z; + } + // Complex pair + } else { + $this->d[$n-1] = $x + $p; + $this->d[$n] = $x + $p; + $this->e[$n-1] = $z; + $this->e[$n] = -$z; + } + $n = $n - 2; + $iter = 0; + // No convergence yet + } else { + // Form shift + $x = $this->H[$n][$n]; + $y = 0.0; + $w = 0.0; + if ($l < $n) { + $y = $this->H[$n-1][$n-1]; + $w = $this->H[$n][$n-1] * $this->H[$n-1][$n]; + } + // Wilkinson's original ad hoc shift + if ($iter == 10) { + $exshift += $x; + for ($i = $low; $i <= $n; ++$i) { + $this->H[$i][$i] -= $x; + } + $s = abs($this->H[$n][$n-1]) + abs($this->H[$n-1][$n-2]); + $x = $y = 0.75 * $s; + $w = -0.4375 * $s * $s; + } + // MATLAB's new ad hoc shift + if ($iter == 30) { + $s = ($y - $x) / 2.0; + $s = $s * $s + $w; + if ($s > 0) { + $s = sqrt($s); + if ($y < $x) { + $s = -$s; + } + $s = $x - $w / (($y - $x) / 2.0 + $s); + for ($i = $low; $i <= $n; ++$i) { + $this->H[$i][$i] -= $s; + } + $exshift += $s; + $x = $y = $w = 0.964; + } + } + // Could check iteration count here. 
+ $iter = $iter + 1; + // Look for two consecutive small sub-diagonal elements + $m = $n - 2; + while ($m >= $l) { + $z = $this->H[$m][$m]; + $r = $x - $z; + $s = $y - $z; + $p = ($r * $s - $w) / $this->H[$m+1][$m] + $this->H[$m][$m+1]; + $q = $this->H[$m+1][$m+1] - $z - $r - $s; + $r = $this->H[$m+2][$m+1]; + $s = abs($p) + abs($q) + abs($r); + $p = $p / $s; + $q = $q / $s; + $r = $r / $s; + if ($m == $l) { + break; + } + if (abs($this->H[$m][$m-1]) * (abs($q) + abs($r)) < + $eps * (abs($p) * (abs($this->H[$m-1][$m-1]) + abs($z) + abs($this->H[$m+1][$m+1])))) { + break; + } + --$m; + } + for ($i = $m + 2; $i <= $n; ++$i) { + $this->H[$i][$i-2] = 0.0; + if ($i > $m+2) { + $this->H[$i][$i-3] = 0.0; + } + } + // Double QR step involving rows l:n and columns m:n + for ($k = $m; $k <= $n-1; ++$k) { + $notlast = ($k != $n-1); + if ($k != $m) { + $p = $this->H[$k][$k-1]; + $q = $this->H[$k+1][$k-1]; + $r = ($notlast ? $this->H[$k+2][$k-1] : 0.0); + $x = abs($p) + abs($q) + abs($r); + if ($x != 0.0) { + $p = $p / $x; + $q = $q / $x; + $r = $r / $x; + } + } + if ($x == 0.0) { + break; + } + $s = sqrt($p * $p + $q * $q + $r * $r); + if ($p < 0) { + $s = -$s; + } + if ($s != 0) { + if ($k != $m) { + $this->H[$k][$k-1] = -$s * $x; + } elseif ($l != $m) { + $this->H[$k][$k-1] = -$this->H[$k][$k-1]; + } + $p = $p + $s; + $x = $p / $s; + $y = $q / $s; + $z = $r / $s; + $q = $q / $p; + $r = $r / $p; + // Row modification + for ($j = $k; $j < $nn; ++$j) { + $p = $this->H[$k][$j] + $q * $this->H[$k+1][$j]; + if ($notlast) { + $p = $p + $r * $this->H[$k+2][$j]; + $this->H[$k+2][$j] = $this->H[$k+2][$j] - $p * $z; + } + $this->H[$k][$j] = $this->H[$k][$j] - $p * $x; + $this->H[$k+1][$j] = $this->H[$k+1][$j] - $p * $y; + } + // Column modification + for ($i = 0; $i <= min($n, $k+3); ++$i) { + $p = $x * $this->H[$i][$k] + $y * $this->H[$i][$k+1]; + if ($notlast) { + $p = $p + $z * $this->H[$i][$k+2]; + $this->H[$i][$k+2] = $this->H[$i][$k+2] - $p * $r; + } + $this->H[$i][$k] = $this->H[$i][$k] - $p; + $this->H[$i][$k+1] = $this->H[$i][$k+1] - $p * $q; + } + // Accumulate transformations + for ($i = $low; $i <= $high; ++$i) { + $p = $x * $this->V[$i][$k] + $y * $this->V[$i][$k+1]; + if ($notlast) { + $p = $p + $z * $this->V[$i][$k+2]; + $this->V[$i][$k+2] = $this->V[$i][$k+2] - $p * $r; + } + $this->V[$i][$k] = $this->V[$i][$k] - $p; + $this->V[$i][$k+1] = $this->V[$i][$k+1] - $p * $q; + } + } // ($s != 0) + } // k loop + } // check convergence + } // while ($n >= $low) + + // Backsubstitute to find vectors of upper triangular form + if ($norm == 0.0) { + return; + } + + for ($n = $nn-1; $n >= 0; --$n) { + $p = $this->d[$n]; + $q = $this->e[$n]; + // Real vector + if ($q == 0) { + $l = $n; + $this->H[$n][$n] = 1.0; + for ($i = $n-1; $i >= 0; --$i) { + $w = $this->H[$i][$i] - $p; + $r = 0.0; + for ($j = $l; $j <= $n; ++$j) { + $r = $r + $this->H[$i][$j] * $this->H[$j][$n]; + } + if ($this->e[$i] < 0.0) { + $z = $w; + $s = $r; + } else { + $l = $i; + if ($this->e[$i] == 0.0) { + if ($w != 0.0) { + $this->H[$i][$n] = -$r / $w; + } else { + $this->H[$i][$n] = -$r / ($eps * $norm); + } + // Solve real equations + } else { + $x = $this->H[$i][$i+1]; + $y = $this->H[$i+1][$i]; + $q = ($this->d[$i] - $p) * ($this->d[$i] - $p) + $this->e[$i] * $this->e[$i]; + $t = ($x * $s - $z * $r) / $q; + $this->H[$i][$n] = $t; + if (abs($x) > abs($z)) { + $this->H[$i+1][$n] = (-$r - $w * $t) / $x; + } else { + $this->H[$i+1][$n] = (-$s - $y * $t) / $z; + } + } + // Overflow control + $t = abs($this->H[$i][$n]); + if (($eps * $t) * 
$t > 1) { + for ($j = $i; $j <= $n; ++$j) { + $this->H[$j][$n] = $this->H[$j][$n] / $t; + } + } + } + } + // Complex vector + } elseif ($q < 0) { + $l = $n-1; + // Last vector component imaginary so matrix is triangular + if (abs($this->H[$n][$n-1]) > abs($this->H[$n-1][$n])) { + $this->H[$n-1][$n-1] = $q / $this->H[$n][$n-1]; + $this->H[$n-1][$n] = -($this->H[$n][$n] - $p) / $this->H[$n][$n-1]; + } else { + $this->cdiv(0.0, -$this->H[$n-1][$n], $this->H[$n-1][$n-1] - $p, $q); + $this->H[$n-1][$n-1] = $this->cdivr; + $this->H[$n-1][$n] = $this->cdivi; + } + $this->H[$n][$n-1] = 0.0; + $this->H[$n][$n] = 1.0; + for ($i = $n-2; $i >= 0; --$i) { + // double ra,sa,vr,vi; + $ra = 0.0; + $sa = 0.0; + for ($j = $l; $j <= $n; ++$j) { + $ra = $ra + $this->H[$i][$j] * $this->H[$j][$n-1]; + $sa = $sa + $this->H[$i][$j] * $this->H[$j][$n]; + } + $w = $this->H[$i][$i] - $p; + if ($this->e[$i] < 0.0) { + $z = $w; + $r = $ra; + $s = $sa; + } else { + $l = $i; + if ($this->e[$i] == 0) { + $this->cdiv(-$ra, -$sa, $w, $q); + $this->H[$i][$n-1] = $this->cdivr; + $this->H[$i][$n] = $this->cdivi; + } else { + // Solve complex equations + $x = $this->H[$i][$i+1]; + $y = $this->H[$i+1][$i]; + $vr = ($this->d[$i] - $p) * ($this->d[$i] - $p) + $this->e[$i] * $this->e[$i] - $q * $q; + $vi = ($this->d[$i] - $p) * 2.0 * $q; + if ($vr == 0.0 & $vi == 0.0) { + $vr = $eps * $norm * (abs($w) + abs($q) + abs($x) + abs($y) + abs($z)); + } + $this->cdiv($x * $r - $z * $ra + $q * $sa, $x * $s - $z * $sa - $q * $ra, $vr, $vi); + $this->H[$i][$n-1] = $this->cdivr; + $this->H[$i][$n] = $this->cdivi; + if (abs($x) > (abs($z) + abs($q))) { + $this->H[$i+1][$n-1] = (-$ra - $w * $this->H[$i][$n-1] + $q * $this->H[$i][$n]) / $x; + $this->H[$i+1][$n] = (-$sa - $w * $this->H[$i][$n] - $q * $this->H[$i][$n-1]) / $x; + } else { + $this->cdiv(-$r - $y * $this->H[$i][$n-1], -$s - $y * $this->H[$i][$n], $z, $q); + $this->H[$i+1][$n-1] = $this->cdivr; + $this->H[$i+1][$n] = $this->cdivi; + } + } + // Overflow control + $t = max(abs($this->H[$i][$n-1]), abs($this->H[$i][$n])); + if (($eps * $t) * $t > 1) { + for ($j = $i; $j <= $n; ++$j) { + $this->H[$j][$n-1] = $this->H[$j][$n-1] / $t; + $this->H[$j][$n] = $this->H[$j][$n] / $t; + } + } + } // end else + } // end for + } // end else for complex case + } // end for + + // Vectors of isolated roots + for ($i = 0; $i < $nn; ++$i) { + if ($i < $low | $i > $high) { + for ($j = $i; $j < $nn; ++$j) { + $this->V[$i][$j] = $this->H[$i][$j]; + } + } + } + + // Back transformation to get eigenvectors of original matrix + for ($j = $nn-1; $j >= $low; --$j) { + for ($i = $low; $i <= $high; ++$i) { + $z = 0.0; + for ($k = $low; $k <= min($j, $high); ++$k) { + $z = $z + $this->V[$i][$k] * $this->H[$k][$j]; + } + $this->V[$i][$j] = $z; + } + } + } // end hqr2 + + + /** + * Constructor: Check for symmetry, then construct the eigenvalue decomposition + * + * @param array $Arg + */ + public function __construct(array $Arg) + { + $this->A = $Arg; + $this->n = count($Arg[0]); + + $issymmetric = true; + for ($j = 0; ($j < $this->n) & $issymmetric; ++$j) { + for ($i = 0; ($i < $this->n) & $issymmetric; ++$i) { + $issymmetric = ($this->A[$i][$j] == $this->A[$j][$i]); + } + } + + if ($issymmetric) { + $this->V = $this->A; + // Tridiagonalize. + $this->tred2(); + // Diagonalize. + $this->tql2(); + } else { + $this->H = $this->A; + $this->ort = []; + // Reduce to Hessenberg form. + $this->orthes(); + // Reduce Hessenberg to real Schur form. 
+            $this->hqr2();
+        }
+    }
+
+    /**
+     * Return the eigenvector matrix
+     *
+     * @access public
+     * @return array
+     */
+    public function getEigenvectors()
+    {
+        $vectors = $this->V;
+
+        // Always return the eigenvectors of length 1.0
+        $vectors = new Matrix($vectors);
+        $vectors = array_map(function ($vect) {
+            $sum = 0;
+            for ($i=0; $i < count($vect); $i++) {
+                $sum += $vect[$i] ** 2;
+            }
+
+            $sum = sqrt($sum);
+            for ($i=0; $i < count($vect); $i++) {
+                $vect[$i] /= $sum;
+            }
+
+            return $vect;
+        }, $vectors->transpose()->toArray());
+
+        return $vectors;
+    }
+
+
+    /**
+     * Return the real parts of the eigenvalues
+ * d = real(diag(D)); + * + * @return array + */ + public function getRealEigenvalues() + { + return $this->d; + } + + + /** + * Return the imaginary parts of the eigenvalues
+ * d = imag(diag(D)) + * + * @return array + */ + public function getImagEigenvalues() + { + return $this->e; + } + + + /** + * Return the block diagonal eigenvalue matrix + * + * @return array + */ + public function getDiagonalEigenvalues() + { + for ($i = 0; $i < $this->n; ++$i) { + $D[$i] = array_fill(0, $this->n, 0.0); + $D[$i][$i] = $this->d[$i]; + if ($this->e[$i] == 0) { + continue; + } + $o = ($this->e[$i] > 0) ? $i + 1 : $i - 1; + $D[$i][$o] = $this->e[$i]; + } + return $D; + } +} // class EigenvalueDecomposition diff --git a/src/Phpml/Math/Matrix.php b/src/Phpml/Math/Matrix.php index 39f5c5a..25101f3 100644 --- a/src/Phpml/Math/Matrix.php +++ b/src/Phpml/Math/Matrix.php @@ -37,8 +37,15 @@ class Matrix */ public function __construct(array $matrix, bool $validate = true) { - $this->rows = count($matrix); - $this->columns = count($matrix[0]); + // When a row vector is given + if (!is_array($matrix[0])) { + $this->rows = 1; + $this->columns = count($matrix); + $matrix = [$matrix]; + } else { + $this->rows = count($matrix); + $this->columns = count($matrix[0]); + } if ($validate) { for ($i = 0; $i < $this->rows; ++$i) { @@ -74,6 +81,14 @@ class Matrix return $this->matrix; } + /** + * @return float + */ + public function toScalar() + { + return $this->matrix[0][0]; + } + /** * @return int */ @@ -103,14 +118,10 @@ class Matrix throw MatrixException::columnOutOfRange(); } - $values = []; - for ($i = 0; $i < $this->rows; ++$i) { - $values[] = $this->matrix[$i][$column]; - } - - return $values; + return array_column($this->matrix, $column); } + /** * @return float|int * @@ -167,14 +178,15 @@ class Matrix */ public function transpose() { - $newMatrix = []; - for ($i = 0; $i < $this->rows; ++$i) { - for ($j = 0; $j < $this->columns; ++$j) { - $newMatrix[$j][$i] = $this->matrix[$i][$j]; - } + if ($this->rows == 1) { + $matrix = array_map(function ($el) { + return [$el]; + }, $this->matrix[0]); + } else { + $matrix = array_map(null, ...$this->matrix); } - return new self($newMatrix, false); + return new self($matrix, false); } /** @@ -222,6 +234,64 @@ class Matrix return new self($newMatrix, false); } + /** + * @param $value + * + * @return Matrix + */ + public function multiplyByScalar($value) + { + $newMatrix = []; + for ($i = 0; $i < $this->rows; ++$i) { + for ($j = 0; $j < $this->columns; ++$j) { + $newMatrix[$i][$j] = $this->matrix[$i][$j] * $value; + } + } + + return new self($newMatrix, false); + } + + /** + * Element-wise addition of the matrix with another one + * + * @param Matrix $other + */ + public function add(Matrix $other) + { + return $this->_add($other); + } + + /** + * Element-wise subtracting of another matrix from this one + * + * @param Matrix $other + */ + public function subtract(Matrix $other) + { + return $this->_add($other, -1); + } + + /** + * Element-wise addition or substraction depending on the given sign parameter + * + * @param Matrix $other + * @param type $sign + */ + protected function _add(Matrix $other, $sign = 1) + { + $a1 = $this->toArray(); + $a2 = $other->toArray(); + + $newMatrix = []; + for ($i=0; $i < $this->rows; $i++) { + for ($k=0; $k < $this->columns; $k++) { + $newMatrix[$i][$k] = $a1[$i][$k] + $sign * $a2[$i][$k]; + } + } + + return new Matrix($newMatrix, false); + } + /** * @return Matrix * @@ -283,4 +353,33 @@ class Matrix { return 0 == $this->getDeterminant(); } + + /** + * Returns the transpose of given array + * + * @param array $array + * + * @return array + */ + public static function transposeArray(array $array) + { + return (new 
Matrix($array, false))->transpose()->toArray(); + } + + /** + * Returns the dot product of two arrays
+ * Matrix::dot(x, y) ==> x.y' + * + * @param array $array1 + * @param array $array2 + * + * @return array + */ + public static function dot(array $array1, array $array2) + { + $m1 = new Matrix($array1, false); + $m2 = new Matrix($array2, false); + + return $m1->multiply($m2->transpose())->toArray()[0]; + } } diff --git a/src/Phpml/Math/Statistic/Covariance.php b/src/Phpml/Math/Statistic/Covariance.php new file mode 100644 index 0000000..4a9b613 --- /dev/null +++ b/src/Phpml/Math/Statistic/Covariance.php @@ -0,0 +1,155 @@ + $xi) { + $yi = $y[$index]; + $sum += ($xi - $meanX) * ($yi - $meanY); + } + + if ($sample) { + --$n; + } + + return $sum / $n; + } + + /** + * Calculates covariance of two dimensions, i and k in the given data. + * + * @param array $data + * @param int $i + * @param int $k + * @param type $sample + * @param int $n + * @param float $meanX + * @param float $meanY + */ + public static function fromDataset(array $data, int $i, int $k, $sample = true, float $meanX = null, float $meanY = null) + { + if (empty($data)) { + throw InvalidArgumentException::arrayCantBeEmpty(); + } + + $n = count($data); + if ($sample && $n === 1) { + throw InvalidArgumentException::arraySizeToSmall(2); + } + + if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) { + throw new \Exception("Given indices i and k do not match with the dimensionality of data"); + } + + if ($meanX === null || $meanY === null) { + $x = array_column($data, $i); + $y = array_column($data, $k); + + $meanX = Mean::arithmetic($x); + $meanY = Mean::arithmetic($y); + $sum = 0.0; + foreach ($x as $index => $xi) { + $yi = $y[$index]; + $sum += ($xi - $meanX) * ($yi - $meanY); + } + } else { + // In the case, whole dataset given along with dimension indices, i and k, + // we would like to avoid getting column data with array_column and operate + // over this extra copy of column data for memory efficiency purposes. + // + // Instead we traverse through the whole data and get what we actually need + // without copying the data. This way, memory use will be reduced + // with a slight cost of CPU utilization. 
+ $sum = 0.0; + foreach ($data as $row) { + $val = []; + foreach ($row as $index => $col) { + if ($index == $i) { + $val[0] = $col - $meanX; + } + if ($index == $k) { + $val[1] = $col - $meanY; + } + } + $sum += $val[0] * $val[1]; + } + } + + if ($sample) { + --$n; + } + + return $sum / $n; + } + + /** + * Returns the covariance matrix of n-dimensional data + * + * @param array $data + * + * @return array + */ + public static function covarianceMatrix(array $data, array $means = null) + { + $n = count($data[0]); + + if ($means === null) { + $means = []; + for ($i=0; $i < $n; $i++) { + $means[] = Mean::arithmetic(array_column($data, $i)); + } + } + + $cov = []; + for ($i=0; $i < $n; $i++) { + for ($k=0; $k < $n; $k++) { + if ($i > $k) { + $cov[$i][$k] = $cov[$k][$i]; + } else { + $cov[$i][$k] = Covariance::fromDataset( + $data, $i, $k, true, $means[$i], $means[$k]); + } + } + } + + return $cov; + } +} diff --git a/tests/Phpml/DimensionReduction/KernelPCATest.php b/tests/Phpml/DimensionReduction/KernelPCATest.php new file mode 100644 index 0000000..14b2d7d --- /dev/null +++ b/tests/Phpml/DimensionReduction/KernelPCATest.php @@ -0,0 +1,51 @@ +fit($data); + + // Due to the fact that the sign of values can be flipped + // during the calculation of eigenValues, we have to compare + // absolute value of the values + array_map(function ($val1, $val2) use ($epsilon) { + $this->assertEquals(abs($val1), abs($val2), '', $epsilon); + }, $transformed, $reducedData); + + // Fitted KernelPCA object can also transform an arbitrary sample of the + // same dimensionality with the original dataset + $newData = [1.25, 2.25]; + $newTransformed = [0.18956227539216]; + $newTransformed2 = $kpca->transform($newData); + $this->assertEquals(abs($newTransformed[0]), abs($newTransformed2[0]), '', $epsilon); + } +} diff --git a/tests/Phpml/DimensionReduction/PCATest.php b/tests/Phpml/DimensionReduction/PCATest.php new file mode 100644 index 0000000..8f65e98 --- /dev/null +++ b/tests/Phpml/DimensionReduction/PCATest.php @@ -0,0 +1,57 @@ +fit($data); + + // Due to the fact that the sign of values can be flipped + // during the calculation of eigenValues, we have to compare + // absolute value of the values + array_map(function ($val1, $val2) use ($epsilon) { + $this->assertEquals(abs($val1), abs($val2), '', $epsilon); + }, $transformed, $reducedData); + + // Test fitted PCA object to transform an arbitrary sample of the + // same dimensionality with the original dataset + foreach ($data as $i => $row) { + $newRow = [[$transformed[$i]]]; + $newRow2= $pca->transform($row); + + array_map(function ($val1, $val2) use ($epsilon) { + $this->assertEquals(abs($val1), abs($val2), '', $epsilon); + }, $newRow, $newRow2); + } + } +} diff --git a/tests/Phpml/Math/LinearAlgebra/EigenDecompositionTest.php b/tests/Phpml/Math/LinearAlgebra/EigenDecompositionTest.php new file mode 100644 index 0000000..4bca1bd --- /dev/null +++ b/tests/Phpml/Math/LinearAlgebra/EigenDecompositionTest.php @@ -0,0 +1,65 @@ +getEigenvectors(); + $eigValues = $decomp->getRealEigenvalues(); + $this->assertEquals($knownEigvalues, $eigValues, '', $epsilon); + $this->assertEquals($knownEigvectors, $eigVectors, '', $epsilon); + + // Secondly, generate a symmetric square matrix + // and test for A.v=λ.v + // + // (We, for now, omit non-symmetric matrices whose eigenvalues can be complex numbers) + $len = 3; + $A = array_fill(0, $len, array_fill(0, $len, 0.0)); + srand(intval(microtime(true) * 1000)); + for ($i=0; $i < $len; $i++) { + for ($k=0; $k < $len; $k++) { + if 
($i > $k) { + $A[$i][$k] = $A[$k][$i]; + } else { + $A[$i][$k] = rand(0, 10); + } + } + } + + $decomp = new EigenvalueDecomposition($A); + $eigValues = $decomp->getRealEigenvalues(); + $eigVectors= $decomp->getEigenvectors(); + + foreach ($eigValues as $index => $lambda) { + $m1 = new Matrix($A); + $m2 = (new Matrix($eigVectors[$index]))->transpose(); + + // A.v=λ.v + $leftSide = $m1->multiply($m2)->toArray(); + $rightSide= $m2->multiplyByScalar($lambda)->toArray(); + + $this->assertEquals($leftSide, $rightSide, '', $epsilon); + } + } +} diff --git a/tests/Phpml/Math/MatrixTest.php b/tests/Phpml/Math/MatrixTest.php index 0b46612..257fd72 100644 --- a/tests/Phpml/Math/MatrixTest.php +++ b/tests/Phpml/Math/MatrixTest.php @@ -188,4 +188,80 @@ class MatrixTest extends TestCase $this->assertEquals($crossOuted, $matrix->crossOut(1, 1)->toArray()); } + + public function testToScalar() + { + $matrix = new Matrix([[1, 2, 3], [3, 2, 3]]); + + $this->assertEquals($matrix->toScalar(), 1); + } + + public function testMultiplyByScalar() + { + $matrix = new Matrix([ + [4, 6, 8], + [2, 10, 20], + ]); + + $result = [ + [-8, -12, -16], + [-4, -20, -40], + ]; + + $this->assertEquals($result, $matrix->multiplyByScalar(-2)->toArray()); + } + + public function testAdd() + { + $array1 = [1, 1, 1]; + $array2 = [2, 2, 2]; + $result = [3, 3, 3]; + + $m1 = new Matrix($array1); + $m2 = new Matrix($array2); + + $this->assertEquals($result, $m1->add($m2)->toArray()[0]); + } + + public function testSubtract() + { + $array1 = [1, 1, 1]; + $array2 = [2, 2, 2]; + $result = [-1, -1, -1]; + + $m1 = new Matrix($array1); + $m2 = new Matrix($array2); + + $this->assertEquals($result, $m1->subtract($m2)->toArray()[0]); + } + + public function testTransposeArray() + { + $array = [ + [1, 1, 1], + [2, 2, 2] + ]; + $transposed = [ + [1, 2], + [1, 2], + [1, 2] + ]; + + $this->assertEquals($transposed, Matrix::transposeArray($array)); + } + + public function testDot() + { + $vect1 = [2, 2, 2]; + $vect2 = [3, 3, 3]; + $dot = [18]; + + $this->assertEquals($dot, Matrix::dot($vect1, $vect2)); + + $matrix1 = [[1, 1], [2, 2]]; + $matrix2 = [[3, 3], [3, 3], [3, 3]]; + $dot = [6, 12]; + + $this->assertEquals($dot, Matrix::dot($matrix2, $matrix1)); + } } diff --git a/tests/Phpml/Math/Statistic/CovarianceTest.php b/tests/Phpml/Math/Statistic/CovarianceTest.php new file mode 100644 index 0000000..4b025a3 --- /dev/null +++ b/tests/Phpml/Math/Statistic/CovarianceTest.php @@ -0,0 +1,62 @@ +assertEquals($cov1, $knownCovariance[0][0], '', $epsilon); + $cov1 = Covariance::fromXYArrays($x, $x); + $this->assertEquals($cov1, $knownCovariance[0][0], '', $epsilon); + + $cov2 = Covariance::fromDataset($matrix, 0, 1); + $this->assertEquals($cov2, $knownCovariance[0][1], '', $epsilon); + $cov2 = Covariance::fromXYArrays($x, $y); + $this->assertEquals($cov2, $knownCovariance[0][1], '', $epsilon); + + // Second: calculation cov matrix with automatic means for each column + $covariance = Covariance::covarianceMatrix($matrix); + $this->assertEquals($knownCovariance, $covariance, '', $epsilon); + + // Thirdly, CovMatrix: Means are precalculated and given to the method + $x = array_column($matrix, 0); + $y = array_column($matrix, 1); + $meanX = Mean::arithmetic($x); + $meanY = Mean::arithmetic($y); + + $covariance = Covariance::covarianceMatrix($matrix, [$meanX, $meanY]); + $this->assertEquals($knownCovariance, $covariance, '', $epsilon); + } +}
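
For reviewers who want to try the new API end to end, here is a minimal usage sketch. It is not part of the diff: the namespaces are assumed from the PSR-4 layout implied by the file paths under src/Phpml/, and the sample data and variable names are invented for illustration.

```php
<?php

// Illustrative sketch only -- not part of the patch above. Class and method
// names come from the diff; namespaces and sample data are assumptions.

use Phpml\DimensionReduction\KernelPCA;
use Phpml\DimensionReduction\PCA;
use Phpml\Math\Statistic\Covariance;

$samples = [
    [1.0, 2.0], [1.5, 1.8], [5.0, 8.0],
    [8.0, 8.0], [1.0, 0.6], [9.0, 11.0],
];

// Plain PCA: keep a single principal component. Alternatively, pass a total
// explained variance (e.g. new PCA(0.95)) instead of a fixed column count.
$pca = new PCA(null, 1);
$reduced = $pca->fit($samples);            // n-by-1 array of projected samples
$projected = $pca->transform([2.0, 2.5]);  // project a new sample with the fitted model

// Kernel PCA with an RBF kernel, 2 output columns and gamma = 15.0
$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);
$reducedKernel = $kpca->fit($samples);
$projectedKernel = $kpca->transform([1.25, 2.25]);

// The covariance matrix used internally by PCA is also exposed directly
$covMatrix = Covariance::covarianceMatrix($samples);
```

As the new tests note, the sign of the projected values can flip depending on eigenvector orientation, so comparisons of results are best made on absolute values.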