Linear algebra operations, Dimensionality reduction and some other minor changes (#81)

* Lineer Algebra operations

* Covariance

* PCA and KernelPCA

* Tests for PCA, Eigenvalues and Covariance

* KernelPCA update

* KernelPCA and its test

* KernelPCA and its test

* MatrixTest, KernelPCA and PCA tests

* Readme update

* Readme update
This commit is contained in:
Mustafa Karabulut 2017-04-23 10:03:30 +03:00 committed by Arkadiusz Kondas
parent 6296e44db0
commit a87859dd97
13 changed files with 1965 additions and 17 deletions

View File

@ -24,7 +24,7 @@ $labels = ['a', 'a', 'a', 'b', 'b', 'b'];
$classifier = new KNearestNeighbors();
$classifier->train($samples, $labels);
$classifier->predict([3, 2]);
$classifier->predict([3, 2]);
// return 'b'
```
@ -61,12 +61,14 @@ Example scripts are available in a separate repository [php-ai/php-ml-examples](
* Adaline
* Decision Stump
* Perceptron
* LogisticRegression
* Regression
* [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/)
* [SVR](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/svr/)
* Clustering
* [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means/)
* [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan/)
* Fuzzy C-Means
* Metric
* [Accuracy](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/accuracy/)
* [Confusion Matrix](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/confusion-matrix/)
@ -85,6 +87,9 @@ Example scripts are available in a separate repository [php-ai/php-ml-examples](
* Feature Extraction
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/)
* [Tf-idf Transformer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/tf-idf-transformer/)
* Dimensionality Reduction
* PCA
* Kernel PCA
* Datasets
* [Array](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/array-dataset/)
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/)
@ -100,7 +105,8 @@ Example scripts are available in a separate repository [php-ai/php-ml-examples](
* [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/)
* [Set](http://php-ml.readthedocs.io/en/latest/math/set/)
* [Statistic](http://php-ml.readthedocs.io/en/latest/math/statistic/)
* Linear Algebra
## Contribute
- [Issue Tracker: github.com/php-ai/php-ml](https://github.com/php-ai/php-ml/issues)

View File

@ -0,0 +1,246 @@
<?php
declare(strict_types=1);
namespace Phpml\DimensionReduction;
use Phpml\Math\Distance\Euclidean;
use Phpml\Math\Distance\Manhattan;
use Phpml\Math\Matrix;
class KernelPCA extends PCA
{
const KERNEL_RBF = 1;
const KERNEL_SIGMOID = 2;
const KERNEL_LAPLACIAN = 3;
const KERNEL_LINEAR = 4;
/**
* Selected kernel function
*
* @var int
*/
protected $kernel;
/**
* Gamma value used by the kernel
*
* @var float
*/
protected $gamma;
/**
* Original dataset used to fit KernelPCA
*
* @var array
*/
protected $data;
/**
* Kernel principal component analysis (KernelPCA) is an extension of PCA using
* techniques of kernel methods. It is more suitable for data that involves
* vectors that are not linearly separable<br><br>
* Example: <b>$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);</b>
* will initialize the algorithm with an RBF kernel having the gamma parameter as 15,0. <br>
* This transformation will return the same number of rows with only <i>2</i> columns.
*
* @param int $kernel
* @param float $totalVariance Total variance to be preserved if numFeatures is not given
* @param int $numFeatures Number of columns to be returned
* @param float $gamma Gamma parameter is used with RBF and Sigmoid kernels
*
* @throws \Exception
*/
public function __construct(int $kernel = self::KERNEL_RBF, $totalVariance = null, $numFeatures = null, $gamma = null)
{
$availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
if (! in_array($kernel, $availableKernels)) {
throw new \Exception("KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian");
}
parent::__construct($totalVariance, $numFeatures);
$this->kernel = $kernel;
$this->gamma = $gamma;
}
/**
* Takes a data and returns a lower dimensional version
* of this data while preserving $totalVariance or $numFeatures. <br>
* $data is an n-by-m matrix and returned array is
* n-by-k matrix where k <= m
*
* @param array $data
*
* @return array
*/
public function fit(array $data)
{
$numRows = count($data);
$this->data = $data;
if ($this->gamma === null) {
$this->gamma = 1.0 / $numRows;
}
$matrix = $this->calculateKernelMatrix($this->data, $numRows);
$matrix = $this->centerMatrix($matrix, $numRows);
list($this->eigValues, $this->eigVectors) = $this->eigenDecomposition($matrix, $numRows);
$this->fit = true;
return Matrix::transposeArray($this->eigVectors);
}
/**
* Calculates similarity matrix by use of selected kernel function<br>
* An n-by-m matrix is given and an n-by-n matrix is returned
*
* @param array $data
* @param int $numRows
*
* @return array
*/
protected function calculateKernelMatrix(array $data, int $numRows)
{
$kernelFunc = $this->getKernel();
$matrix = [];
for ($i=0; $i < $numRows; $i++) {
for ($k=0; $k < $numRows; $k++) {
if ($i <= $k) {
$matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
} else {
$matrix[$i][$k] = $matrix[$k][$i];
}
}
}
return $matrix;
}
/**
* Kernel matrix is centered in its original space by using the following
* conversion:
*
* K = K N.K K.N + N.K.N where N is n-by-n matrix filled with 1/n
*
* @param array $matrix
* @param int $n
*/
protected function centerMatrix(array $matrix, int $n)
{
$N = array_fill(0, $n, array_fill(0, $n, 1.0/$n));
$N = new Matrix($N, false);
$K = new Matrix($matrix, false);
// K.N (This term is repeated so we cache it once)
$K_N = $K->multiply($N);
// N.K
$N_K = $N->multiply($K);
// N.K.N
$N_K_N = $N->multiply($K_N);
return $K->subtract($N_K)
->subtract($K_N)
->add($N_K_N)
->toArray();
}
/**
* Returns the callable kernel function
*
* @return \Closure
*/
protected function getKernel()
{
switch ($this->kernel) {
case self::KERNEL_LINEAR:
// k(x,y) = xT.y
return function ($x, $y) {
return Matrix::dot($x, $y)[0];
};
case self::KERNEL_RBF:
// k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance
$dist = new Euclidean();
return function ($x, $y) use ($dist) {
return exp(-$this->gamma * $dist->sqDistance($x, $y));
};
case self::KERNEL_SIGMOID:
// k(x,y)=tanh(γ.xT.y+c0) where c0=1
return function ($x, $y) {
$res = Matrix::dot($x, $y)[0] + 1.0;
return tanh($this->gamma * $res);
};
case self::KERNEL_LAPLACIAN:
// k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
$dist = new Manhattan();
return function ($x, $y) use ($dist) {
return exp(-$this->gamma * $dist->distance($x, $y));
};
}
}
/**
* @param array $sample
*
* @return array
*/
protected function getDistancePairs(array $sample)
{
$kernel = $this->getKernel();
$pairs = [];
foreach ($this->data as $row) {
$pairs[] = $kernel($row, $sample);
}
return $pairs;
}
/**
* @param array $pairs
*
* @return array
*/
protected function projectSample(array $pairs)
{
// Normalize eigenvectors by eig = eigVectors / eigValues
$func = function ($eigVal, $eigVect) {
$m = new Matrix($eigVect, false);
$a = $m->divideByScalar($eigVal)->toArray();
return $a[0];
};
$eig = array_map($func, $this->eigValues, $this->eigVectors);
// return k.dot(eig)
return Matrix::dot($pairs, $eig);
}
/**
* Transforms the given sample to a lower dimensional vector by using
* the variables obtained during the last run of <code>fit</code>.
*
* @param array $sample
*
* @return array
*/
public function transform(array $sample)
{
if (!$this->fit) {
throw new \Exception("KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first");
}
if (is_array($sample[0])) {
throw new \Exception("KernelPCA::transform() accepts only one-dimensional arrays");
}
$pairs = $this->getDistancePairs($sample);
return $this->projectSample($pairs);
}
}

View File

@ -0,0 +1,228 @@
<?php
declare(strict_types=1);
namespace Phpml\DimensionReduction;
use Phpml\Math\LinearAlgebra\EigenvalueDecomposition;
use Phpml\Math\Statistic\Covariance;
use Phpml\Math\Statistic\Mean;
use Phpml\Math\Matrix;
class PCA
{
/**
* Total variance to be conserved after the reduction
*
* @var float
*/
public $totalVariance = 0.9;
/**
* Number of features to be preserved after the reduction
*
* @var int
*/
public $numFeatures = null;
/**
* Temporary storage for mean values for each dimension in given data
*
* @var array
*/
protected $means = [];
/**
* Eigenvectors of the covariance matrix
*
* @var array
*/
protected $eigVectors = [];
/**
* Top eigenValues of the covariance matrix
*
* @var type
*/
protected $eigValues = [];
/**
* @var bool
*/
protected $fit = false;
/**
* PCA (Principal Component Analysis) used to explain given
* data with lower number of dimensions. This analysis transforms the
* data to a lower dimensional version of it by conserving a proportion of total variance
* within the data. It is a lossy data compression technique.<br>
*
* @param float $totalVariance Total explained variance to be preserved
* @param int $numFeatures Number of features to be preserved
*
* @throws \Exception
*/
public function __construct($totalVariance = null, $numFeatures = null)
{
if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
throw new \Exception("Total variance can be a value between 0.1 and 0.99");
}
if ($numFeatures !== null && $numFeatures <= 0) {
throw new \Exception("Number of features to be preserved should be greater than 0");
}
if ($totalVariance !== null && $numFeatures !== null) {
throw new \Exception("Either totalVariance or numFeatures should be specified in order to run the algorithm");
}
if ($numFeatures !== null) {
$this->numFeatures = $numFeatures;
}
if ($totalVariance !== null) {
$this->totalVariance = $totalVariance;
}
}
/**
* Takes a data and returns a lower dimensional version
* of this data while preserving $totalVariance or $numFeatures. <br>
* $data is an n-by-m matrix and returned array is
* n-by-k matrix where k <= m
*
* @param array $data
*
* @return array
*/
public function fit(array $data)
{
$n = count($data[0]);
$data = $this->normalize($data, $n);
$covMatrix = Covariance::covarianceMatrix($data, array_fill(0, $n, 0));
list($this->eigValues, $this->eigVectors) = $this->eigenDecomposition($covMatrix, $n);
$this->fit = true;
return $this->reduce($data);
}
/**
* @param array $data
* @param int $n
*/
protected function calculateMeans(array $data, int $n)
{
// Calculate means for each dimension
$this->means = [];
for ($i=0; $i < $n; $i++) {
$column = array_column($data, $i);
$this->means[] = Mean::arithmetic($column);
}
}
/**
* Normalization of the data includes subtracting mean from
* each dimension therefore dimensions will be centered to zero
*
* @param array $data
* @param int $n
*
* @return array
*/
protected function normalize(array $data, int $n)
{
if (empty($this->means)) {
$this->calculateMeans($data, $n);
}
// Normalize data
foreach ($data as $i => $row) {
for ($k=0; $k < $n; $k++) {
$data[$i][$k] -= $this->means[$k];
}
}
return $data;
}
/**
* Calculates eigenValues and eigenVectors of the given matrix. Returns
* top eigenVectors along with the largest eigenValues. The total explained variance
* of these eigenVectors will be no less than desired $totalVariance value
*
* @param array $matrix
* @param int $n
*
* @return array
*/
protected function eigenDecomposition(array $matrix, int $n)
{
$eig = new EigenvalueDecomposition($matrix);
$eigVals = $eig->getRealEigenvalues();
$eigVects= $eig->getEigenvectors();
$totalEigVal = array_sum($eigVals);
// Sort eigenvalues in descending order
arsort($eigVals);
$explainedVar = 0.0;
$vectors = [];
$values = [];
foreach ($eigVals as $i => $eigVal) {
$explainedVar += $eigVal / $totalEigVal;
$vectors[] = $eigVects[$i];
$values[] = $eigVal;
if ($this->numFeatures !== null) {
if (count($vectors) == $this->numFeatures) {
break;
}
} else {
if ($explainedVar >= $this->totalVariance) {
break;
}
}
}
return [$values, $vectors];
}
/**
* Returns the reduced data
*
* @param array $data
*
* @return array
*/
protected function reduce(array $data)
{
$m1 = new Matrix($data);
$m2 = new Matrix($this->eigVectors);
return $m1->multiply($m2->transpose())->toArray();
}
/**
* Transforms the given sample to a lower dimensional vector by using
* the eigenVectors obtained in the last run of <code>fit</code>.
*
* @param array $sample
*
* @return array
*/
public function transform(array $sample)
{
if (!$this->fit) {
throw new \Exception("PCA has not been fitted with respect to original dataset, please run PCA::fit() first");
}
if (! is_array($sample[0])) {
$sample = [$sample];
}
$sample = $this->normalize($sample, count($sample[0]));
return $this->reduce($sample);
}
}

View File

@ -55,7 +55,7 @@ class InvalidArgumentException extends \Exception
*/
public static function inconsistentMatrixSupplied()
{
return new self('Inconsistent matrix aupplied');
return new self('Inconsistent matrix applied');
}
/**

View File

@ -31,4 +31,17 @@ class Euclidean implements Distance
return sqrt((float) $distance);
}
/**
* Square of Euclidean distance
*
* @param array $a
* @param array $b
*
* @return float
*/
public function sqDistance(array $a, array $b): float
{
return $this->distance($a, $b) ** 2;
}
}

View File

@ -0,0 +1,890 @@
<?php
declare(strict_types=1);
/**
*
* Class to obtain eigenvalues and eigenvectors of a real matrix.
*
* If A is symmetric, then A = V*D*V' where the eigenvalue matrix D
* is diagonal and the eigenvector matrix V is orthogonal (i.e.
* A = V.times(D.times(V.transpose())) and V.times(V.transpose())
* equals the identity matrix).
*
* If A is not symmetric, then the eigenvalue matrix D is block diagonal
* with the real eigenvalues in 1-by-1 blocks and any complex eigenvalues,
* lambda + i*mu, in 2-by-2 blocks, [lambda, mu; -mu, lambda]. The
* columns of V represent the eigenvectors in the sense that A*V = V*D,
* i.e. A.times(V) equals V.times(D). The matrix V may be badly
* conditioned, or even singular, so the validity of the equation
* A = V*D*inverse(V) depends upon V.cond().
*
* @author Paul Meagher
* @license PHP v3.0
* @version 1.1
*
* Slightly changed to adapt the original code to PHP-ML library
* @date 2017/04/11
* @author Mustafa Karabulut
*/
namespace Phpml\Math\LinearAlgebra;
use Phpml\Math\Matrix;
class EigenvalueDecomposition
{
/**
* Row and column dimension (square matrix).
* @var int
*/
private $n;
/**
* Internal symmetry flag.
* @var int
*/
private $issymmetric;
/**
* Arrays for internal storage of eigenvalues.
* @var array
*/
private $d = [];
private $e = [];
/**
* Array for internal storage of eigenvectors.
* @var array
*/
private $V = [];
/**
* Array for internal storage of nonsymmetric Hessenberg form.
* @var array
*/
private $H = [];
/**
* Working storage for nonsymmetric algorithm.
* @var array
*/
private $ort;
/**
* Used for complex scalar division.
* @var float
*/
private $cdivr;
private $cdivi;
/**
* Symmetric Householder reduction to tridiagonal form.
*/
private function tred2()
{
// This is derived from the Algol procedures tred2 by
// Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
// Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
// Fortran subroutine in EISPACK.
$this->d = $this->V[$this->n-1];
// Householder reduction to tridiagonal form.
for ($i = $this->n-1; $i > 0; --$i) {
$i_ = $i -1;
// Scale to avoid under/overflow.
$h = $scale = 0.0;
$scale += array_sum(array_map('abs', $this->d));
if ($scale == 0.0) {
$this->e[$i] = $this->d[$i_];
$this->d = array_slice($this->V[$i_], 0, $i_);
for ($j = 0; $j < $i; ++$j) {
$this->V[$j][$i] = $this->V[$i][$j] = 0.0;
}
} else {
// Generate Householder vector.
for ($k = 0; $k < $i; ++$k) {
$this->d[$k] /= $scale;
$h += pow($this->d[$k], 2);
}
$f = $this->d[$i_];
$g = sqrt($h);
if ($f > 0) {
$g = -$g;
}
$this->e[$i] = $scale * $g;
$h = $h - $f * $g;
$this->d[$i_] = $f - $g;
for ($j = 0; $j < $i; ++$j) {
$this->e[$j] = 0.0;
}
// Apply similarity transformation to remaining columns.
for ($j = 0; $j < $i; ++$j) {
$f = $this->d[$j];
$this->V[$j][$i] = $f;
$g = $this->e[$j] + $this->V[$j][$j] * $f;
for ($k = $j+1; $k <= $i_; ++$k) {
$g += $this->V[$k][$j] * $this->d[$k];
$this->e[$k] += $this->V[$k][$j] * $f;
}
$this->e[$j] = $g;
}
$f = 0.0;
for ($j = 0; $j < $i; ++$j) {
if ($h === 0) {
$h = 1e-20;
}
$this->e[$j] /= $h;
$f += $this->e[$j] * $this->d[$j];
}
$hh = $f / (2 * $h);
for ($j=0; $j < $i; ++$j) {
$this->e[$j] -= $hh * $this->d[$j];
}
for ($j = 0; $j < $i; ++$j) {
$f = $this->d[$j];
$g = $this->e[$j];
for ($k = $j; $k <= $i_; ++$k) {
$this->V[$k][$j] -= ($f * $this->e[$k] + $g * $this->d[$k]);
}
$this->d[$j] = $this->V[$i-1][$j];
$this->V[$i][$j] = 0.0;
}
}
$this->d[$i] = $h;
}
// Accumulate transformations.
for ($i = 0; $i < $this->n-1; ++$i) {
$this->V[$this->n-1][$i] = $this->V[$i][$i];
$this->V[$i][$i] = 1.0;
$h = $this->d[$i+1];
if ($h != 0.0) {
for ($k = 0; $k <= $i; ++$k) {
$this->d[$k] = $this->V[$k][$i+1] / $h;
}
for ($j = 0; $j <= $i; ++$j) {
$g = 0.0;
for ($k = 0; $k <= $i; ++$k) {
$g += $this->V[$k][$i+1] * $this->V[$k][$j];
}
for ($k = 0; $k <= $i; ++$k) {
$this->V[$k][$j] -= $g * $this->d[$k];
}
}
}
for ($k = 0; $k <= $i; ++$k) {
$this->V[$k][$i+1] = 0.0;
}
}
$this->d = $this->V[$this->n-1];
$this->V[$this->n-1] = array_fill(0, $j, 0.0);
$this->V[$this->n-1][$this->n-1] = 1.0;
$this->e[0] = 0.0;
}
/**
* Symmetric tridiagonal QL algorithm.
*
* This is derived from the Algol procedures tql2, by
* Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
* Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
* Fortran subroutine in EISPACK.
*/
private function tql2()
{
for ($i = 1; $i < $this->n; ++$i) {
$this->e[$i-1] = $this->e[$i];
}
$this->e[$this->n-1] = 0.0;
$f = 0.0;
$tst1 = 0.0;
$eps = pow(2.0, -52.0);
for ($l = 0; $l < $this->n; ++$l) {
// Find small subdiagonal element
$tst1 = max($tst1, abs($this->d[$l]) + abs($this->e[$l]));
$m = $l;
while ($m < $this->n) {
if (abs($this->e[$m]) <= $eps * $tst1) {
break;
}
++$m;
}
// If m == l, $this->d[l] is an eigenvalue,
// otherwise, iterate.
if ($m > $l) {
$iter = 0;
do {
// Could check iteration count here.
$iter += 1;
// Compute implicit shift
$g = $this->d[$l];
$p = ($this->d[$l+1] - $g) / (2.0 * $this->e[$l]);
$r = hypot($p, 1.0);
if ($p < 0) {
$r *= -1;
}
$this->d[$l] = $this->e[$l] / ($p + $r);
$this->d[$l+1] = $this->e[$l] * ($p + $r);
$dl1 = $this->d[$l+1];
$h = $g - $this->d[$l];
for ($i = $l + 2; $i < $this->n; ++$i) {
$this->d[$i] -= $h;
}
$f += $h;
// Implicit QL transformation.
$p = $this->d[$m];
$c = 1.0;
$c2 = $c3 = $c;
$el1 = $this->e[$l + 1];
$s = $s2 = 0.0;
for ($i = $m-1; $i >= $l; --$i) {
$c3 = $c2;
$c2 = $c;
$s2 = $s;
$g = $c * $this->e[$i];
$h = $c * $p;
$r = hypot($p, $this->e[$i]);
$this->e[$i+1] = $s * $r;
$s = $this->e[$i] / $r;
$c = $p / $r;
$p = $c * $this->d[$i] - $s * $g;
$this->d[$i+1] = $h + $s * ($c * $g + $s * $this->d[$i]);
// Accumulate transformation.
for ($k = 0; $k < $this->n; ++$k) {
$h = $this->V[$k][$i+1];
$this->V[$k][$i+1] = $s * $this->V[$k][$i] + $c * $h;
$this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h;
}
}
$p = -$s * $s2 * $c3 * $el1 * $this->e[$l] / $dl1;
$this->e[$l] = $s * $p;
$this->d[$l] = $c * $p;
// Check for convergence.
} while (abs($this->e[$l]) > $eps * $tst1);
}
$this->d[$l] = $this->d[$l] + $f;
$this->e[$l] = 0.0;
}
// Sort eigenvalues and corresponding vectors.
for ($i = 0; $i < $this->n - 1; ++$i) {
$k = $i;
$p = $this->d[$i];
for ($j = $i+1; $j < $this->n; ++$j) {
if ($this->d[$j] < $p) {
$k = $j;
$p = $this->d[$j];
}
}
if ($k != $i) {
$this->d[$k] = $this->d[$i];
$this->d[$i] = $p;
for ($j = 0; $j < $this->n; ++$j) {
$p = $this->V[$j][$i];
$this->V[$j][$i] = $this->V[$j][$k];
$this->V[$j][$k] = $p;
}
}
}
}
/**
* Nonsymmetric reduction to Hessenberg form.
*
* This is derived from the Algol procedures orthes and ortran,
* by Martin and Wilkinson, Handbook for Auto. Comp.,
* Vol.ii-Linear Algebra, and the corresponding
* Fortran subroutines in EISPACK.
*/
private function orthes()
{
$low = 0;
$high = $this->n-1;
for ($m = $low+1; $m <= $high-1; ++$m) {
// Scale column.
$scale = 0.0;
for ($i = $m; $i <= $high; ++$i) {
$scale = $scale + abs($this->H[$i][$m-1]);
}
if ($scale != 0.0) {
// Compute Householder transformation.
$h = 0.0;
for ($i = $high; $i >= $m; --$i) {
$this->ort[$i] = $this->H[$i][$m-1] / $scale;
$h += $this->ort[$i] * $this->ort[$i];
}
$g = sqrt($h);
if ($this->ort[$m] > 0) {
$g *= -1;
}
$h -= $this->ort[$m] * $g;
$this->ort[$m] -= $g;
// Apply Householder similarity transformation
// H = (I -u * u' / h) * H * (I -u * u') / h)
for ($j = $m; $j < $this->n; ++$j) {
$f = 0.0;
for ($i = $high; $i >= $m; --$i) {
$f += $this->ort[$i] * $this->H[$i][$j];
}
$f /= $h;
for ($i = $m; $i <= $high; ++$i) {
$this->H[$i][$j] -= $f * $this->ort[$i];
}
}
for ($i = 0; $i <= $high; ++$i) {
$f = 0.0;
for ($j = $high; $j >= $m; --$j) {
$f += $this->ort[$j] * $this->H[$i][$j];
}
$f = $f / $h;
for ($j = $m; $j <= $high; ++$j) {
$this->H[$i][$j] -= $f * $this->ort[$j];
}
}
$this->ort[$m] = $scale * $this->ort[$m];
$this->H[$m][$m-1] = $scale * $g;
}
}
// Accumulate transformations (Algol's ortran).
for ($i = 0; $i < $this->n; ++$i) {
for ($j = 0; $j < $this->n; ++$j) {
$this->V[$i][$j] = ($i == $j ? 1.0 : 0.0);
}
}
for ($m = $high-1; $m >= $low+1; --$m) {
if ($this->H[$m][$m-1] != 0.0) {
for ($i = $m+1; $i <= $high; ++$i) {
$this->ort[$i] = $this->H[$i][$m-1];
}
for ($j = $m; $j <= $high; ++$j) {
$g = 0.0;
for ($i = $m; $i <= $high; ++$i) {
$g += $this->ort[$i] * $this->V[$i][$j];
}
// Double division avoids possible underflow
$g = ($g / $this->ort[$m]) / $this->H[$m][$m-1];
for ($i = $m; $i <= $high; ++$i) {
$this->V[$i][$j] += $g * $this->ort[$i];
}
}
}
}
}
/**
* Performs complex division.
*/
private function cdiv($xr, $xi, $yr, $yi)
{
if (abs($yr) > abs($yi)) {
$r = $yi / $yr;
$d = $yr + $r * $yi;
$this->cdivr = ($xr + $r * $xi) / $d;
$this->cdivi = ($xi - $r * $xr) / $d;
} else {
$r = $yr / $yi;
$d = $yi + $r * $yr;
$this->cdivr = ($r * $xr + $xi) / $d;
$this->cdivi = ($r * $xi - $xr) / $d;
}
}
/**
* Nonsymmetric reduction from Hessenberg to real Schur form.
*
* Code is derived from the Algol procedure hqr2,
* by Martin and Wilkinson, Handbook for Auto. Comp.,
* Vol.ii-Linear Algebra, and the corresponding
* Fortran subroutine in EISPACK.
*/
private function hqr2()
{
// Initialize
$nn = $this->n;
$n = $nn - 1;
$low = 0;
$high = $nn - 1;
$eps = pow(2.0, -52.0);
$exshift = 0.0;
$p = $q = $r = $s = $z = 0;
// Store roots isolated by balanc and compute matrix norm
$norm = 0.0;
for ($i = 0; $i < $nn; ++$i) {
if (($i < $low) or ($i > $high)) {
$this->d[$i] = $this->H[$i][$i];
$this->e[$i] = 0.0;
}
for ($j = max($i-1, 0); $j < $nn; ++$j) {
$norm = $norm + abs($this->H[$i][$j]);
}
}
// Outer loop over eigenvalue index
$iter = 0;
while ($n >= $low) {
// Look for single small sub-diagonal element
$l = $n;
while ($l > $low) {
$s = abs($this->H[$l-1][$l-1]) + abs($this->H[$l][$l]);
if ($s == 0.0) {
$s = $norm;
}
if (abs($this->H[$l][$l-1]) < $eps * $s) {
break;
}
--$l;
}
// Check for convergence
// One root found
if ($l == $n) {
$this->H[$n][$n] = $this->H[$n][$n] + $exshift;
$this->d[$n] = $this->H[$n][$n];
$this->e[$n] = 0.0;
--$n;
$iter = 0;
// Two roots found
} elseif ($l == $n-1) {
$w = $this->H[$n][$n-1] * $this->H[$n-1][$n];
$p = ($this->H[$n-1][$n-1] - $this->H[$n][$n]) / 2.0;
$q = $p * $p + $w;
$z = sqrt(abs($q));
$this->H[$n][$n] = $this->H[$n][$n] + $exshift;
$this->H[$n-1][$n-1] = $this->H[$n-1][$n-1] + $exshift;
$x = $this->H[$n][$n];
// Real pair
if ($q >= 0) {
if ($p >= 0) {
$z = $p + $z;
} else {
$z = $p - $z;
}
$this->d[$n-1] = $x + $z;
$this->d[$n] = $this->d[$n-1];
if ($z != 0.0) {
$this->d[$n] = $x - $w / $z;
}
$this->e[$n-1] = 0.0;
$this->e[$n] = 0.0;
$x = $this->H[$n][$n-1];
$s = abs($x) + abs($z);
$p = $x / $s;
$q = $z / $s;
$r = sqrt($p * $p + $q * $q);
$p = $p / $r;
$q = $q / $r;
// Row modification
for ($j = $n-1; $j < $nn; ++$j) {
$z = $this->H[$n-1][$j];
$this->H[$n-1][$j] = $q * $z + $p * $this->H[$n][$j];
$this->H[$n][$j] = $q * $this->H[$n][$j] - $p * $z;
}
// Column modification
for ($i = 0; $i <= $n; ++$i) {
$z = $this->H[$i][$n-1];
$this->H[$i][$n-1] = $q * $z + $p * $this->H[$i][$n];
$this->H[$i][$n] = $q * $this->H[$i][$n] - $p * $z;
}
// Accumulate transformations
for ($i = $low; $i <= $high; ++$i) {
$z = $this->V[$i][$n-1];
$this->V[$i][$n-1] = $q * $z + $p * $this->V[$i][$n];
$this->V[$i][$n] = $q * $this->V[$i][$n] - $p * $z;
}
// Complex pair
} else {
$this->d[$n-1] = $x + $p;
$this->d[$n] = $x + $p;
$this->e[$n-1] = $z;
$this->e[$n] = -$z;
}
$n = $n - 2;
$iter = 0;
// No convergence yet
} else {
// Form shift
$x = $this->H[$n][$n];
$y = 0.0;
$w = 0.0;
if ($l < $n) {
$y = $this->H[$n-1][$n-1];
$w = $this->H[$n][$n-1] * $this->H[$n-1][$n];
}
// Wilkinson's original ad hoc shift
if ($iter == 10) {
$exshift += $x;
for ($i = $low; $i <= $n; ++$i) {
$this->H[$i][$i] -= $x;
}
$s = abs($this->H[$n][$n-1]) + abs($this->H[$n-1][$n-2]);
$x = $y = 0.75 * $s;
$w = -0.4375 * $s * $s;
}
// MATLAB's new ad hoc shift
if ($iter == 30) {
$s = ($y - $x) / 2.0;
$s = $s * $s + $w;
if ($s > 0) {
$s = sqrt($s);
if ($y < $x) {
$s = -$s;
}
$s = $x - $w / (($y - $x) / 2.0 + $s);
for ($i = $low; $i <= $n; ++$i) {
$this->H[$i][$i] -= $s;
}
$exshift += $s;
$x = $y = $w = 0.964;
}
}
// Could check iteration count here.
$iter = $iter + 1;
// Look for two consecutive small sub-diagonal elements
$m = $n - 2;
while ($m >= $l) {
$z = $this->H[$m][$m];
$r = $x - $z;
$s = $y - $z;
$p = ($r * $s - $w) / $this->H[$m+1][$m] + $this->H[$m][$m+1];
$q = $this->H[$m+1][$m+1] - $z - $r - $s;
$r = $this->H[$m+2][$m+1];
$s = abs($p) + abs($q) + abs($r);
$p = $p / $s;
$q = $q / $s;
$r = $r / $s;
if ($m == $l) {
break;
}
if (abs($this->H[$m][$m-1]) * (abs($q) + abs($r)) <
$eps * (abs($p) * (abs($this->H[$m-1][$m-1]) + abs($z) + abs($this->H[$m+1][$m+1])))) {
break;
}
--$m;
}
for ($i = $m + 2; $i <= $n; ++$i) {
$this->H[$i][$i-2] = 0.0;
if ($i > $m+2) {
$this->H[$i][$i-3] = 0.0;
}
}
// Double QR step involving rows l:n and columns m:n
for ($k = $m; $k <= $n-1; ++$k) {
$notlast = ($k != $n-1);
if ($k != $m) {
$p = $this->H[$k][$k-1];
$q = $this->H[$k+1][$k-1];
$r = ($notlast ? $this->H[$k+2][$k-1] : 0.0);
$x = abs($p) + abs($q) + abs($r);
if ($x != 0.0) {
$p = $p / $x;
$q = $q / $x;
$r = $r / $x;
}
}
if ($x == 0.0) {
break;
}
$s = sqrt($p * $p + $q * $q + $r * $r);
if ($p < 0) {
$s = -$s;
}
if ($s != 0) {
if ($k != $m) {
$this->H[$k][$k-1] = -$s * $x;
} elseif ($l != $m) {
$this->H[$k][$k-1] = -$this->H[$k][$k-1];
}
$p = $p + $s;
$x = $p / $s;
$y = $q / $s;
$z = $r / $s;
$q = $q / $p;
$r = $r / $p;
// Row modification
for ($j = $k; $j < $nn; ++$j) {
$p = $this->H[$k][$j] + $q * $this->H[$k+1][$j];
if ($notlast) {
$p = $p + $r * $this->H[$k+2][$j];
$this->H[$k+2][$j] = $this->H[$k+2][$j] - $p * $z;
}
$this->H[$k][$j] = $this->H[$k][$j] - $p * $x;
$this->H[$k+1][$j] = $this->H[$k+1][$j] - $p * $y;
}
// Column modification
for ($i = 0; $i <= min($n, $k+3); ++$i) {
$p = $x * $this->H[$i][$k] + $y * $this->H[$i][$k+1];
if ($notlast) {
$p = $p + $z * $this->H[$i][$k+2];
$this->H[$i][$k+2] = $this->H[$i][$k+2] - $p * $r;
}
$this->H[$i][$k] = $this->H[$i][$k] - $p;
$this->H[$i][$k+1] = $this->H[$i][$k+1] - $p * $q;
}
// Accumulate transformations
for ($i = $low; $i <= $high; ++$i) {
$p = $x * $this->V[$i][$k] + $y * $this->V[$i][$k+1];
if ($notlast) {
$p = $p + $z * $this->V[$i][$k+2];
$this->V[$i][$k+2] = $this->V[$i][$k+2] - $p * $r;
}
$this->V[$i][$k] = $this->V[$i][$k] - $p;
$this->V[$i][$k+1] = $this->V[$i][$k+1] - $p * $q;
}
} // ($s != 0)
} // k loop
} // check convergence
} // while ($n >= $low)
// Backsubstitute to find vectors of upper triangular form
if ($norm == 0.0) {
return;
}
for ($n = $nn-1; $n >= 0; --$n) {
$p = $this->d[$n];
$q = $this->e[$n];
// Real vector
if ($q == 0) {
$l = $n;
$this->H[$n][$n] = 1.0;
for ($i = $n-1; $i >= 0; --$i) {
$w = $this->H[$i][$i] - $p;
$r = 0.0;
for ($j = $l; $j <= $n; ++$j) {
$r = $r + $this->H[$i][$j] * $this->H[$j][$n];
}
if ($this->e[$i] < 0.0) {
$z = $w;
$s = $r;
} else {
$l = $i;
if ($this->e[$i] == 0.0) {
if ($w != 0.0) {
$this->H[$i][$n] = -$r / $w;
} else {
$this->H[$i][$n] = -$r / ($eps * $norm);
}
// Solve real equations
} else {
$x = $this->H[$i][$i+1];
$y = $this->H[$i+1][$i];
$q = ($this->d[$i] - $p) * ($this->d[$i] - $p) + $this->e[$i] * $this->e[$i];
$t = ($x * $s - $z * $r) / $q;
$this->H[$i][$n] = $t;
if (abs($x) > abs($z)) {
$this->H[$i+1][$n] = (-$r - $w * $t) / $x;
} else {
$this->H[$i+1][$n] = (-$s - $y * $t) / $z;
}
}
// Overflow control
$t = abs($this->H[$i][$n]);
if (($eps * $t) * $t > 1) {
for ($j = $i; $j <= $n; ++$j) {
$this->H[$j][$n] = $this->H[$j][$n] / $t;
}
}
}
}
// Complex vector
} elseif ($q < 0) {
$l = $n-1;
// Last vector component imaginary so matrix is triangular
if (abs($this->H[$n][$n-1]) > abs($this->H[$n-1][$n])) {
$this->H[$n-1][$n-1] = $q / $this->H[$n][$n-1];
$this->H[$n-1][$n] = -($this->H[$n][$n] - $p) / $this->H[$n][$n-1];
} else {
$this->cdiv(0.0, -$this->H[$n-1][$n], $this->H[$n-1][$n-1] - $p, $q);
$this->H[$n-1][$n-1] = $this->cdivr;
$this->H[$n-1][$n] = $this->cdivi;
}
$this->H[$n][$n-1] = 0.0;
$this->H[$n][$n] = 1.0;
for ($i = $n-2; $i >= 0; --$i) {
// double ra,sa,vr,vi;
$ra = 0.0;
$sa = 0.0;
for ($j = $l; $j <= $n; ++$j) {
$ra = $ra + $this->H[$i][$j] * $this->H[$j][$n-1];
$sa = $sa + $this->H[$i][$j] * $this->H[$j][$n];
}
$w = $this->H[$i][$i] - $p;
if ($this->e[$i] < 0.0) {
$z = $w;
$r = $ra;
$s = $sa;
} else {
$l = $i;
if ($this->e[$i] == 0) {
$this->cdiv(-$ra, -$sa, $w, $q);
$this->H[$i][$n-1] = $this->cdivr;
$this->H[$i][$n] = $this->cdivi;
} else {
// Solve complex equations
$x = $this->H[$i][$i+1];
$y = $this->H[$i+1][$i];
$vr = ($this->d[$i] - $p) * ($this->d[$i] - $p) + $this->e[$i] * $this->e[$i] - $q * $q;
$vi = ($this->d[$i] - $p) * 2.0 * $q;
if ($vr == 0.0 & $vi == 0.0) {
$vr = $eps * $norm * (abs($w) + abs($q) + abs($x) + abs($y) + abs($z));
}
$this->cdiv($x * $r - $z * $ra + $q * $sa, $x * $s - $z * $sa - $q * $ra, $vr, $vi);
$this->H[$i][$n-1] = $this->cdivr;
$this->H[$i][$n] = $this->cdivi;
if (abs($x) > (abs($z) + abs($q))) {
$this->H[$i+1][$n-1] = (-$ra - $w * $this->H[$i][$n-1] + $q * $this->H[$i][$n]) / $x;
$this->H[$i+1][$n] = (-$sa - $w * $this->H[$i][$n] - $q * $this->H[$i][$n-1]) / $x;
} else {
$this->cdiv(-$r - $y * $this->H[$i][$n-1], -$s - $y * $this->H[$i][$n], $z, $q);
$this->H[$i+1][$n-1] = $this->cdivr;
$this->H[$i+1][$n] = $this->cdivi;
}
}
// Overflow control
$t = max(abs($this->H[$i][$n-1]), abs($this->H[$i][$n]));
if (($eps * $t) * $t > 1) {
for ($j = $i; $j <= $n; ++$j) {
$this->H[$j][$n-1] = $this->H[$j][$n-1] / $t;
$this->H[$j][$n] = $this->H[$j][$n] / $t;
}
}
} // end else
} // end for
} // end else for complex case
} // end for
// Vectors of isolated roots
for ($i = 0; $i < $nn; ++$i) {
if ($i < $low | $i > $high) {
for ($j = $i; $j < $nn; ++$j) {
$this->V[$i][$j] = $this->H[$i][$j];
}
}
}
// Back transformation to get eigenvectors of original matrix
for ($j = $nn-1; $j >= $low; --$j) {
for ($i = $low; $i <= $high; ++$i) {
$z = 0.0;
for ($k = $low; $k <= min($j, $high); ++$k) {
$z = $z + $this->V[$i][$k] * $this->H[$k][$j];
}
$this->V[$i][$j] = $z;
}
}
} // end hqr2
/**
* Constructor: Check for symmetry, then construct the eigenvalue decomposition
*
* @param array $Arg
*/
public function __construct(array $Arg)
{
$this->A = $Arg;
$this->n = count($Arg[0]);
$issymmetric = true;
for ($j = 0; ($j < $this->n) & $issymmetric; ++$j) {
for ($i = 0; ($i < $this->n) & $issymmetric; ++$i) {
$issymmetric = ($this->A[$i][$j] == $this->A[$j][$i]);
}
}
if ($issymmetric) {
$this->V = $this->A;
// Tridiagonalize.
$this->tred2();
// Diagonalize.
$this->tql2();
} else {
$this->H = $this->A;
$this->ort = [];
// Reduce to Hessenberg form.
$this->orthes();
// Reduce Hessenberg to real Schur form.
$this->hqr2();
}
}
/**
* Return the eigenvector matrix
*
* @access public
* @return array
*/
public function getEigenvectors()
{
$vectors = $this->V;
// Always return the eigenvectors of length 1.0
$vectors = new Matrix($vectors);
$vectors = array_map(function ($vect) {
$sum = 0;
for ($i=0; $i<count($vect); $i++) {
$sum += $vect[$i] ** 2;
}
$sum = sqrt($sum);
for ($i=0; $i<count($vect); $i++) {
$vect[$i] /= $sum;
}
return $vect;
}, $vectors->transpose()->toArray());
return $vectors;
}
/**
* Return the real parts of the eigenvalues<br>
* d = real(diag(D));
*
* @return array
*/
public function getRealEigenvalues()
{
return $this->d;
}
/**
* Return the imaginary parts of the eigenvalues <br>
* d = imag(diag(D))
*
* @return array
*/
public function getImagEigenvalues()
{
return $this->e;
}
/**
* Return the block diagonal eigenvalue matrix
*
* @return array
*/
public function getDiagonalEigenvalues()
{
for ($i = 0; $i < $this->n; ++$i) {
$D[$i] = array_fill(0, $this->n, 0.0);
$D[$i][$i] = $this->d[$i];
if ($this->e[$i] == 0) {
continue;
}
$o = ($this->e[$i] > 0) ? $i + 1 : $i - 1;
$D[$i][$o] = $this->e[$i];
}
return $D;
}
} // class EigenvalueDecomposition

View File

@ -37,8 +37,15 @@ class Matrix
*/
public function __construct(array $matrix, bool $validate = true)
{
$this->rows = count($matrix);
$this->columns = count($matrix[0]);
// When a row vector is given
if (!is_array($matrix[0])) {
$this->rows = 1;
$this->columns = count($matrix);
$matrix = [$matrix];
} else {
$this->rows = count($matrix);
$this->columns = count($matrix[0]);
}
if ($validate) {
for ($i = 0; $i < $this->rows; ++$i) {
@ -74,6 +81,14 @@ class Matrix
return $this->matrix;
}
/**
* @return float
*/
public function toScalar()
{
return $this->matrix[0][0];
}
/**
* @return int
*/
@ -103,14 +118,10 @@ class Matrix
throw MatrixException::columnOutOfRange();
}
$values = [];
for ($i = 0; $i < $this->rows; ++$i) {
$values[] = $this->matrix[$i][$column];
}
return $values;
return array_column($this->matrix, $column);
}
/**
* @return float|int
*
@ -167,14 +178,15 @@ class Matrix
*/
public function transpose()
{
$newMatrix = [];
for ($i = 0; $i < $this->rows; ++$i) {
for ($j = 0; $j < $this->columns; ++$j) {
$newMatrix[$j][$i] = $this->matrix[$i][$j];
}
if ($this->rows == 1) {
$matrix = array_map(function ($el) {
return [$el];
}, $this->matrix[0]);
} else {
$matrix = array_map(null, ...$this->matrix);
}
return new self($newMatrix, false);
return new self($matrix, false);
}
/**
@ -222,6 +234,64 @@ class Matrix
return new self($newMatrix, false);
}
/**
* @param $value
*
* @return Matrix
*/
public function multiplyByScalar($value)
{
$newMatrix = [];
for ($i = 0; $i < $this->rows; ++$i) {
for ($j = 0; $j < $this->columns; ++$j) {
$newMatrix[$i][$j] = $this->matrix[$i][$j] * $value;
}
}
return new self($newMatrix, false);
}
/**
* Element-wise addition of the matrix with another one
*
* @param Matrix $other
*/
public function add(Matrix $other)
{
return $this->_add($other);
}
/**
* Element-wise subtracting of another matrix from this one
*
* @param Matrix $other
*/
public function subtract(Matrix $other)
{
return $this->_add($other, -1);
}
/**
* Element-wise addition or substraction depending on the given sign parameter
*
* @param Matrix $other
* @param type $sign
*/
protected function _add(Matrix $other, $sign = 1)
{
$a1 = $this->toArray();
$a2 = $other->toArray();
$newMatrix = [];
for ($i=0; $i < $this->rows; $i++) {
for ($k=0; $k < $this->columns; $k++) {
$newMatrix[$i][$k] = $a1[$i][$k] + $sign * $a2[$i][$k];
}
}
return new Matrix($newMatrix, false);
}
/**
* @return Matrix
*
@ -283,4 +353,33 @@ class Matrix
{
return 0 == $this->getDeterminant();
}
/**
* Returns the transpose of given array
*
* @param array $array
*
* @return array
*/
public static function transposeArray(array $array)
{
return (new Matrix($array, false))->transpose()->toArray();
}
/**
* Returns the dot product of two arrays<br>
* Matrix::dot(x, y) ==> x.y'
*
* @param array $array1
* @param array $array2
*
* @return array
*/
public static function dot(array $array1, array $array2)
{
$m1 = new Matrix($array1, false);
$m2 = new Matrix($array2, false);
return $m1->multiply($m2->transpose())->toArray()[0];
}
}

View File

@ -0,0 +1,155 @@
<?php
declare(strict_types=1);
namespace Phpml\Math\Statistic;
use Phpml\Exception\InvalidArgumentException;
class Covariance
{
/**
* Calculates covariance from two given arrays, x and y, respectively
*
* @param array $x
* @param array $y
* @param bool $sample
* @param float $meanX
* @param float $meanY
*
* @return float
*
* @throws InvalidArgumentException
*/
public static function fromXYArrays(array $x, array $y, $sample = true, float $meanX = null, float $meanY = null)
{
if (empty($x) || empty($y)) {
throw InvalidArgumentException::arrayCantBeEmpty();
}
$n = count($x);
if ($sample && $n === 1) {
throw InvalidArgumentException::arraySizeToSmall(2);
}
if ($meanX === null) {
$meanX = Mean::arithmetic($x);
}
if ($meanY === null) {
$meanY = Mean::arithmetic($y);
}
$sum = 0.0;
foreach ($x as $index => $xi) {
$yi = $y[$index];
$sum += ($xi - $meanX) * ($yi - $meanY);
}
if ($sample) {
--$n;
}
return $sum / $n;
}
/**
* Calculates covariance of two dimensions, i and k in the given data.
*
* @param array $data
* @param int $i
* @param int $k
* @param type $sample
* @param int $n
* @param float $meanX
* @param float $meanY
*/
public static function fromDataset(array $data, int $i, int $k, $sample = true, float $meanX = null, float $meanY = null)
{
if (empty($data)) {
throw InvalidArgumentException::arrayCantBeEmpty();
}
$n = count($data);
if ($sample && $n === 1) {
throw InvalidArgumentException::arraySizeToSmall(2);
}
if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) {
throw new \Exception("Given indices i and k do not match with the dimensionality of data");
}
if ($meanX === null || $meanY === null) {
$x = array_column($data, $i);
$y = array_column($data, $k);
$meanX = Mean::arithmetic($x);
$meanY = Mean::arithmetic($y);
$sum = 0.0;
foreach ($x as $index => $xi) {
$yi = $y[$index];
$sum += ($xi - $meanX) * ($yi - $meanY);
}
} else {
// In the case, whole dataset given along with dimension indices, i and k,
// we would like to avoid getting column data with array_column and operate
// over this extra copy of column data for memory efficiency purposes.
//
// Instead we traverse through the whole data and get what we actually need
// without copying the data. This way, memory use will be reduced
// with a slight cost of CPU utilization.
$sum = 0.0;
foreach ($data as $row) {
$val = [];
foreach ($row as $index => $col) {
if ($index == $i) {
$val[0] = $col - $meanX;
}
if ($index == $k) {
$val[1] = $col - $meanY;
}
}
$sum += $val[0] * $val[1];
}
}
if ($sample) {
--$n;
}
return $sum / $n;
}
/**
* Returns the covariance matrix of n-dimensional data
*
* @param array $data
*
* @return array
*/
public static function covarianceMatrix(array $data, array $means = null)
{
$n = count($data[0]);
if ($means === null) {
$means = [];
for ($i=0; $i < $n; $i++) {
$means[] = Mean::arithmetic(array_column($data, $i));
}
}
$cov = [];
for ($i=0; $i < $n; $i++) {
for ($k=0; $k < $n; $k++) {
if ($i > $k) {
$cov[$i][$k] = $cov[$k][$i];
} else {
$cov[$i][$k] = Covariance::fromDataset(
$data, $i, $k, true, $means[$i], $means[$k]);
}
}
}
return $cov;
}
}

View File

@ -0,0 +1,51 @@
<?php
declare(strict_types=1);
namespace tests\DimensionReduction;
use Phpml\DimensionReduction\KernelPCA;
use PHPUnit\Framework\TestCase;
class KernelPCATest extends TestCase
{
public function testKernelPCA()
{
// Acceptable error
$epsilon = 0.001;
// A simple example whose result is known beforehand
$data = [
[2,2], [1.5,1], [1.,1.5], [1.,1.],
[2.,1.],[2,2.5], [2.,3.], [1.5,3],
[1.,2.5], [1.,2.7], [1.,3.], [1,3],
[1,2], [1.5,2], [1.5,2.2], [1.3,1.7],
[1.7,1.3], [1.5,1.5], [1.5,1.6], [1.6,2],
[1.7,2.1], [1.3,1.3], [1.3,2.2], [1.4,2.4]
];
$transformed = [
[0.016485613899708], [-0.089805657741674], [-0.088695974245924], [-0.069761503810802],
[-0.068049558133392], [-0.054702087779187], [-0.063229228729333], [-0.06852813588679],
[-0.10098315410297], [-0.15617881000654], [-0.21266832077299], [-0.21266832077299],
[-0.039234518840831], [0.40858295942991], [0.40110375047242], [-0.10555116296691],
[-0.13128352866095], [-0.20865959471756], [-0.17531601535848], [0.4240660966961],
[0.36351946685163], [-0.14334173054136], [0.22454914091011], [0.15035027480881]];
$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 1, 15);
$reducedData = $kpca->fit($data);
// Due to the fact that the sign of values can be flipped
// during the calculation of eigenValues, we have to compare
// absolute value of the values
array_map(function ($val1, $val2) use ($epsilon) {
$this->assertEquals(abs($val1), abs($val2), '', $epsilon);
}, $transformed, $reducedData);
// Fitted KernelPCA object can also transform an arbitrary sample of the
// same dimensionality with the original dataset
$newData = [1.25, 2.25];
$newTransformed = [0.18956227539216];
$newTransformed2 = $kpca->transform($newData);
$this->assertEquals(abs($newTransformed[0]), abs($newTransformed2[0]), '', $epsilon);
}
}

View File

@ -0,0 +1,57 @@
<?php
declare(strict_types=1);
namespace tests\DimensionReduction;
use Phpml\DimensionReduction\PCA;
use PHPUnit\Framework\TestCase;
class PCATest extends TestCase
{
public function testPCA()
{
// Acceptable error
$epsilon = 0.001;
// First a simple example whose result is known and given in
// http://www.cs.otago.ac.nz/cosc453/student_tutorials/principal_components.pdf
$data = [
[2.5, 2.4],
[0.5, 0.7],
[2.2, 2.9],
[1.9, 2.2],
[3.1, 3.0],
[2.3, 2.7],
[2.0, 1.6],
[1.0, 1.1],
[1.5, 1.6],
[1.1, 0.9]
];
$transformed = [
[-0.827970186], [1.77758033], [-0.992197494],
[-0.274210416], [-1.67580142], [-0.912949103], [0.0991094375],
[1.14457216], [0.438046137], [1.22382056]];
$pca = new PCA(0.90);
$reducedData = $pca->fit($data);
// Due to the fact that the sign of values can be flipped
// during the calculation of eigenValues, we have to compare
// absolute value of the values
array_map(function ($val1, $val2) use ($epsilon) {
$this->assertEquals(abs($val1), abs($val2), '', $epsilon);
}, $transformed, $reducedData);
// Test fitted PCA object to transform an arbitrary sample of the
// same dimensionality with the original dataset
foreach ($data as $i => $row) {
$newRow = [[$transformed[$i]]];
$newRow2= $pca->transform($row);
array_map(function ($val1, $val2) use ($epsilon) {
$this->assertEquals(abs($val1), abs($val2), '', $epsilon);
}, $newRow, $newRow2);
}
}
}

View File

@ -0,0 +1,65 @@
<?php
declare(strict_types=1);
namespace tests\Math\LinearAlgebra;
use Phpml\Math\LinearAlgebra\EigenvalueDecomposition;
use Phpml\Math\Matrix;
use PHPUnit\Framework\TestCase;
class EigenDecompositionTest extends TestCase
{
public function testSymmetricMatrixEigenPairs()
{
// Acceptable error
$epsilon = 0.001;
// First a simple example whose result is known and given in
// http://www.cs.otago.ac.nz/cosc453/student_tutorials/principal_components.pdf
$matrix = [
[0.616555556, 0.615444444],
[0.614444444, 0.716555556]
];
$knownEigvalues = [0.0490833989, 1.28402771];
$knownEigvectors= [[-0.735178656, 0.677873399], [-0.677873399, -0.735178656]];
$decomp = new EigenvalueDecomposition($matrix);
$eigVectors = $decomp->getEigenvectors();
$eigValues = $decomp->getRealEigenvalues();
$this->assertEquals($knownEigvalues, $eigValues, '', $epsilon);
$this->assertEquals($knownEigvectors, $eigVectors, '', $epsilon);
// Secondly, generate a symmetric square matrix
// and test for A.v=λ.v
//
// (We, for now, omit non-symmetric matrices whose eigenvalues can be complex numbers)
$len = 3;
$A = array_fill(0, $len, array_fill(0, $len, 0.0));
srand(intval(microtime(true) * 1000));
for ($i=0; $i < $len; $i++) {
for ($k=0; $k < $len; $k++) {
if ($i > $k) {
$A[$i][$k] = $A[$k][$i];
} else {
$A[$i][$k] = rand(0, 10);
}
}
}
$decomp = new EigenvalueDecomposition($A);
$eigValues = $decomp->getRealEigenvalues();
$eigVectors= $decomp->getEigenvectors();
foreach ($eigValues as $index => $lambda) {
$m1 = new Matrix($A);
$m2 = (new Matrix($eigVectors[$index]))->transpose();
// A.v=λ.v
$leftSide = $m1->multiply($m2)->toArray();
$rightSide= $m2->multiplyByScalar($lambda)->toArray();
$this->assertEquals($leftSide, $rightSide, '', $epsilon);
}
}
}

View File

@ -188,4 +188,80 @@ class MatrixTest extends TestCase
$this->assertEquals($crossOuted, $matrix->crossOut(1, 1)->toArray());
}
public function testToScalar()
{
$matrix = new Matrix([[1, 2, 3], [3, 2, 3]]);
$this->assertEquals($matrix->toScalar(), 1);
}
public function testMultiplyByScalar()
{
$matrix = new Matrix([
[4, 6, 8],
[2, 10, 20],
]);
$result = [
[-8, -12, -16],
[-4, -20, -40],
];
$this->assertEquals($result, $matrix->multiplyByScalar(-2)->toArray());
}
public function testAdd()
{
$array1 = [1, 1, 1];
$array2 = [2, 2, 2];
$result = [3, 3, 3];
$m1 = new Matrix($array1);
$m2 = new Matrix($array2);
$this->assertEquals($result, $m1->add($m2)->toArray()[0]);
}
public function testSubtract()
{
$array1 = [1, 1, 1];
$array2 = [2, 2, 2];
$result = [-1, -1, -1];
$m1 = new Matrix($array1);
$m2 = new Matrix($array2);
$this->assertEquals($result, $m1->subtract($m2)->toArray()[0]);
}
public function testTransposeArray()
{
$array = [
[1, 1, 1],
[2, 2, 2]
];
$transposed = [
[1, 2],
[1, 2],
[1, 2]
];
$this->assertEquals($transposed, Matrix::transposeArray($array));
}
public function testDot()
{
$vect1 = [2, 2, 2];
$vect2 = [3, 3, 3];
$dot = [18];
$this->assertEquals($dot, Matrix::dot($vect1, $vect2));
$matrix1 = [[1, 1], [2, 2]];
$matrix2 = [[3, 3], [3, 3], [3, 3]];
$dot = [6, 12];
$this->assertEquals($dot, Matrix::dot($matrix2, $matrix1));
}
}

View File

@ -0,0 +1,62 @@
<?php
declare(strict_types=1);
namespace tests\Math\Statistic;
use Phpml\Math\Statistic\Covariance;
use Phpml\Math\Statistic\Mean;
use PHPUnit\Framework\TestCase;
class CovarianceTest extends TestCase
{
public function testSimpleCovariance()
{
// Acceptable error
$epsilon = 0.001;
// First a simple example whose result is known and given in
// http://www.cs.otago.ac.nz/cosc453/student_tutorials/principal_components.pdf
$matrix = [
[0.69, 0.49],
[-1.31, -1.21],
[0.39, 0.99],
[0.09, 0.29],
[1.29, 1.09],
[0.49, 0.79],
[0.19, -0.31],
[-0.81, -0.81],
[-0.31, -0.31],
[-0.71, -1.01],
];
$knownCovariance = [
[0.616555556, 0.615444444],
[0.615444444, 0.716555556]];
$x = array_column($matrix, 0);
$y = array_column($matrix, 1);
// Calculate only one covariance value: Cov(x, y)
$cov1 = Covariance::fromDataset($matrix, 0, 0);
$this->assertEquals($cov1, $knownCovariance[0][0], '', $epsilon);
$cov1 = Covariance::fromXYArrays($x, $x);
$this->assertEquals($cov1, $knownCovariance[0][0], '', $epsilon);
$cov2 = Covariance::fromDataset($matrix, 0, 1);
$this->assertEquals($cov2, $knownCovariance[0][1], '', $epsilon);
$cov2 = Covariance::fromXYArrays($x, $y);
$this->assertEquals($cov2, $knownCovariance[0][1], '', $epsilon);
// Second: calculation cov matrix with automatic means for each column
$covariance = Covariance::covarianceMatrix($matrix);
$this->assertEquals($knownCovariance, $covariance, '', $epsilon);
// Thirdly, CovMatrix: Means are precalculated and given to the method
$x = array_column($matrix, 0);
$y = array_column($matrix, 1);
$meanX = Mean::arithmetic($x);
$meanY = Mean::arithmetic($y);
$covariance = Covariance::covarianceMatrix($matrix, [$meanX, $meanY]);
$this->assertEquals($knownCovariance, $covariance, '', $epsilon);
}
}