2017-04-23 07:03:30 +00:00
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
declare(strict_types=1);
|
|
|
|
|
|
|
|
|
|
namespace Phpml\DimensionReduction;
|
|
|
|
|
|
|
|
|
|
use Phpml\Math\Distance\Euclidean;
|
|
|
|
|
use Phpml\Math\Distance\Manhattan;
|
|
|
|
|
use Phpml\Math\Matrix;
|
|
|
|
|
|
|
|
|
|
class KernelPCA extends PCA
|
|
|
|
|
{
|
|
|
|
|
const KERNEL_RBF = 1;
|
|
|
|
|
const KERNEL_SIGMOID = 2;
|
|
|
|
|
const KERNEL_LAPLACIAN = 3;
|
|
|
|
|
const KERNEL_LINEAR = 4;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Selected kernel function
|
|
|
|
|
*
|
|
|
|
|
* @var int
|
|
|
|
|
*/
|
|
|
|
|
protected $kernel;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Gamma value used by the kernel
|
|
|
|
|
*
|
|
|
|
|
* @var float
|
|
|
|
|
*/
|
|
|
|
|
protected $gamma;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Original dataset used to fit KernelPCA
|
|
|
|
|
*
|
|
|
|
|
* @var array
|
|
|
|
|
*/
|
|
|
|
|
protected $data;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Kernel principal component analysis (KernelPCA) is an extension of PCA using
|
|
|
|
|
* techniques of kernel methods. It is more suitable for data that involves
|
|
|
|
|
* vectors that are not linearly separable<br><br>
|
|
|
|
|
* Example: <b>$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);</b>
|
|
|
|
|
* will initialize the algorithm with an RBF kernel having the gamma parameter as 15,0. <br>
|
|
|
|
|
* This transformation will return the same number of rows with only <i>2</i> columns.
|
|
|
|
|
*
|
|
|
|
|
* @param float $totalVariance Total variance to be preserved if numFeatures is not given
|
2017-08-17 06:50:37 +00:00
|
|
|
|
* @param int $numFeatures Number of columns to be returned
|
|
|
|
|
* @param float $gamma Gamma parameter is used with RBF and Sigmoid kernels
|
2017-04-23 07:03:30 +00:00
|
|
|
|
*
|
|
|
|
|
* @throws \Exception
|
|
|
|
|
*/
|
2017-11-06 07:56:37 +00:00
|
|
|
|
public function __construct(int $kernel = self::KERNEL_RBF, float $totalVariance = null, int $numFeatures = null, float $gamma = null)
|
2017-04-23 07:03:30 +00:00
|
|
|
|
{
|
|
|
|
|
$availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
|
2017-05-17 07:03:25 +00:00
|
|
|
|
if (!in_array($kernel, $availableKernels)) {
|
2017-08-17 06:50:37 +00:00
|
|
|
|
throw new \Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
|
2017-04-23 07:03:30 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
parent::__construct($totalVariance, $numFeatures);
|
|
|
|
|
|
|
|
|
|
$this->kernel = $kernel;
|
|
|
|
|
$this->gamma = $gamma;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Takes a data and returns a lower dimensional version
|
|
|
|
|
* of this data while preserving $totalVariance or $numFeatures. <br>
|
|
|
|
|
* $data is an n-by-m matrix and returned array is
|
|
|
|
|
* n-by-k matrix where k <= m
|
|
|
|
|
*/
|
2017-11-06 07:56:37 +00:00
|
|
|
|
public function fit(array $data) : array
|
2017-04-23 07:03:30 +00:00
|
|
|
|
{
|
|
|
|
|
$numRows = count($data);
|
|
|
|
|
$this->data = $data;
|
|
|
|
|
|
|
|
|
|
if ($this->gamma === null) {
|
|
|
|
|
$this->gamma = 1.0 / $numRows;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
$matrix = $this->calculateKernelMatrix($this->data, $numRows);
|
|
|
|
|
$matrix = $this->centerMatrix($matrix, $numRows);
|
|
|
|
|
|
2017-05-17 07:03:25 +00:00
|
|
|
|
$this->eigenDecomposition($matrix);
|
2017-04-23 07:03:30 +00:00
|
|
|
|
|
|
|
|
|
$this->fit = true;
|
|
|
|
|
|
|
|
|
|
return Matrix::transposeArray($this->eigVectors);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Calculates similarity matrix by use of selected kernel function<br>
|
|
|
|
|
* An n-by-m matrix is given and an n-by-n matrix is returned
|
|
|
|
|
*/
|
2017-11-06 07:56:37 +00:00
|
|
|
|
protected function calculateKernelMatrix(array $data, int $numRows) : array
|
2017-04-23 07:03:30 +00:00
|
|
|
|
{
|
|
|
|
|
$kernelFunc = $this->getKernel();
|
|
|
|
|
|
|
|
|
|
$matrix = [];
|
2017-05-17 07:03:25 +00:00
|
|
|
|
for ($i = 0; $i < $numRows; ++$i) {
|
|
|
|
|
for ($k = 0; $k < $numRows; ++$k) {
|
2017-04-23 07:03:30 +00:00
|
|
|
|
if ($i <= $k) {
|
|
|
|
|
$matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
|
|
|
|
|
} else {
|
|
|
|
|
$matrix[$i][$k] = $matrix[$k][$i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return $matrix;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Kernel matrix is centered in its original space by using the following
|
|
|
|
|
* conversion:
|
|
|
|
|
*
|
|
|
|
|
* K′ = K − N.K − K.N + N.K.N where N is n-by-n matrix filled with 1/n
|
|
|
|
|
*/
|
2017-11-06 07:56:37 +00:00
|
|
|
|
protected function centerMatrix(array $matrix, int $n) : array
|
2017-04-23 07:03:30 +00:00
|
|
|
|
{
|
2017-08-17 06:50:37 +00:00
|
|
|
|
$N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
|
2017-04-23 07:03:30 +00:00
|
|
|
|
$N = new Matrix($N, false);
|
|
|
|
|
$K = new Matrix($matrix, false);
|
|
|
|
|
|
|
|
|
|
// K.N (This term is repeated so we cache it once)
|
|
|
|
|
$K_N = $K->multiply($N);
|
|
|
|
|
// N.K
|
|
|
|
|
$N_K = $N->multiply($K);
|
|
|
|
|
// N.K.N
|
|
|
|
|
$N_K_N = $N->multiply($K_N);
|
|
|
|
|
|
|
|
|
|
return $K->subtract($N_K)
|
|
|
|
|
->subtract($K_N)
|
|
|
|
|
->add($N_K_N)
|
|
|
|
|
->toArray();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns the callable kernel function
|
|
|
|
|
*
|
2017-05-17 07:03:25 +00:00
|
|
|
|
* @throws \Exception
|
2017-04-23 07:03:30 +00:00
|
|
|
|
*/
|
2017-11-06 07:56:37 +00:00
|
|
|
|
protected function getKernel(): \Closure
|
2017-04-23 07:03:30 +00:00
|
|
|
|
{
|
|
|
|
|
switch ($this->kernel) {
|
|
|
|
|
case self::KERNEL_LINEAR:
|
|
|
|
|
// k(x,y) = xT.y
|
|
|
|
|
return function ($x, $y) {
|
|
|
|
|
return Matrix::dot($x, $y)[0];
|
|
|
|
|
};
|
|
|
|
|
case self::KERNEL_RBF:
|
|
|
|
|
// k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance
|
|
|
|
|
$dist = new Euclidean();
|
2017-08-17 06:50:37 +00:00
|
|
|
|
|
2017-04-23 07:03:30 +00:00
|
|
|
|
return function ($x, $y) use ($dist) {
|
|
|
|
|
return exp(-$this->gamma * $dist->sqDistance($x, $y));
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
case self::KERNEL_SIGMOID:
|
|
|
|
|
// k(x,y)=tanh(γ.xT.y+c0) where c0=1
|
|
|
|
|
return function ($x, $y) {
|
|
|
|
|
$res = Matrix::dot($x, $y)[0] + 1.0;
|
2017-08-17 06:50:37 +00:00
|
|
|
|
|
2017-04-23 07:03:30 +00:00
|
|
|
|
return tanh($this->gamma * $res);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
case self::KERNEL_LAPLACIAN:
|
|
|
|
|
// k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
|
|
|
|
|
$dist = new Manhattan();
|
2017-08-17 06:50:37 +00:00
|
|
|
|
|
2017-04-23 07:03:30 +00:00
|
|
|
|
return function ($x, $y) use ($dist) {
|
|
|
|
|
return exp(-$this->gamma * $dist->distance($x, $y));
|
|
|
|
|
};
|
2017-05-17 07:03:25 +00:00
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
throw new \Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
|
2017-04-23 07:03:30 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-06 07:56:37 +00:00
|
|
|
|
protected function getDistancePairs(array $sample) : array
|
2017-04-23 07:03:30 +00:00
|
|
|
|
{
|
|
|
|
|
$kernel = $this->getKernel();
|
|
|
|
|
|
|
|
|
|
$pairs = [];
|
|
|
|
|
foreach ($this->data as $row) {
|
|
|
|
|
$pairs[] = $kernel($row, $sample);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return $pairs;
|
|
|
|
|
}
|
|
|
|
|
|
2017-11-06 07:56:37 +00:00
|
|
|
|
protected function projectSample(array $pairs) : array
|
2017-04-23 07:03:30 +00:00
|
|
|
|
{
|
|
|
|
|
// Normalize eigenvectors by eig = eigVectors / eigValues
|
|
|
|
|
$func = function ($eigVal, $eigVect) {
|
|
|
|
|
$m = new Matrix($eigVect, false);
|
|
|
|
|
$a = $m->divideByScalar($eigVal)->toArray();
|
|
|
|
|
|
|
|
|
|
return $a[0];
|
|
|
|
|
};
|
|
|
|
|
$eig = array_map($func, $this->eigValues, $this->eigVectors);
|
|
|
|
|
|
|
|
|
|
// return k.dot(eig)
|
|
|
|
|
return Matrix::dot($pairs, $eig);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Transforms the given sample to a lower dimensional vector by using
|
|
|
|
|
* the variables obtained during the last run of <code>fit</code>.
|
|
|
|
|
*
|
2017-05-17 07:03:25 +00:00
|
|
|
|
* @throws \Exception
|
2017-04-23 07:03:30 +00:00
|
|
|
|
*/
|
2017-11-06 07:56:37 +00:00
|
|
|
|
public function transform(array $sample) : array
|
2017-04-23 07:03:30 +00:00
|
|
|
|
{
|
|
|
|
|
if (!$this->fit) {
|
2017-08-17 06:50:37 +00:00
|
|
|
|
throw new \Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
|
2017-04-23 07:03:30 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (is_array($sample[0])) {
|
2017-08-17 06:50:37 +00:00
|
|
|
|
throw new \Exception('KernelPCA::transform() accepts only one-dimensional arrays');
|
2017-04-23 07:03:30 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
$pairs = $this->getDistancePairs($sample);
|
|
|
|
|
|
|
|
|
|
return $this->projectSample($pairs);
|
|
|
|
|
}
|
|
|
|
|
}
|