*
     * @param float|null $totalVariance Total explained variance to be preserved (0.1 to 0.99)
     * @param int|null   $numFeatures   Number of features to be preserved (greater than 0)
     *
     * @throws InvalidArgumentException When a supplied value is out of range, or when
     *                                  both or neither of the two criteria are given
     */
    public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
    {
        $varianceGiven = $totalVariance !== null;
        $featuresGiven = $numFeatures !== null;

        // Range checks come first so an out-of-range value is reported
        // before the "exactly one criterion" rule is evaluated.
        if ($varianceGiven && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
            throw new InvalidArgumentException('Total variance can be a value between 0.1 and 0.99');
        }

        if ($featuresGiven && $numFeatures <= 0) {
            throw new InvalidArgumentException('Number of features to be preserved should be greater than 0');
        }

        // Exactly one of the two criteria must be supplied: reject both-set and both-missing.
        if ($varianceGiven === $featuresGiven) {
            throw new InvalidArgumentException('Either totalVariance or numFeatures should be specified in order to run the algorithm');
        }

        // Past this point exactly one argument is non-null; store only that one
        // and leave the other property at its declared default.
        if ($featuresGiven) {
            $this->numFeatures = $numFeatures;
        } else {
            $this->totalVariance = $totalVariance;
        }
    }

    /**
     * Takes a data and returns a lower dimensional version
     * of this data while preserving $totalVariance or $numFeatures.
* $data is an n-by-m matrix and returned array is
     * n-by-k matrix where k <= m
     *
     * The per-dimension means are recomputed on every call, so running fit()
     * again on a different dataset does not center it with the means of the
     * previously fitted one.
     *
     * @param array $data Rows are samples, columns are dimensions; must contain at least one row
     *
     * @return array The input data after centering and reduction
     */
    public function fit(array $data): array
    {
        $n = count($data[0]);

        // normalize() only computes means when none are stored yet, so refresh
        // them explicitly here; otherwise a second fit() would reuse stale means.
        $this->calculateMeans($data, $n);
        $data = $this->normalize($data, $n);

        // The data is already centered, hence the zero vector as second argument
        // (presumably the per-column means expected by Covariance - confirm there).
        $covMatrix = Covariance::covarianceMatrix($data, array_fill(0, $n, 0));

        $this->eigenDecomposition($covMatrix);

        $this->fit = true;

        return $this->reduce($data);
    }

    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the eigenVectors obtained in the last run of fit.
     *
     * A single flat sample is wrapped into a one-row matrix before processing;
     * the sample(s) are centered with normalize() and then passed to reduce().
     *
     * @param array $sample Either one sample (flat array) or a list of samples
     *
     * @return array Result of reduce() for the centered sample(s)
     *
     * @throws InvalidOperationException When fit() has not been run yet
     */
    public function transform(array $sample): array
    {
        if (!$this->fit) {
            throw new InvalidOperationException('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
        }

        // Promote a single flat sample to a one-row matrix
        if (!is_array($sample[0])) {
            $sample = [$sample];
        }

        $sample = $this->normalize($sample, count($sample[0]));

        return $this->reduce($sample);
    }

    /**
     * Computes the arithmetic mean of each of the first $n columns of $data
     * and stores them in $this->means, replacing any previous values.
     *
     * @param array $data Matrix whose rows are samples
     * @param int   $n    Number of dimensions (columns) to process
     */
    protected function calculateMeans(array $data, int $n): void
    {
        // Calculate means for each dimension
        $this->means = [];
        for ($i = 0; $i < $n; ++$i) {
            $column = array_column($data, $i);
            $this->means[] = Mean::arithmetic($column);
        }
    }

    /**
     * Normalization of the data includes subtracting mean from
     * each dimension therefore dimensions will be centered to zero
     *
     * If no means are stored yet, they are first computed from $data.
     *
     * @param array $data Matrix whose rows are samples
     * @param int   $n    Number of dimensions (columns) to center
     *
     * @return array Copy of $data with the stored mean subtracted from each column
     */
    protected function normalize(array $data, int $n): array
    {
        if (count($this->means) === 0) {
            $this->calculateMeans($data, $n);
        }

        // Normalize data
        foreach (array_keys($data) as $i) {
            for ($k = 0; $k < $n; ++$k) {
                $data[$i][$k] -= $this->means[$k];
            }
        }

        return $data;
    }
}