From 60c796f5d9e04b0fd22ebb4c41382e8965ed9153 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Sat, 30 Apr 2016 00:58:54 +0200 Subject: [PATCH] create matrix calculation for ls regression for multiple variable --- .../Exception/InvalidArgumentException.php | 17 ++ src/Phpml/Exception/MatrixException.php | 25 ++ src/Phpml/Math/Matrix.php | 253 ++++++++++++++++++ src/Phpml/Regression/LeastSquares.php | 73 ++--- tests/Phpml/Regression/LeastSquaresTest.php | 16 +- 5 files changed, 324 insertions(+), 60 deletions(-) create mode 100644 src/Phpml/Exception/MatrixException.php create mode 100644 src/Phpml/Math/Matrix.php diff --git a/src/Phpml/Exception/InvalidArgumentException.php b/src/Phpml/Exception/InvalidArgumentException.php index 6b10a1a..c852ecf 100644 --- a/src/Phpml/Exception/InvalidArgumentException.php +++ b/src/Phpml/Exception/InvalidArgumentException.php @@ -41,4 +41,21 @@ class InvalidArgumentException extends \Exception { return new self(sprintf('The array must have at least %s elements', $minimumSize)); } + + /** + * @return InvalidArgumentException + */ + public static function matrixDimensionsDidNotMatch() + { + return new self('Matrix dimensions did not match'); + } + + /** + * @return InvalidArgumentException + */ + public static function inconsistentMatrixSupplied() + { + return new self('Inconsistent matrix aupplied'); + } + } diff --git a/src/Phpml/Exception/MatrixException.php b/src/Phpml/Exception/MatrixException.php new file mode 100644 index 0000000..8186f0a --- /dev/null +++ b/src/Phpml/Exception/MatrixException.php @@ -0,0 +1,25 @@ +rows = count($matrix); + $this->columns = count($matrix[0]); + + if($validate) { + for ($i = 0; $i < $this->rows; $i++) { + if (count($matrix[$i]) !== $this->columns) { + throw InvalidArgumentException::matrixDimensionsDidNotMatch(); + } + } + } + + $this->matrix = $matrix; + } + + /** + * @return array + */ + public function toArray() + { + return $this->matrix; + } + + /** + * @return int + */ + public function getRows() + { + return $this->rows; + } + + /** + * @return int + */ + public function getColumns() + { + return $this->columns; + } + + /** + * @param $column + * + * @return array + * + * @throws MatrixException + */ + public function getColumnValues($column) + { + if($column >= $this->columns) { + throw MatrixException::columnOutOfRange(); + } + + $values = []; + for ($i = 0; $i < $this->rows; $i++) { + $values[] = $this->matrix[$i][$column]; + } + + return $values; + } + + /** + * @return float|int + * + * @throws MatrixException + */ + public function getDeterminant() + { + if($this->determinant) { + return $this->determinant; + } + + if (!$this->isSquare()) { + throw MatrixException::notSquareMatrix(); + } + + $determinant = 0; + if ($this->rows == 1 && $this->columns == 1) { + $determinant = $this->matrix[0][0]; + } else if ($this->rows == 2 && $this->columns == 2) { + $determinant = $this->matrix[0][0] * $this->matrix[1][1] - + $this->matrix[0][1] * $this->matrix[1][0]; + } else { + for ($j = 0; $j < $this->columns; $j++) { + $subMatrix = $this->crossOut(0, $j); + if (fmod($j, 2) == 0) { + $determinant += $this->matrix[0][$j] * $subMatrix->getDeterminant(); + } else { + $determinant -= $this->matrix[0][$j] * $subMatrix->getDeterminant(); + } + } + } + + return $this->determinant = $determinant; + } + + /** + * @return bool + */ + public function isSquare() + { + return $this->columns === $this->rows; + } + + /** + * @return Matrix + */ + public function transpose() + { + $newMatrix = []; + for ($i = 0; $i < $this->rows; $i++) { + for ($j = 0; $j < $this->columns; $j++) { + $newMatrix[$j][$i] = $this->matrix[$i][$j]; + } + } + + return new self($newMatrix, false); + } + + /** + * @param Matrix $matrix + * + * @return Matrix + * + * @throws InvalidArgumentException + */ + public function multiply(Matrix $matrix) + { + if ($this->columns != $matrix->getRows()) { + throw InvalidArgumentException::inconsistentMatrixSupplied(); + } + + $product = []; + $multiplier = $matrix->toArray(); + for ($i = 0; $i < $this->rows; $i++) { + for ($j = 0; $j < $matrix->getColumns(); $j++) { + $product[$i][$j] = 0; + for ($k = 0; $k < $this->columns; $k++) { + $product[$i][$j] += $this->matrix[$i][$k] * $multiplier[$k][$j]; + } + } + } + return new self($product, false); + } + + /** + * @param $value + * + * @return Matrix + */ + public function divideByScalar($value) + { + $newMatrix = array(); + for ($i = 0; $i < $this->rows; $i++) { + for ($j = 0; $j < $this->columns; $j++) { + $newMatrix[$i][$j] = $this->matrix[$i][$j] / $value; + } + } + + return new self($newMatrix, false); + } + + /** + * @return Matrix + * + * @throws MatrixException + */ + public function inverse() + { + if (!$this->isSquare()) { + throw MatrixException::notSquareMatrix(); + } + + $newMatrix = array(); + for ($i = 0; $i < $this->rows; $i++) { + for ($j = 0; $j < $this->columns; $j++) { + $subMatrix = $this->crossOut($i, $j); + if (fmod($i + $j, 2) == 0) { + $newMatrix[$i][$j] = ($subMatrix->getDeterminant()); + } else { + $newMatrix[$i][$j] = -($subMatrix->getDeterminant()); + } + } + } + + $cofactorMatrix = new self($newMatrix, false); + + return $cofactorMatrix->transpose()->divideByScalar($this->getDeterminant()); + } + + /** + * @param int $row + * @param int $column + * + * @return Matrix + */ + public function crossOut(int $row, int $column) + { + $newMatrix = []; + $r = 0; + for ($i = 0; $i < $this->rows; $i++) { + $c = 0; + if ($row != $i) { + for ($j = 0; $j < $this->columns; $j++) { + if ($column != $j) { + $newMatrix[$r][$c] = $this->matrix[$i][$j]; + $c++; + } + } + $r++; + } + } + + return new self($newMatrix, false); + } + +} diff --git a/src/Phpml/Regression/LeastSquares.php b/src/Phpml/Regression/LeastSquares.php index 622c187..34dc745 100644 --- a/src/Phpml/Regression/LeastSquares.php +++ b/src/Phpml/Regression/LeastSquares.php @@ -4,9 +4,7 @@ declare (strict_types = 1); namespace Phpml\Regression; -use Phpml\Math\Statistic\Correlation; -use Phpml\Math\Statistic\StandardDeviation; -use Phpml\Math\Statistic\Mean; +use Phpml\Math\Matrix; class LeastSquares implements Regression { @@ -15,26 +13,21 @@ class LeastSquares implements Regression */ private $samples; - /** - * @var array - */ - private $features; - /** * @var array */ private $targets; - /** - * @var array - */ - private $slopes; - /** * @var float */ private $intercept; + /** + * @var array + */ + private $coefficients; + /** * @param array $samples * @param array $targets @@ -43,22 +36,20 @@ class LeastSquares implements Regression { $this->samples = $samples; $this->targets = $targets; - $this->features = []; - $this->computeSlopes(); - $this->computeIntercept(); + $this->computeCoefficients(); } /** - * @param float $sample + * @param array $sample * * @return mixed */ public function predict($sample) { $result = $this->intercept; - foreach ($this->slopes as $index => $slope) { - $result += ($slope * $sample[$index]); + foreach ($this->coefficients as $index => $coefficient) { + $result += $coefficient * $sample[$index]; } return $result; @@ -67,45 +58,23 @@ class LeastSquares implements Regression /** * @return array */ - public function getSlopes() + public function getCoefficients() { - return $this->slopes; - } - - private function computeSlopes() - { - $features = count($this->samples[0]); - $sdY = StandardDeviation::population($this->targets); - - for($i=0; $i<$features; $i++) { - $correlation = Correlation::pearson($this->getFeatures($i), $this->targets); - $sdXi = StandardDeviation::population($this->getFeatures($i)); - $this->slopes[] = $correlation * ($sdY / $sdXi); - } - } - - private function computeIntercept() - { - $this->intercept = Mean::arithmetic($this->targets); - foreach ($this->slopes as $index => $slope) { - $this->intercept -= $slope * Mean::arithmetic($this->getFeatures($index)); - } + return $this->coefficients; } /** - * @param $index - * - * @return array + * coefficient(b) = (X'X)-1X'Y */ - private function getFeatures($index) + private function computeCoefficients() { - if(!isset($this->features[$index])) { - $this->features[$index] = []; - foreach ($this->samples as $sample) { - $this->features[$index][] = $sample[$index]; - } - } + $samplesMatrix = new Matrix($this->samples); + $targetsMatrix = new Matrix($this->targets); - return $this->features[$index]; + $ts = $samplesMatrix->transpose()->multiply($samplesMatrix)->inverse(); + $tf = $samplesMatrix->transpose()->multiply($targetsMatrix); + + $this->coefficients = $ts->multiply($tf)->getColumnValues(0); + $this->intercept = array_shift($this->coefficients); } } diff --git a/tests/Phpml/Regression/LeastSquaresTest.php b/tests/Phpml/Regression/LeastSquaresTest.php index d5975d8..8859ba9 100644 --- a/tests/Phpml/Regression/LeastSquaresTest.php +++ b/tests/Phpml/Regression/LeastSquaresTest.php @@ -13,8 +13,8 @@ class LeastSquaresTest extends \PHPUnit_Framework_TestCase $delta = 0.01; //https://www.easycalculation.com/analytical/learn-least-square-regression.php - $samples = [[60], [61], [62], [63], [65]]; - $targets = [3.1, 3.6, 3.8, 4, 4.1]; + $samples = [[1, 60], [1, 61], [1, 62], [1, 63], [1, 65]]; + $targets = [[3.1], [3.6], [3.8], [4], [4.1]]; $regression = new LeastSquares(); $regression->train($samples, $targets); @@ -22,8 +22,8 @@ class LeastSquaresTest extends \PHPUnit_Framework_TestCase $this->assertEquals(4.06, $regression->predict([64]), '', $delta); //http://www.stat.wmich.edu/s216/book/node127.html - $samples = [[9300], [10565], [15000], [15000], [17764], [57000], [65940], [73676], [77006], [93739], [146088], [153260]]; - $targets = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025]; + $samples = [[1 ,9300], [1, 10565], [1, 15000], [1, 15000], [1, 17764], [1, 57000], [1, 65940], [1, 73676], [1, 77006], [1, 93739], [1, 146088], [1, 153260]]; + $targets = [[7100], [15500], [4400], [4400], [5900], [4600], [8800], [2000], [2750], [2550], [960], [1025]]; $regression = new LeastSquares(); $regression->train($samples, $targets); @@ -37,16 +37,16 @@ class LeastSquaresTest extends \PHPUnit_Framework_TestCase public function testPredictMultiFeaturesSamples() { - $delta = 0.01; + $delta = 1; //http://www.stat.wmich.edu/s216/book/node129.html - $samples = [[73676, 1996],[77006,1998],[ 10565, 2000],[146088, 1995],[ 15000, 2001],[ 65940, 2000],[ 9300, 2000],[ 93739, 1996],[153260, 1994],[ 17764, 2002],[ 57000, 1998],[ 15000, 2000]]; - $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; + $samples = [[1, 73676, 1996],[1, 77006, 1998],[1, 10565, 2000],[1, 146088, 1995],[1, 15000, 2001],[1, 65940, 2000],[1, 9300, 2000],[1, 93739, 1996],[1, 153260, 1994],[1, 17764, 2002],[1, 57000, 1998],[1, 15000, 2000]]; + $targets = [[2000], [ 2750], [15500], [ 960], [ 4400], [ 8800], [ 7100], [ 2550], [ 1025], [ 5900], [ 4600], [ 4400]]; $regression = new LeastSquares(); $regression->train($samples, $targets); - $this->assertEquals(3807, $regression->predict([60000, 1996]), '', $delta); + $this->assertEquals(4094, $regression->predict([60000, 1996]), '', $delta); } }