create matrix calculation for ls regression for multiple variable

This commit is contained in:
Arkadiusz Kondas 2016-04-30 00:58:54 +02:00
parent 9d74174a68
commit 60c796f5d9
5 changed files with 324 additions and 60 deletions

View File

@ -41,4 +41,21 @@ class InvalidArgumentException extends \Exception
{
return new self(sprintf('The array must have at least %s elements', $minimumSize));
}
/**
* @return InvalidArgumentException
*/
public static function matrixDimensionsDidNotMatch()
{
return new self('Matrix dimensions did not match');
}
/**
* @return InvalidArgumentException
*/
public static function inconsistentMatrixSupplied()
{
return new self('Inconsistent matrix aupplied');
}
}

View File

@ -0,0 +1,25 @@
<?php
declare (strict_types = 1);
namespace Phpml\Exception;
class MatrixException extends \Exception
{
/**
* @return MatrixException
*/
public static function notSquareMatrix()
{
return new self('Matrix is not square matrix');
}
/**
* @return MatrixException
*/
public static function columnOutOfRange()
{
return new self('Column out of range');
}
}

253
src/Phpml/Math/Matrix.php Normal file
View File

@ -0,0 +1,253 @@
<?php
declare(strict_types = 1);
namespace Phpml\Math;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Exception\MatrixException;
class Matrix
{
/**
* @var array
*/
private $matrix;
/**
* @var int
*/
private $rows;
/**
* @var int
*/
private $columns;
/**
* @var float
*/
private $determinant;
/**
* @param array $matrix
* @param bool $validate
*
* @throws InvalidArgumentException
*/
public function __construct(array $matrix, bool $validate = true)
{
$this->rows = count($matrix);
$this->columns = count($matrix[0]);
if($validate) {
for ($i = 0; $i < $this->rows; $i++) {
if (count($matrix[$i]) !== $this->columns) {
throw InvalidArgumentException::matrixDimensionsDidNotMatch();
}
}
}
$this->matrix = $matrix;
}
/**
* @return array
*/
public function toArray()
{
return $this->matrix;
}
/**
* @return int
*/
public function getRows()
{
return $this->rows;
}
/**
* @return int
*/
public function getColumns()
{
return $this->columns;
}
/**
* @param $column
*
* @return array
*
* @throws MatrixException
*/
public function getColumnValues($column)
{
if($column >= $this->columns) {
throw MatrixException::columnOutOfRange();
}
$values = [];
for ($i = 0; $i < $this->rows; $i++) {
$values[] = $this->matrix[$i][$column];
}
return $values;
}
/**
* @return float|int
*
* @throws MatrixException
*/
public function getDeterminant()
{
if($this->determinant) {
return $this->determinant;
}
if (!$this->isSquare()) {
throw MatrixException::notSquareMatrix();
}
$determinant = 0;
if ($this->rows == 1 && $this->columns == 1) {
$determinant = $this->matrix[0][0];
} else if ($this->rows == 2 && $this->columns == 2) {
$determinant = $this->matrix[0][0] * $this->matrix[1][1] -
$this->matrix[0][1] * $this->matrix[1][0];
} else {
for ($j = 0; $j < $this->columns; $j++) {
$subMatrix = $this->crossOut(0, $j);
if (fmod($j, 2) == 0) {
$determinant += $this->matrix[0][$j] * $subMatrix->getDeterminant();
} else {
$determinant -= $this->matrix[0][$j] * $subMatrix->getDeterminant();
}
}
}
return $this->determinant = $determinant;
}
/**
* @return bool
*/
public function isSquare()
{
return $this->columns === $this->rows;
}
/**
* @return Matrix
*/
public function transpose()
{
$newMatrix = [];
for ($i = 0; $i < $this->rows; $i++) {
for ($j = 0; $j < $this->columns; $j++) {
$newMatrix[$j][$i] = $this->matrix[$i][$j];
}
}
return new self($newMatrix, false);
}
/**
* @param Matrix $matrix
*
* @return Matrix
*
* @throws InvalidArgumentException
*/
public function multiply(Matrix $matrix)
{
if ($this->columns != $matrix->getRows()) {
throw InvalidArgumentException::inconsistentMatrixSupplied();
}
$product = [];
$multiplier = $matrix->toArray();
for ($i = 0; $i < $this->rows; $i++) {
for ($j = 0; $j < $matrix->getColumns(); $j++) {
$product[$i][$j] = 0;
for ($k = 0; $k < $this->columns; $k++) {
$product[$i][$j] += $this->matrix[$i][$k] * $multiplier[$k][$j];
}
}
}
return new self($product, false);
}
/**
* @param $value
*
* @return Matrix
*/
public function divideByScalar($value)
{
$newMatrix = array();
for ($i = 0; $i < $this->rows; $i++) {
for ($j = 0; $j < $this->columns; $j++) {
$newMatrix[$i][$j] = $this->matrix[$i][$j] / $value;
}
}
return new self($newMatrix, false);
}
/**
* @return Matrix
*
* @throws MatrixException
*/
public function inverse()
{
if (!$this->isSquare()) {
throw MatrixException::notSquareMatrix();
}
$newMatrix = array();
for ($i = 0; $i < $this->rows; $i++) {
for ($j = 0; $j < $this->columns; $j++) {
$subMatrix = $this->crossOut($i, $j);
if (fmod($i + $j, 2) == 0) {
$newMatrix[$i][$j] = ($subMatrix->getDeterminant());
} else {
$newMatrix[$i][$j] = -($subMatrix->getDeterminant());
}
}
}
$cofactorMatrix = new self($newMatrix, false);
return $cofactorMatrix->transpose()->divideByScalar($this->getDeterminant());
}
/**
* @param int $row
* @param int $column
*
* @return Matrix
*/
public function crossOut(int $row, int $column)
{
$newMatrix = [];
$r = 0;
for ($i = 0; $i < $this->rows; $i++) {
$c = 0;
if ($row != $i) {
for ($j = 0; $j < $this->columns; $j++) {
if ($column != $j) {
$newMatrix[$r][$c] = $this->matrix[$i][$j];
$c++;
}
}
$r++;
}
}
return new self($newMatrix, false);
}
}

View File

@ -4,9 +4,7 @@ declare (strict_types = 1);
namespace Phpml\Regression;
use Phpml\Math\Statistic\Correlation;
use Phpml\Math\Statistic\StandardDeviation;
use Phpml\Math\Statistic\Mean;
use Phpml\Math\Matrix;
class LeastSquares implements Regression
{
@ -15,26 +13,21 @@ class LeastSquares implements Regression
*/
private $samples;
/**
* @var array
*/
private $features;
/**
* @var array
*/
private $targets;
/**
* @var array
*/
private $slopes;
/**
* @var float
*/
private $intercept;
/**
* @var array
*/
private $coefficients;
/**
* @param array $samples
* @param array $targets
@ -43,22 +36,20 @@ class LeastSquares implements Regression
{
$this->samples = $samples;
$this->targets = $targets;
$this->features = [];
$this->computeSlopes();
$this->computeIntercept();
$this->computeCoefficients();
}
/**
* @param float $sample
* @param array $sample
*
* @return mixed
*/
public function predict($sample)
{
$result = $this->intercept;
foreach ($this->slopes as $index => $slope) {
$result += ($slope * $sample[$index]);
foreach ($this->coefficients as $index => $coefficient) {
$result += $coefficient * $sample[$index];
}
return $result;
@ -67,45 +58,23 @@ class LeastSquares implements Regression
/**
* @return array
*/
public function getSlopes()
public function getCoefficients()
{
return $this->slopes;
}
private function computeSlopes()
{
$features = count($this->samples[0]);
$sdY = StandardDeviation::population($this->targets);
for($i=0; $i<$features; $i++) {
$correlation = Correlation::pearson($this->getFeatures($i), $this->targets);
$sdXi = StandardDeviation::population($this->getFeatures($i));
$this->slopes[] = $correlation * ($sdY / $sdXi);
}
}
private function computeIntercept()
{
$this->intercept = Mean::arithmetic($this->targets);
foreach ($this->slopes as $index => $slope) {
$this->intercept -= $slope * Mean::arithmetic($this->getFeatures($index));
}
return $this->coefficients;
}
/**
* @param $index
*
* @return array
* coefficient(b) = (X'X)-1X'Y
*/
private function getFeatures($index)
private function computeCoefficients()
{
if(!isset($this->features[$index])) {
$this->features[$index] = [];
foreach ($this->samples as $sample) {
$this->features[$index][] = $sample[$index];
}
}
$samplesMatrix = new Matrix($this->samples);
$targetsMatrix = new Matrix($this->targets);
return $this->features[$index];
$ts = $samplesMatrix->transpose()->multiply($samplesMatrix)->inverse();
$tf = $samplesMatrix->transpose()->multiply($targetsMatrix);
$this->coefficients = $ts->multiply($tf)->getColumnValues(0);
$this->intercept = array_shift($this->coefficients);
}
}

View File

@ -13,8 +13,8 @@ class LeastSquaresTest extends \PHPUnit_Framework_TestCase
$delta = 0.01;
//https://www.easycalculation.com/analytical/learn-least-square-regression.php
$samples = [[60], [61], [62], [63], [65]];
$targets = [3.1, 3.6, 3.8, 4, 4.1];
$samples = [[1, 60], [1, 61], [1, 62], [1, 63], [1, 65]];
$targets = [[3.1], [3.6], [3.8], [4], [4.1]];
$regression = new LeastSquares();
$regression->train($samples, $targets);
@ -22,8 +22,8 @@ class LeastSquaresTest extends \PHPUnit_Framework_TestCase
$this->assertEquals(4.06, $regression->predict([64]), '', $delta);
//http://www.stat.wmich.edu/s216/book/node127.html
$samples = [[9300], [10565], [15000], [15000], [17764], [57000], [65940], [73676], [77006], [93739], [146088], [153260]];
$targets = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025];
$samples = [[1 ,9300], [1, 10565], [1, 15000], [1, 15000], [1, 17764], [1, 57000], [1, 65940], [1, 73676], [1, 77006], [1, 93739], [1, 146088], [1, 153260]];
$targets = [[7100], [15500], [4400], [4400], [5900], [4600], [8800], [2000], [2750], [2550], [960], [1025]];
$regression = new LeastSquares();
$regression->train($samples, $targets);
@ -37,16 +37,16 @@ class LeastSquaresTest extends \PHPUnit_Framework_TestCase
public function testPredictMultiFeaturesSamples()
{
$delta = 0.01;
$delta = 1;
//http://www.stat.wmich.edu/s216/book/node129.html
$samples = [[73676, 1996],[77006,1998],[ 10565, 2000],[146088, 1995],[ 15000, 2001],[ 65940, 2000],[ 9300, 2000],[ 93739, 1996],[153260, 1994],[ 17764, 2002],[ 57000, 1998],[ 15000, 2000]];
$targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400];
$samples = [[1, 73676, 1996],[1, 77006, 1998],[1, 10565, 2000],[1, 146088, 1995],[1, 15000, 2001],[1, 65940, 2000],[1, 9300, 2000],[1, 93739, 1996],[1, 153260, 1994],[1, 17764, 2002],[1, 57000, 1998],[1, 15000, 2000]];
$targets = [[2000], [ 2750], [15500], [ 960], [ 4400], [ 8800], [ 7100], [ 2550], [ 1025], [ 5900], [ 4600], [ 4400]];
$regression = new LeastSquares();
$regression->train($samples, $targets);
$this->assertEquals(3807, $regression->predict([60000, 1996]), '', $delta);
$this->assertEquals(4094, $regression->predict([60000, 1996]), '', $delta);
}
}