mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-05 04:57:52 +00:00
Implement first regression scoring function UnivariateLinearRegression
This commit is contained in:
parent
fbf84ca95f
commit
9e5b3a0c69
@ -0,0 +1,81 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\FeatureSelection\ScoringFunction;
|
||||
|
||||
use Phpml\FeatureSelection\ScoringFunction;
|
||||
use Phpml\Math\Matrix;
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
/**
 * Quick linear model for testing the effect of a single regressor,
 * sequentially for many regressors.
 *
 * This is done in 2 steps:
 *
 * 1. The cross correlation between each regressor and the target is computed,
 *    that is, dot(X[:, i], y) / (norm(X[:, i]) * norm(y)), where X and y are
 *    mean-centered first when centering is enabled.
 * 2. The correlation r is converted to an F score: r^2 / (1 - r^2) * degrees of freedom.
 *
 * Degenerate inputs are scored explicitly instead of dividing by zero:
 * a regressor (or target vector) with zero norm scores 0.0, and a perfect
 * (anti-)correlation scores INF.
 *
 * Ported from scikit-learn f_regression function (http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_regression.html#sklearn.feature_selection.f_regression)
 */
final class UnivariateLinearRegression implements ScoringFunction
{
    /**
     * @var bool
     */
    private $center;

    /**
     * @param bool $center - if true samples and targets will be centered
     */
    public function __construct(bool $center = true)
    {
        $this->center = $center;
    }

    /**
     * Computes one F score per feature column of $samples against $targets.
     *
     * @param array $samples rows of feature values; the keys of $samples[0] define the feature indexes
     * @param array $targets one target value per sample row
     *
     * @return array F scores keyed by feature index (empty when there are no features)
     */
    public function score(array $samples, array $targets): array
    {
        if ($this->center) {
            $targets = $this->centerTargets($targets);
            $samples = $this->centerSamples($samples);
        }

        $featureIndexes = array_keys($samples[0] ?? []);
        if ($featureIndexes === []) {
            return [];
        }

        // Centering consumes one extra degree of freedom (the estimated mean).
        $degreesOfFreedom = count($targets) - ($this->center ? 2 : 1);

        // The target norm does not depend on the feature, so compute it once.
        $targetNorm = (new Matrix($targets, false))->frobeniusNorm();

        $scores = [];
        foreach ($featureIndexes as $index) {
            $featureColumn = array_column($samples, $index);
            $featureNorm = (new Matrix($featureColumn, false))->frobeniusNorm();

            // A zero norm means the correlation is undefined (e.g. a constant
            // column after centering); score it 0.0 rather than divide by zero.
            if ($featureNorm == 0.0 || $targetNorm == 0.0) {
                $scores[$index] = 0.0;

                continue;
            }

            $correlation = (Matrix::dot($targets, $featureColumn)[0] / $featureNorm) / $targetNorm;
            $scores[$index] = $this->fValue($correlation, $degreesOfFreedom);
        }

        return $scores;
    }

    /**
     * Converts a correlation coefficient into an F score.
     *
     * Returns INF when the squared correlation reaches 1 (also when rounding
     * pushes it slightly above 1), where the formula would divide by zero.
     */
    private function fValue(float $correlation, int $degreesOfFreedom): float
    {
        $squared = $correlation ** 2;
        if ($squared >= 1.0) {
            return INF;
        }

        return $squared / (1 - $squared) * $degreesOfFreedom;
    }

    /**
     * Returns a copy of $targets with their arithmetic mean subtracted (keys preserved).
     */
    private function centerTargets(array $targets): array
    {
        $mean = Mean::arithmetic($targets);

        return array_map(static function ($target) use ($mean) {
            return $target - $mean;
        }, $targets);
    }

    /**
     * Returns a copy of $samples where each feature column has its arithmetic mean subtracted.
     */
    private function centerSamples(array $samples): array
    {
        $means = [];
        foreach (array_keys($samples[0] ?? []) as $index) {
            $means[$index] = Mean::arithmetic(array_column($samples, $index));
        }

        foreach ($samples as $row => $sample) {
            foreach ($sample as $index => $feature) {
                $samples[$row][$index] = $feature - $means[$index];
            }
        }

        return $samples;
    }
}
|
@ -236,6 +236,29 @@ class Matrix
|
||||
return $this->getDeterminant() == 0;
|
||||
}
|
||||
|
||||
/**
 * Frobenius norm (Hilbert–Schmidt norm, Euclidean norm) of this matrix, ‖A‖F.
 *
 * It is the square root of the sum of the squares of all elements:
 *
 *     ‖A‖F = √( Σᵢ Σⱼ |aᵢⱼ|² ),  i = 1..m (rows), j = 1..n (columns)
 *
 * https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm
 */
public function frobeniusNorm(): float
{
    $rowCount = $this->rows;
    $columnCount = $this->columns;

    // Accumulate row by row, left to right, over every stored element.
    $total = 0;
    for ($row = 0; $row < $rowCount; ++$row) {
        for ($column = 0; $column < $columnCount; ++$column) {
            $element = $this->matrix[$row][$column];
            $total += $element ** 2;
        }
    }

    return sqrt($total);
}
|
||||
|
||||
/**
|
||||
* Returns the transpose of given array
|
||||
*/
|
||||
@ -259,7 +282,7 @@ class Matrix
|
||||
/**
|
||||
* Element-wise addition or substraction depending on the given sign parameter
|
||||
*/
|
||||
protected function _add(self $other, int $sign = 1): self
|
||||
private function _add(self $other, int $sign = 1): self
|
||||
{
|
||||
$a1 = $this->toArray();
|
||||
$a2 = $other->toArray();
|
||||
@ -277,7 +300,7 @@ class Matrix
|
||||
/**
|
||||
* Returns diagonal identity matrix of the same size of this matrix
|
||||
*/
|
||||
protected function getIdentity(): self
|
||||
private function getIdentity(): self
|
||||
{
|
||||
$array = array_fill(0, $this->rows, array_fill(0, $this->columns, 0));
|
||||
for ($i = 0; $i < $this->rows; ++$i) {
|
||||
|
@ -0,0 +1,29 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Tests\FeatureSelection\ScoringFunction;
|
||||
|
||||
use Phpml\FeatureSelection\ScoringFunction\UnivariateLinearRegression;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
final class UnivariateLinearRegressionTest extends TestCase
{
    /**
     * Two-feature sample rows shared by both scoring tests.
     */
    private const SAMPLES = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]];

    /**
     * One target value per row of SAMPLES.
     */
    private const TARGETS = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400];

    /**
     * Default construction (centering enabled) must yield the expected F scores.
     */
    public function testRegressionScore(): void
    {
        $scorer = new UnivariateLinearRegression();
        $scores = $scorer->score(self::SAMPLES, self::TARGETS);

        self::assertEquals([6.97286, 6.48558], $scores, '', 0.0001);
    }

    /**
     * With centering disabled the same data must yield the uncentered F scores.
     */
    public function testRegressionScoreWithoutCenter(): void
    {
        $scorer = new UnivariateLinearRegression(false);
        $scores = $scorer->score(self::SAMPLES, self::TARGETS);

        self::assertEquals([1.74450, 18.08347], $scores, '', 0.0001);
    }
}
|
@ -8,6 +8,7 @@ use Phpml\Dataset\Demo\IrisDataset;
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
use Phpml\Exception\InvalidOperationException;
|
||||
use Phpml\FeatureSelection\ScoringFunction\ANOVAFValue;
|
||||
use Phpml\FeatureSelection\ScoringFunction\UnivariateLinearRegression;
|
||||
use Phpml\FeatureSelection\SelectKBest;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
@ -45,6 +46,21 @@ final class SelectKBestTest extends TestCase
|
||||
self::assertEquals(2, count($samples[0]));
|
||||
}
|
||||
|
||||
/**
 * Selecting the 2 best of 3 features with the regression scorer must drop
 * the third column and keep the first two untouched.
 */
public function testSelectKBestWithRegressionScoring(): void
{
    $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400];
    $samples = [[73676, 1996, 2], [77006, 1998, 5], [10565, 2000, 4], [146088, 1995, 2], [15000, 2001, 2], [65940, 2000, 2], [9300, 2000, 2], [93739, 1996, 2], [153260, 1994, 2], [17764, 2002, 2], [57000, 1998, 2], [15000, 2000, 2]];
    $expected = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]];

    $kBest = new SelectKBest(new UnivariateLinearRegression(), 2);
    $kBest->fit($samples, $targets);
    // transform() is called for its effect on $samples, which is asserted below.
    $kBest->transform($samples);

    self::assertEquals($expected, $samples);
}
|
||||
|
||||
public function testThrowExceptionOnEmptyTargets(): void
|
||||
{
|
||||
$this->expectException(InvalidArgumentException::class);
|
||||
|
@ -251,4 +251,55 @@ class MatrixTest extends TestCase
|
||||
$dot = [6, 12];
|
||||
$this->assertEquals($dot, Matrix::dot($matrix2, $matrix1));
|
||||
}
|
||||
|
||||
/**
 * The Frobenius norm of each provided matrix must match the expected value
 * within a tolerance of 0.0001.
 *
 * @dataProvider dataProviderForFrobeniusNorm
 */
public function testFrobeniusNorm(array $matrix, float $norm): void
{
    $actualNorm = (new Matrix($matrix))->frobeniusNorm();

    $this->assertEquals($norm, $actualNorm, '', 0.0001);
}
|
||||
|
||||
/**
 * Datasets for testFrobeniusNorm: each entry is [matrix rows, expected norm].
 */
public function dataProviderForFrobeniusNorm()
{
    $cases = [];

    // 2x2 with a negative entry: sqrt(63)
    $cases[] = [
        [
            [1, -7],
            [2, 3],
        ],
        7.93725,
    ];

    // 3x3: sqrt(93)
    $cases[] = [
        [
            [1, 2, 3],
            [2, 3, 4],
            [3, 4, 5],
        ],
        9.643651,
    ];

    // 3x4 (more columns than rows): sqrt(455)
    $cases[] = [
        [
            [1, 5, 3, 9],
            [2, 3, 4, 12],
            [4, 2, 5, 11],
        ],
        21.330729,
    ];

    // 4x3 (more rows than columns): sqrt(190)
    $cases[] = [
        [
            [1, 5, 3],
            [2, 3, 4],
            [4, 2, 5],
            [6, 6, 3],
        ],
        13.784049,
    ];

    // 3x3 with mixed signs and a zero: sqrt(64), an exact integer result
    $cases[] = [
        [
            [5, -4, 2],
            [-1, 2, 3],
            [-2, 1, 0],
        ],
        8,
    ];

    return $cases;
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user