2016-05-08 18:35:01 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-05-08 18:35:01 +00:00
|
|
|
|
|
|
|
namespace Phpml\Preprocessing;
|
|
|
|
|
|
|
|
use Phpml\Exception\NormalizerException;
|
2017-02-16 22:23:55 +00:00
|
|
|
use Phpml\Math\Statistic\Mean;
|
2017-11-06 07:56:37 +00:00
|
|
|
use Phpml\Math\Statistic\StandardDeviation;
|
2016-05-08 18:35:01 +00:00
|
|
|
|
|
|
|
/**
 * Rescales samples in place using one of three strategies:
 *
 *  - NORM_L1:  each sample is divided by the sum of the absolute values of its features,
 *  - NORM_L2:  each sample is divided by the square root of the sum of its squared features,
 *  - NORM_STD: each feature has the fitted mean subtracted and is divided by the fitted
 *              population standard deviation of that feature.
 *
 * L1 and L2 work on every sample independently; only NORM_STD needs statistics
 * gathered by fit().
 */
class Normalizer implements Preprocessor
{
    public const NORM_L1 = 1;

    public const NORM_L2 = 2;

    public const NORM_STD = 3;

    /**
     * One of the NORM_* constants.
     *
     * @var int
     */
    private $norm;

    /**
     * Whether fit() has already completed; later fit() calls are then no-ops.
     *
     * @var bool
     */
    private $fitted = false;

    /**
     * Per-feature standard deviation, keyed by feature index (NORM_STD only).
     *
     * @var array
     */
    private $std = [];

    /**
     * Per-feature arithmetic mean, keyed by feature index (NORM_STD only).
     *
     * @var array
     */
    private $mean = [];

    /**
     * @param int $norm one of self::NORM_L1, self::NORM_L2 or self::NORM_STD
     *
     * @throws NormalizerException when $norm is not one of the NORM_* constants
     */
    public function __construct(int $norm = self::NORM_L2)
    {
        if (!in_array($norm, [self::NORM_L1, self::NORM_L2, self::NORM_STD], true)) {
            throw NormalizerException::unknownNorm();
        }

        $this->norm = $norm;
    }

    /**
     * Collects the per-feature mean and standard deviation needed by NORM_STD.
     *
     * For NORM_L1 and NORM_L2 nothing is computed; the instance is only marked as fitted.
     * Once fitted, further calls return immediately and keep the existing statistics.
     *
     * @param array      $samples list of samples, each an array of numeric features
     * @param array|null $targets not used by this preprocessor
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        if ($this->fitted) {
            return;
        }

        if ($this->norm === self::NORM_STD) {
            if ($samples === []) {
                // Nothing to learn from. Stay unfitted so that a later call with real
                // data can still compute the statistics.
                return;
            }

            // Feature indices come from the first sample. array_keys() yields an empty
            // list for a zero-feature sample, whereas range(0, -1) would yield [0, -1].
            foreach (array_keys(reset($samples)) as $i) {
                $values = array_column($samples, $i);
                $this->std[$i] = StandardDeviation::population($values);
                $this->mean[$i] = Mean::arithmetic($values);
            }
        }

        $this->fitted = true;
    }

    /**
     * Normalizes every sample in place with the configured norm.
     *
     * If fit() has not completed yet, the statistics are taken from $samples itself.
     *
     * @param array $samples list of samples, modified by reference
     */
    public function transform(array &$samples): void
    {
        $methods = [
            self::NORM_L1 => 'normalizeL1',
            self::NORM_L2 => 'normalizeL2',
            self::NORM_STD => 'normalizeSTD',
        ];
        $method = $methods[$this->norm];

        $this->fit($samples);

        foreach ($samples as &$sample) {
            $this->{$method}($sample);
        }

        // Break the reference so $sample no longer aliases the last element.
        unset($sample);
    }

    /**
     * Divides each feature by the sum of absolute feature values.
     *
     * A sample whose absolute sum is zero is replaced by a uniform vector whose
     * entries are 1 / count. An empty sample is left untouched.
     */
    private function normalizeL1(array &$sample): void
    {
        $count = count($sample);
        if ($count === 0) {
            // Avoids the 1.0 / 0 division below.
            return;
        }

        $norm1 = 0;
        foreach ($sample as $feature) {
            $norm1 += abs($feature);
        }

        // Loose comparison on purpose: the accumulator is int or float depending on the input.
        if ($norm1 == 0) {
            $sample = array_fill(0, $count, 1.0 / $count);

            return;
        }

        foreach ($sample as &$feature) {
            $feature /= $norm1;
        }

        unset($feature);
    }

    /**
     * Divides each feature by the square root of the sum of squared features.
     *
     * A sample whose norm is zero is replaced by a vector of integer ones.
     */
    private function normalizeL2(array &$sample): void
    {
        $norm2 = 0;
        foreach ($sample as $feature) {
            $norm2 += $feature * $feature;
        }

        $norm2 = sqrt((float) $norm2);

        // sqrt() always returns a float, so a strict comparison is safe here.
        if ($norm2 === 0.0) {
            $sample = array_fill(0, count($sample), 1);

            return;
        }

        foreach ($sample as &$feature) {
            $feature /= $norm2;
        }

        unset($feature);
    }

    /**
     * Standardizes each feature with the mean and standard deviation stored by fit().
     *
     * A feature whose fitted standard deviation is zero is set to 0.
     */
    private function normalizeSTD(array &$sample): void
    {
        foreach ($sample as $i => $val) {
            if ($this->std[$i] != 0) {
                $sample[$i] = ($val - $this->mean[$i]) / $this->std[$i];
            } else {
                // Same value for all samples.
                $sample[$i] = 0;
            }
        }
    }
}
|