2016-05-08 18:35:01 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-05-08 18:35:01 +00:00
|
|
|
|
2018-01-06 12:09:33 +00:00
|
|
|
namespace Phpml\Tests\Preprocessing;
|
2016-05-08 18:35:01 +00:00
|
|
|
|
2017-11-28 07:00:13 +00:00
|
|
|
use Phpml\Exception\NormalizerException;
|
2016-05-08 18:35:01 +00:00
|
|
|
use Phpml\Preprocessing\Normalizer;
|
2017-02-03 11:58:25 +00:00
|
|
|
use PHPUnit\Framework\TestCase;
|
2016-05-08 18:35:01 +00:00
|
|
|
|
2017-02-03 11:58:25 +00:00
|
|
|
/**
 * Tests for {@see Normalizer}: constructor validation and the row-wise
 * normalization performed by transform() for each supported norm.
 *
 * Every test reads $samples after calling transform(), i.e. the normalizer
 * is expected to modify the array it is given in place.
 */
class NormalizerTest extends TestCase
{
    /**
     * Tolerance used when comparing normalized values against the rounded
     * expectations written in the fixtures below.
     */
    private const DELTA = 0.01;

    /**
     * Constructing a Normalizer with an unsupported norm value must throw.
     */
    public function testThrowExceptionOnInvalidNorm(): void
    {
        $this->expectException(NormalizerException::class);

        new Normalizer(99);
    }

    /**
     * A Normalizer constructed without arguments scales each row to unit
     * Euclidean length (e.g. [1, -1, 2] / sqrt(6) ~= [0.4, -0.4, 0.81]).
     */
    public function testNormalizeSamplesWithL2Norm(): void
    {
        $samples = [
            [1, -1, 2],
            [2, 0, 0],
            [0, 1, -1],
        ];

        $normalized = [
            [0.4, -0.4, 0.81],
            [1.0, 0.0, 0.0],
            [0.0, 0.7, -0.7],
        ];

        $normalizer = new Normalizer();
        $normalizer->transform($samples);

        self::assertEqualsWithDelta($normalized, $samples, self::DELTA);
    }

    /**
     * With NORM_L1 each row is divided by the sum of the absolute values of
     * its elements (e.g. [1, -1, 2] / 4 = [0.25, -0.25, 0.5]).
     */
    public function testNormalizeSamplesWithL1Norm(): void
    {
        $samples = [
            [1, -1, 2],
            [2, 0, 0],
            [0, 1, -1],
        ];

        $normalized = [
            [0.25, -0.25, 0.5],
            [1.0, 0.0, 0.0],
            [0.0, 0.5, -0.5],
        ];

        $normalizer = new Normalizer(Normalizer::NORM_L1);
        $normalizer->transform($samples);

        self::assertEqualsWithDelta($normalized, $samples, self::DELTA);
    }

    /**
     * Calling fit() after transform() must leave the already-normalized
     * samples untouched.
     */
    public function testFitNotChangeNormalizerBehavior(): void
    {
        $samples = [
            [1, -1, 2],
            [2, 0, 0],
            [0, 1, -1],
        ];

        $normalized = [
            [0.4, -0.4, 0.81],
            [1.0, 0.0, 0.0],
            [0.0, 0.7, -0.7],
        ];

        $normalizer = new Normalizer();
        $normalizer->transform($samples);

        self::assertEqualsWithDelta($normalized, $samples, self::DELTA);

        // fit() receives the transformed data; the same expectation must still hold.
        $normalizer->fit($samples);

        self::assertEqualsWithDelta($normalized, $samples, self::DELTA);
    }

    /**
     * With NORM_L1 an all-zero row cannot be divided by its norm; the
     * expected result is a uniform row whose elements are each 1/3.
     */
    public function testL1NormWithZeroSumCondition(): void
    {
        $samples = [
            [0, 0, 0],
            [2, 0, 0],
            [0, 1, -1],
        ];

        $normalized = [
            [0.33, 0.33, 0.33],
            [1.0, 0.0, 0.0],
            [0.0, 0.5, -0.5],
        ];

        $normalizer = new Normalizer(Normalizer::NORM_L1);
        $normalizer->transform($samples);

        self::assertEqualsWithDelta($normalized, $samples, self::DELTA);
    }

    /**
     * With NORM_STD every normalized value must lie within [-3, 3], and a
     * feature that has the same value in every sample must come out as 0.
     */
    public function testStandardNorm(): void
    {
        // Build 10 samples: three random features followed by a fourth feature
        // whose value (1) is shared across all samples.
        // No srand() call is needed: random_int() draws from the system CSPRNG
        // and is not affected by seeding.
        $samples = [];
        for ($i = 0; $i < 10; ++$i) {
            $samples[] = [
                random_int(1, 100),
                random_int(1, 100),
                random_int(1, 100),
                1,
            ];
        }

        // Use standard normalization
        $normalizer = new Normalizer(Normalizer::NORM_STD);
        $normalizer->transform($samples);

        // transform() must keep the number of samples unchanged.
        self::assertCount(10, $samples);

        foreach ($samples as $sample) {
            // Collect every value that falls outside the [-3, 3] band.
            $outOfRange = array_filter(
                $sample,
                static function ($element): bool {
                    return $element < -3 || $element > 3;
                }
            );

            self::assertCount(0, $outOfRange);
            // Loose comparison on purpose: the normalized value may be int 0 or float 0.0.
            self::assertEquals(0, $sample[3]);
        }
    }
}
|