2016-06-15 16:04:09 +02:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 22:53:17 +01:00
|
|
|
declare(strict_types=1);
|
2016-06-15 16:04:09 +02:00
|
|
|
|
2018-01-06 13:09:33 +01:00
|
|
|
namespace Phpml\Tests\FeatureExtraction;
|
2016-06-15 16:04:09 +02:00
|
|
|
|
|
|
|
use Phpml\FeatureExtraction\TfIdfTransformer;
|
2017-02-03 12:58:25 +01:00
|
|
|
use PHPUnit\Framework\TestCase;
|
2016-06-15 16:04:09 +02:00
|
|
|
|
2017-02-03 12:58:25 +01:00
|
|
|
/**
 * Tests TfIdfTransformer against a small, hand-checked term-count matrix.
 */
class TfIdfTransformerTest extends TestCase
{
    /**
     * Fits the transformer on two documents over six terms and checks the
     * weights written back into the sample matrix.
     *
     * The return value of transform() is not used; the assertion reads
     * $samples afterwards, so the test relies on transform() rewriting the
     * array it is given.
     */
    public function testTfIdfTransformation(): void
    {
        // https://en.wikipedia.org/wiki/Tf-idf
        // Rows are documents, keys are term indices, values are raw term counts.
        $samples = [
            [
                0 => 1,
                1 => 1,
                2 => 2,
                3 => 1,
                4 => 0,
                5 => 0,
            ],
            [
                0 => 1,
                1 => 1,
                2 => 0,
                3 => 0,
                4 => 2,
                5 => 3,
            ],
        ];

        // Expected weights, rounded to three decimals. Terms 0 and 1 occur in
        // both documents and are expected to weigh 0; the remaining values are
        // consistent with count * log10(2) (0.301 per occurrence).
        $tfIdfSamples = [
            [
                0 => 0,
                1 => 0,
                2 => 0.602,
                3 => 0.301,
                4 => 0,
                5 => 0,
            ],
            [
                0 => 0,
                1 => 0,
                2 => 0,
                3 => 0,
                4 => 0.602,
                5 => 0.903,
            ],
        ];

        $transformer = new TfIdfTransformer($samples);
        $transformer->transform($samples);

        // Use the dedicated delta assertion: the four-argument form of
        // assertEquals() (message + delta) is no longer supported by current
        // PHPUnit, where the tolerance would be ignored and the rounded
        // expectations above would never match exactly.
        $this->assertEqualsWithDelta($tfIdfSamples, $samples, 0.001);
    }
}
|