2016-06-15 14:04:09 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-06-15 14:04:09 +00:00
|
|
|
|
2018-01-06 12:09:33 +00:00
|
|
|
namespace Phpml\Tests\FeatureExtraction;
|
2016-06-15 14:04:09 +00:00
|
|
|
|
|
|
|
use Phpml\FeatureExtraction\TfIdfTransformer;
|
2017-02-03 11:58:25 +00:00
|
|
|
use PHPUnit\Framework\TestCase;
|
2016-06-15 14:04:09 +00:00
|
|
|
|
2017-02-03 11:58:25 +00:00
|
|
|
/**
 * Exercises TfIdfTransformer on a tiny two-sample, six-column fixture.
 */
class TfIdfTransformerTest extends TestCase
{
    /**
     * Fits a transformer on the fixture, transforms that same fixture and
     * compares the outcome with pre-computed values (tolerance 0.001).
     *
     * The return value of transform() is not used; the assertion inspects the
     * array that was passed in, so the test relies on transform() rewriting
     * its argument.
     */
    public function testTfIdfTransformation(): void
    {
        // https://en.wikipedia.org/wiki/Tf-idf

        // Values expected after the transformation. Columns 0 and 1 are
        // non-zero in both input rows and are expected to end up as 0.
        $expectedWeights = [
            [0, 0, 0.602, 0.301, 0, 0],
            [0, 0, 0, 0, 0.602, 0.903],
        ];

        // Input matrix, one row per sample.
        // NOTE(review): presumably raw per-term counts, following the linked
        // article - confirm against TfIdfTransformer.
        $termCounts = [
            [1, 1, 2, 1, 0, 0],
            [1, 1, 0, 0, 2, 3],
        ];

        // The same data is handed to the constructor and to transform().
        $tfIdf = new TfIdfTransformer($termCounts);
        $tfIdf->transform($termCounts);

        self::assertEqualsWithDelta($expectedWeights, $termCounts, 0.001);
    }
}
|