implement TfIdf transformation

This commit is contained in:
Arkadiusz Kondas 2016-06-15 16:04:09 +02:00
parent 8a65026642
commit cc50d2c9b1
3 changed files with 84 additions and 1 deletions

View File

@ -6,7 +6,7 @@
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop)
Fresh approach to Machine Learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ...
Fresh approach to Machine Learning in PHP. Algorithms, Cross Validation, Preprocessing, Feature Extraction and much more in one library.
Simple example of classification:
```php

View File

@ -0,0 +1,54 @@
<?php
declare (strict_types = 1);
namespace Phpml\FeatureExtraction;
/**
 * Re-weights a matrix of token counts by inverse document frequency (tf-idf).
 *
 * Each sample is an array of counts keyed by feature (token) index. Every count
 * is multiplied by log10(N / df), where N is the number of samples and df is
 * the number of samples in which that token has a count greater than zero.
 */
class TfIdfTransformer implements Transformer
{
    /**
     * Per-token table keyed by feature index. It holds document frequencies
     * while counting and is then overwritten with the idf weights on each
     * call to transform().
     *
     * @var array
     */
    private $idf;

    /**
     * Multiplies every feature of every sample by the idf weight of its token.
     *
     * The weights are recomputed from $samples on every call. An empty input
     * yields an empty result.
     *
     * @param array $samples list of samples, each an array of token counts keyed by feature index
     *
     * @return array the samples with the same keys and tf-idf weighted values
     */
    public function transform(array $samples): array
    {
        $this->countTokensFrequency($samples);

        $count = count($samples);
        foreach ($this->idf as $index => $frequency) {
            // A token that occurs in no sample has a count of 0 everywhere, so
            // its weight never influences the result; use 0.0 rather than
            // dividing by zero.
            $this->idf[$index] = $frequency > 0 ? log($count / $frequency, 10) : 0.0;
        }

        // Assign by key instead of iterating by reference, so no dangling
        // references are left behind in the returned array.
        foreach ($samples as $row => $sample) {
            foreach ($sample as $index => $feature) {
                $samples[$row][$index] = $feature * $this->idf[$index];
            }
        }

        return $samples;
    }

    /**
     * Fills $this->idf with the document frequency of every token: the number
     * of samples in which the token's count is greater than zero.
     *
     * Every feature index seen in any sample gets an entry (0 when the token
     * never occurs), so the table does not depend on the shape of the first
     * sample and an empty sample list is handled.
     *
     * @param array $samples list of samples, each an array of token counts keyed by feature index
     */
    private function countTokensFrequency(array $samples)
    {
        $this->idf = [];

        foreach ($samples as $sample) {
            foreach ($sample as $index => $count) {
                if (!isset($this->idf[$index])) {
                    $this->idf[$index] = 0;
                }

                if ($count > 0) {
                    ++$this->idf[$index];
                }
            }
        }
    }
}

View File

@ -0,0 +1,29 @@
<?php
declare (strict_types = 1);
namespace tests\Phpml\FeatureExtraction;
use Phpml\FeatureExtraction\TfIdfTransformer;
/**
 * Checks TfIdfTransformer against a small hand-computed example.
 */
class TfIdfTransformerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Transforms two samples over six tokens and compares the result with the
     * expected weights, allowing a tolerance of 0.001.
     */
    public function testTfIdfTransformation()
    {
        // Example based on https://en.wikipedia.org/wiki/Tf%E2%80%93idf
        $termCounts = [
            [1, 1, 2, 1, 0, 0],
            [1, 1, 0, 0, 2, 3],
        ];

        // Tokens 0 and 1 occur in both samples, so their expected weight is 0;
        // the remaining tokens occur in exactly one sample each.
        $expected = [
            [0, 0, 0.602, 0.301, 0, 0],
            [0, 0, 0, 0, 0.602, 0.903],
        ];

        $actual = (new TfIdfTransformer())->transform($termCounts);

        $this->assertEquals($expected, $actual, '', 0.001);
    }
}