php-ml/src/Phpml/FeatureExtraction/TfIdfTransformer.php

69 lines
1.3 KiB
PHP
Raw Normal View History

2016-06-15 14:04:09 +00:00
<?php
declare (strict_types = 1);
namespace Phpml\FeatureExtraction;
use Phpml\Transformer;
2016-06-15 14:04:09 +00:00
/**
 * Re-weights token-count feature vectors by inverse document frequency (IDF).
 *
 * fit() learns, for every feature index, the weight log10(N / df), where N is
 * the number of samples and df is the number of samples in which the feature
 * value is greater than zero. transform() multiplies every feature of every
 * sample by the learned weight of its index.
 */
class TfIdfTransformer implements Transformer
{
    /**
     * IDF weight per feature index; populated by fit().
     *
     * @var array
     */
    private $idf;

    /**
     * @param array|null $samples Optional samples to fit on immediately.
     *                            Null or an empty array skips fitting.
     */
    public function __construct(array $samples = null)
    {
        if ($samples) {
            $this->fit($samples);
        }
    }

    /**
     * Learns the IDF weight of every feature index found in $samples.
     *
     * Any previously learned weights are discarded.
     *
     * @param array $samples List of samples; each sample maps feature index => count.
     */
    public function fit(array $samples)
    {
        $this->countTokensFrequency($samples);

        $count = count($samples);
        // Iterate by value (over a copy) and write back by key, so no
        // reference to the last element is left dangling after the loop.
        foreach ($this->idf as $index => $frequency) {
            // A feature that is zero in every sample has df = 0. Dividing by it
            // raises DivisionByZeroError on PHP 8 and yields INF (then NAN once
            // multiplied by a zero count) on PHP 7, so such a feature is given
            // a neutral weight of 0 instead.
            $this->idf[$index] = $frequency > 0
                ? log($count / $frequency, 10)
                : 0.0;
        }
    }

    /**
     * Multiplies every feature value by the IDF weight of its index, in place.
     *
     * @param array $samples Samples to rescale; modified by reference.
     */
    public function transform(array &$samples)
    {
        foreach ($samples as &$sample) {
            foreach ($sample as $index => &$feature) {
                $feature = $feature * $this->idf[$index];
            }
            // Break the reference so later writes cannot hit the last element.
            unset($feature);
        }
        unset($sample);
    }

    /**
     * Stores in $this->idf, per feature index, the number of samples in which
     * that feature's value is greater than zero.
     *
     * Indexes are registered as they are first seen, so an empty sample list
     * produces an empty map and samples with differing key sets are counted
     * without touching undefined offsets.
     *
     * @param array $samples
     */
    private function countTokensFrequency(array $samples)
    {
        $this->idf = [];

        foreach ($samples as $sample) {
            foreach ($sample as $index => $count) {
                if (!isset($this->idf[$index])) {
                    $this->idf[$index] = 0;
                }

                if ($count > 0) {
                    ++$this->idf[$index];
                }
            }
        }
    }
}