php-ml/src/FeatureExtraction/TfIdfTransformer.php

55 lines
1.2 KiB
PHP
Raw Normal View History

2016-06-15 14:04:09 +00:00
<?php
2016-11-20 21:53:17 +00:00
declare(strict_types=1);
2016-06-15 14:04:09 +00:00
namespace Phpml\FeatureExtraction;
use Phpml\Transformer;
2016-06-15 14:04:09 +00:00
/**
 * Re-weights token-count feature vectors by inverse document frequency (IDF).
 *
 * fit() learns one IDF weight per feature index as log10(N / df), where N is
 * the number of fitted samples and df is the number of samples in which that
 * feature has a count greater than zero. transform() then multiplies every
 * feature of every sample by the learned weight for its index.
 */
class TfIdfTransformer implements Transformer
{
    /**
     * Learned IDF weight per feature index (empty until fit() has run).
     *
     * @var array
     */
    private $idf = [];

    /**
     * @param array $samples Optional samples to fit immediately; when empty,
     *                       fit() must be called before transform() is useful.
     */
    public function __construct(array $samples = [])
    {
        if ($samples !== []) {
            $this->fit($samples);
        }
    }

    /**
     * Learns the IDF weight of every feature index from the given samples.
     *
     * Any previously learned weights are discarded.
     *
     * @param array      $samples List of feature vectors (feature index => count).
     * @param array|null $targets Not used by this transformer.
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        $this->countTokensFrequency($samples);

        $count = count($samples);
        foreach ($this->idf as $index => $documentFrequency) {
            // A token that occurs in no sample has a document frequency of 0;
            // dividing by it would raise DivisionByZeroError (PHP 8) or yield
            // INF/NAN (PHP 7). Such a token carries no information, so it is
            // given a weight of 0.0 instead.
            $this->idf[$index] = $documentFrequency > 0
                ? log((float) ($count / $documentFrequency), 10.0)
                : 0.0;
        }
    }

    /**
     * Multiplies each feature of each sample, in place, by its learned IDF weight.
     *
     * Feature indexes for which no weight was learned are weighted by 0.0.
     *
     * @param array      $samples List of feature vectors, modified by reference.
     * @param array|null $targets Not used by this transformer.
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        foreach ($samples as &$sample) {
            foreach ($sample as $index => &$feature) {
                $feature *= $this->idf[$index] ?? 0.0;
            }
            // Break the reference so it cannot alias the last element later.
            unset($feature);
        }
        unset($sample);
    }

    /**
     * Stores in $this->idf, per feature index, the number of samples in which
     * that feature has a count greater than zero.
     *
     * The set of indexes is seeded from the first sample so that features
     * which never occur are still present with a frequency of 0.
     */
    private function countTokensFrequency(array $samples): void
    {
        if ($samples === []) {
            $this->idf = [];

            return;
        }

        // reset() returns the first element regardless of how $samples is keyed.
        $this->idf = array_fill_keys(array_keys(reset($samples)), 0);

        foreach ($samples as $sample) {
            foreach ($sample as $index => $count) {
                if ($count > 0) {
                    // "?? 0" covers indexes that were absent from the first sample.
                    $this->idf[$index] = ($this->idf[$index] ?? 0) + 1;
                }
            }
        }
    }
}