2016-05-05 21:29:11 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-05-05 21:29:11 +00:00
|
|
|
|
|
|
|
namespace Phpml\SupportVectorMachine;
|
|
|
|
|
|
|
|
/**
 * Converts samples and labels into plain-text rows of the form
 * "<label> <index>:<value> <index>:<value> ..." and parses raw prediction
 * text back into the caller's original labels.
 *
 * Labels are mapped to consecutive integers (see numericLabels()) when rows
 * are written, and mapped back when predictions are read.
 */
class DataTransformer
{
    /**
     * Builds the training-set text: one row per entry of $labels.
     *
     * Each row is "<label> <features> " followed by PHP_EOL, where <features>
     * comes from sampleRow() for the sample stored under the same key as the
     * label.
     *
     * @param array $samples feature vectors, keyed like $labels
     * @param array $labels  one label per sample
     * @param bool  $targets when true the labels are written as-is; when false
     *                       they are replaced by their numericLabels() number
     *
     * @return string the concatenated rows ('' when $labels is empty)
     */
    public static function trainingSet(array $samples, array $labels, bool $targets = false): string
    {
        $set = '';
        $numericLabels = [];

        // The label-to-number map is only needed when labels are not written verbatim.
        if (!$targets) {
            $numericLabels = self::numericLabels($labels);
        }

        foreach ($labels as $index => $label) {
            $set .= sprintf(
                '%s %s %s',
                $targets ? $label : $numericLabels[$label],
                self::sampleRow($samples[$index]),
                PHP_EOL
            );
        }

        return $set;
    }

    /**
     * Builds the test-set text: one row per sample, each with the label "0".
     *
     * A flat feature vector (its first element is not an array) is treated as
     * a single sample. The first element is inspected regardless of its key,
     * so a sample list that does not start at key 0 is still recognised as a
     * list of samples.
     *
     * @param array $samples a list of feature vectors, or one feature vector
     *
     * @return string the concatenated rows; '' when $samples is empty
     */
    public static function testSet(array $samples): string
    {
        // Nothing to describe: avoid reading a missing first element and
        // emitting a bogus feature-less row.
        if ($samples === []) {
            return '';
        }

        // $samples is a by-value copy, so moving its internal pointer is harmless.
        if (!is_array(reset($samples))) {
            $samples = [$samples];
        }

        $set = '';
        foreach ($samples as $sample) {
            $set .= sprintf('0 %s %s', self::sampleRow($sample), PHP_EOL);
        }

        return $set;
    }

    /**
     * Maps raw predictions (one numeric label per line) back to the labels.
     *
     * Empty lines are skipped. Each remaining line is cast to int and looked
     * up in the numbering produced by numericLabels($labels).
     *
     * @param string $rawPredictions prediction text, one value per line
     * @param array  $labels         the labels the numbering is derived from
     *
     * @return array one entry per non-empty line: the matching label, or false
     *               when the number does not correspond to any label
     */
    public static function predictions(string $rawPredictions, array $labels): array
    {
        // Invert label => number once so every line is a constant-time lookup.
        $labelByNumber = array_flip(self::numericLabels($labels));

        $results = [];
        foreach (self::splitLines($rawPredictions) as $line) {
            if ($line === '') {
                continue;
            }

            $results[] = $labelByNumber[(int) $line] ?? false;
        }

        return $results;
    }

    /**
     * Parses raw probability output into per-sample label => probability maps.
     *
     * The first line is a header whose space-separated columns, after the
     * first one, are numeric labels. Every following line is one sample whose
     * columns, after the first one, are the probabilities for the header
     * labels in the same order.
     *
     * @param string $rawPredictions header line followed by one line per sample
     * @param array  $labels         the labels the numbering is derived from
     *
     * @return array list of arrays mapping each label to its probability (float)
     */
    public static function probabilities(string $rawPredictions, array $labels): array
    {
        // Invert label => number once so every header column is a constant-time lookup.
        $labelByNumber = array_flip(self::numericLabels($labels));

        $predictions = self::splitLines(trim($rawPredictions));

        $header = (string) array_shift($predictions);
        $headerColumns = explode(' ', $header);
        // Drop the leading header word; only the numeric labels remain.
        array_shift($headerColumns);

        $columnLabels = [];
        foreach ($headerColumns as $numericLabel) {
            $columnLabels[] = $labelByNumber[(int) $numericLabel] ?? false;
        }

        $results = [];
        foreach ($predictions as $rawResult) {
            $probabilities = explode(' ', $rawResult);
            // Drop the first column; the remaining ones line up with $columnLabels.
            array_shift($probabilities);

            $result = [];
            foreach ($probabilities as $i => $prob) {
                $result[$columnLabels[$i]] = (float) $prob;
            }

            $results[] = $result;
        }

        return $results;
    }

    /**
     * Assigns consecutive integers, starting at 0, to the distinct labels in
     * order of first appearance.
     *
     * @param array $labels labels, possibly with repetitions
     *
     * @return array map of label => number
     */
    public static function numericLabels(array $labels): array
    {
        $numericLabels = [];
        foreach ($labels as $label) {
            if (isset($numericLabels[$label])) {
                continue;
            }

            // The next free number equals the count of labels seen so far.
            $numericLabels[$label] = count($numericLabels);
        }

        return $numericLabels;
    }

    /**
     * Formats one sample as space-separated "<index>:<value>" pairs, where
     * <index> is the feature's array key plus one.
     *
     * @param array $sample feature values
     *
     * @return string the formatted row ('' for an empty sample)
     */
    private static function sampleRow(array $sample): string
    {
        $row = [];
        foreach ($sample as $index => $feature) {
            $row[] = sprintf('%s:%s', $index + 1, $feature);
        }

        return implode(' ', $row);
    }

    /**
     * Splits text into lines on "\r\n", "\n" or "\r", so parsing does not
     * depend on the line ending of the platform PHP runs on.
     *
     * @param string $text text to split
     *
     * @return array the lines, without their line endings
     */
    private static function splitLines(string $text): array
    {
        $lines = preg_split('/\r\n|\n|\r/', $text);

        return $lines === false ? [] : $lines;
    }
}
|