start to implement SVM with libsvm

This commit is contained in:
Arkadiusz Kondas 2016-05-05 23:29:11 +02:00
parent a2e8a89c3e
commit 95caef8692
10 changed files with 317 additions and 79 deletions

View File

@ -0,0 +1,53 @@
<?php
declare (strict_types = 1);
namespace Phpml\Classification;
use Phpml\Classification\Traits\Predictable;
use Phpml\Classification\Traits\Trainable;
use Phpml\Math\Kernel;
class SVC implements Classifier
{
    use Trainable;
    use Predictable;

    /**
     * Kernel identifier received by the constructor.
     *
     * @var int
     */
    private $kernel;

    /**
     * Cost value received by the constructor.
     *
     * @var float
     */
    private $cost;

    /**
     * Remembers the kernel identifier and the cost; nothing else happens here.
     *
     * @param int   $kernel
     * @param float $cost
     */
    public function __construct(int $kernel, float $cost)
    {
        $this->cost = $cost;
        $this->kernel = $kernel;
    }

    /**
     * Keeps the training data on the instance. No model is fitted yet.
     *
     * NOTE(review): $samples / $labels are not declared in this class;
     * presumably they come from one of the used traits - confirm.
     *
     * @param array $samples
     * @param array $labels
     */
    public function train(array $samples, array $labels)
    {
        $this->labels = $labels;
        $this->samples = $samples;
    }

    /**
     * Prediction for a single sample. Not implemented yet: the body is empty,
     * so the call evaluates to null.
     *
     * @param array $sample
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
    }
}

View File

@ -1,78 +0,0 @@
<?php
declare (strict_types = 1);
namespace Phpml\Classification;
use Phpml\Classification\Traits\Predictable;
use Phpml\Classification\Traits\Trainable;
use Phpml\Math\Kernel;
class SupportVectorMachine implements Classifier
{
    use Trainable;
    use Predictable;

    /**
     * Kernel object; an RBF kernel is created when none is supplied.
     *
     * @var Kernel
     */
    private $kernel;

    /**
     * @var float
     */
    private $C;

    /**
     * @var float
     */
    private $tolerance;

    /**
     * @var int
     */
    private $upperBound;

    /**
     * Path of the "bin" directory three levels above this file, with a
     * trailing directory separator.
     *
     * @var string
     */
    private $binPath;

    /**
     * Stores the configuration values and resolves the location of the
     * bundled "bin" directory.
     *
     * @param Kernel $kernel     defaults to an RBF kernel built with .001
     * @param float  $C
     * @param float  $tolerance
     * @param int    $upperBound
     */
    public function __construct(Kernel $kernel = null, float $C = 1.0, float $tolerance = .001, int $upperBound = 100)
    {
        $this->kernel = $kernel ?? new Kernel\RBF(.001);
        $this->C = $C;
        $this->tolerance = $tolerance;
        $this->upperBound = $upperBound;

        $binDirectory = realpath(implode(DIRECTORY_SEPARATOR, [__DIR__, '..', '..', '..', 'bin']));
        $this->binPath = $binDirectory.DIRECTORY_SEPARATOR;
    }

    /**
     * Keeps the training data on the instance. No model is fitted here.
     *
     * @param array $samples
     * @param array $labels
     */
    public function train(array $samples, array $labels)
    {
        $this->labels = $labels;
        $this->samples = $samples;
    }

    /**
     * Prediction for a single sample. The body is empty, so the call
     * evaluates to null.
     *
     * @param array $sample
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
    }
}

View File

@ -6,7 +6,6 @@ namespace Phpml\Dataset;
interface Dataset
{
const SOME = 'z';
/**
* @return array
*/

View File

@ -0,0 +1,59 @@
<?php
declare (strict_types = 1);
namespace Phpml\SupportVectorMachine;
class DataTransformer
{
    /**
     * Renders samples and labels as text, one line per labelled sample:
     * "<numeric label> <index>:<value> <index>:<value> ... " followed by PHP_EOL.
     *
     * Every line keeps a single space between the last feature and the line
     * break. Samples are looked up by the key of their label.
     *
     * @param array $samples
     * @param array $labels
     *
     * @return string
     */
    public static function trainingSet(array $samples, array $labels): string
    {
        $labelIds = self::numericLabels($labels);

        $lines = [];
        foreach ($labels as $position => $label) {
            $lines[] = $labelIds[$label].' '.self::sampleRow($samples[$position]).' '.PHP_EOL;
        }

        return implode('', $lines);
    }

    /**
     * Maps each distinct label to an integer, numbered from 0 in order of
     * first appearance.
     *
     * @param array $labels
     *
     * @return array label => numeric id
     */
    public static function numericLabels(array $labels): array
    {
        $labelIds = [];

        foreach ($labels as $label) {
            // Only the first occurrence of a label is assigned an id.
            if (!isset($labelIds[$label])) {
                $labelIds[$label] = count($labelIds);
            }
        }

        return $labelIds;
    }

    /**
     * Formats one sample as space separated "<key>:<value>" pairs, using the
     * sample's own array keys as feature indices.
     *
     * @param array $sample
     *
     * @return string
     */
    private static function sampleRow(array $sample): string
    {
        $pairs = array_map(
            static function ($position, $value) {
                return $position.':'.$value;
            },
            array_keys($sample),
            $sample
        );

        return implode(' ', $pairs);
    }
}

View File

@ -0,0 +1,28 @@
<?php
declare (strict_types = 1);
namespace Phpml\SupportVectorMachine;
/**
 * Kernel type identifiers.
 *
 * The integers are the libsvm kernel type codes (the value given to
 * svm-train's -t option). In the formulas below, u and v are feature
 * vectors and u' denotes the transpose of u.
 */
abstract class Kernel
{
    /**
     * Linear kernel: u'*v.
     */
    const LINEAR = 0;

    /**
     * Polynomial kernel: (gamma*u'*v + coef0)^degree.
     */
    const POLYNOMIAL = 1;

    /**
     * Radial basis function kernel: exp(-gamma*|u-v|^2).
     */
    const RBF = 2;

    /**
     * Sigmoid kernel: tanh(gamma*u'*v + coef0).
     */
    const SIGMOID = 3;
}

View File

@ -0,0 +1,83 @@
<?php
declare (strict_types = 1);
namespace Phpml\SupportVectorMachine;
/**
 * Thin wrapper around the libsvm "svm-train" command line tool.
 *
 * Training data is written to a temporary file under the project's "var"
 * directory, svm-train is executed on it, and the resulting model file is
 * read back into memory. Both temporary files are removed afterwards.
 */
class SupportVectorMachine
{
    /**
     * SVM type code, passed to svm-train as -s.
     *
     * @var int
     */
    private $type;

    /**
     * Kernel type code, passed to svm-train as -t.
     *
     * @var int
     */
    private $kernel;

    /**
     * Cost parameter, passed to svm-train as -c.
     *
     * @var float
     */
    private $cost;

    /**
     * Directory holding the libsvm binaries, with a trailing separator.
     *
     * @var string
     */
    private $binPath;

    /**
     * Directory used for temporary files, with a trailing separator.
     *
     * @var string
     */
    private $varPath;

    /**
     * Content of the model file produced by the last successful train() call.
     *
     * @var string
     */
    private $model;

    /**
     * @param int   $type
     * @param int   $kernel
     * @param float $cost
     */
    public function __construct(int $type, int $kernel, float $cost)
    {
        $this->type = $type;
        $this->kernel = $kernel;
        $this->cost = $cost;

        // The project root is three directories above this file.
        $rootPath = realpath(implode(DIRECTORY_SEPARATOR, [__DIR__, '..', '..', '..'])).DIRECTORY_SEPARATOR;

        $this->binPath = $rootPath.'bin'.DIRECTORY_SEPARATOR.'libsvm'.DIRECTORY_SEPARATOR;
        $this->varPath = $rootPath.'var'.DIRECTORY_SEPARATOR;
    }

    /**
     * Trains a model with svm-train and keeps the model text on the instance.
     *
     * @param array $samples
     * @param array $labels
     *
     * @throws \RuntimeException when the training set cannot be written,
     *                           svm-train fails, or the model cannot be read
     */
    public function train(array $samples, array $labels)
    {
        $trainingSetFileName = $this->varPath.uniqid();
        $modelFileName = $trainingSetFileName.'-model';

        try {
            $trainingSet = DataTransformer::trainingSet($samples, $labels);

            if (false === file_put_contents($trainingSetFileName, $trainingSet)) {
                throw new \RuntimeException(sprintf('Unable to write the training set to "%s".', $trainingSetFileName));
            }

            // Escape every argument on its own so that paths containing spaces
            // or shell metacharacters reach svm-train as single arguments.
            $command = sprintf(
                '%s -s %d -t %d -c %s %s %s',
                escapeshellarg($this->binPath.'svm-train'),
                $this->type,
                $this->kernel,
                escapeshellarg((string) $this->cost),
                escapeshellarg($trainingSetFileName),
                escapeshellarg($modelFileName)
            );

            $output = [];
            $exitCode = 0;
            exec($command, $output, $exitCode);

            if (0 !== $exitCode || !is_file($modelFileName)) {
                throw new \RuntimeException(sprintf(
                    'svm-train failed with exit code %d: %s',
                    $exitCode,
                    implode(PHP_EOL, $output)
                ));
            }

            $model = file_get_contents($modelFileName);

            if (false === $model) {
                throw new \RuntimeException(sprintf('Unable to read the model file "%s".', $modelFileName));
            }

            $this->model = $model;
        } finally {
            // Remove the temporary files whether or not training succeeded.
            foreach ([$trainingSetFileName, $modelFileName] as $temporaryFile) {
                if (is_file($temporaryFile)) {
                    unlink($temporaryFile);
                }
            }
        }
    }

    /**
     * Returns the model text of the last successful training run; null when
     * train() has not completed successfully yet.
     *
     * @return string
     */
    public function getModel()
    {
        return $this->model;
    }
}

View File

@ -0,0 +1,33 @@
<?php
declare (strict_types = 1);
namespace Phpml\SupportVectorMachine;
/**
 * SVM type identifiers.
 *
 * The integers are the libsvm SVM type codes (the value given to
 * svm-train's -s option).
 */
abstract class Type
{
    /**
     * C-SVC: classification.
     */
    const C_SVC = 0;

    /**
     * nu-SVC: classification.
     */
    const NU_SVC = 1;

    /**
     * One-class SVM: distribution estimation.
     */
    const ONE_CLASS_SVM = 2;

    /**
     * epsilon-SVR: regression.
     */
    const EPSILON_SVR = 3;

    /**
     * nu-SVR: regression.
     */
    const NU_SVR = 4;
}

View File

@ -0,0 +1,25 @@
<?php
declare (strict_types = 1);
namespace tests\SupportVectorMachine;
use Phpml\SupportVectorMachine\DataTransformer;
class DataTransformerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Four two-feature samples with labels a, a, b, b must become four lines
     * whose numeric labels are 0, 0, 1, 1 and whose features are indexed from 0.
     */
    public function testTransformDatasetToTrainingSet()
    {
        $labels = ['a', 'a', 'b', 'b'];
        $samples = [[1, 1], [2, 1], [3, 2], [4, 5]];

        // Each row keeps the trailing space that precedes the line break.
        $expectedRows = [
            '0 0:1 1:1 ',
            '0 0:2 1:1 ',
            '1 0:3 1:2 ',
            '1 0:4 1:5 ',
        ];
        $expected = implode(PHP_EOL, $expectedRows).PHP_EOL;

        $actual = DataTransformer::trainingSet($samples, $labels);

        $this->assertEquals($expected, $actual);
    }
}

View File

@ -0,0 +1,36 @@
<?php
declare (strict_types = 1);
namespace tests\SupportVectorMachine;
use Phpml\SupportVectorMachine\Kernel;
use Phpml\SupportVectorMachine\SupportVectorMachine;
use Phpml\SupportVectorMachine\Type;
/**
 * Exercises SupportVectorMachine::train() end to end.
 */
class SupportVectorMachineTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Trains a C-SVC with a linear kernel and cost 100.0 on six two-feature
     * samples (three labelled 'a', three labelled 'b') and compares the text
     * returned by getModel() with the expected model.
     */
    public function testTrainCSVCModelWithLinearKernel()
    {
        $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
        $labels = ['a', 'a', 'a', 'b', 'b', 'b'];

        // Expected model text. The literal is compared verbatim, so its line
        // breaks and spacing are significant - do not reformat it.
        $model =
'svm_type c_svc
kernel_type linear
nr_class 2
total_sv 2
rho 0
label 0 1
nr_sv 1 1
SV
0.25 0:2 1:4 
-0.25 0:4 1:2 
';

        $svm = new SupportVectorMachine(Type::C_SVC, Kernel::LINEAR, 100.0);
        $svm->train($samples, $labels);

        $this->assertEquals($model, $svm->getModel());
    }
}

0
var/.gitkeep Normal file
View File