mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-22 04:55:10 +00:00
Linear classifiers: Perceptron, Adaline, DecisionStump (#50)
* Linear classifiers * Code formatting to PSR-2 * Added basic test cases for linear classifiers
This commit is contained in:
parent
f0a7984f39
commit
cf222bcce4
@ -56,6 +56,11 @@ class DecisionTree implements Classifier
|
|||||||
*/
|
*/
|
||||||
private $numUsableFeatures = 0;
|
private $numUsableFeatures = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @var array
|
||||||
|
*/
|
||||||
|
private $selectedFeatures;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @var array
|
* @var array
|
||||||
*/
|
*/
|
||||||
@ -126,33 +131,45 @@ class DecisionTree implements Classifier
|
|||||||
if ($this->actualDepth < $depth) {
|
if ($this->actualDepth < $depth) {
|
||||||
$this->actualDepth = $depth;
|
$this->actualDepth = $depth;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Traverse all records to see if all records belong to the same class,
|
||||||
|
// otherwise group the records so that we can classify the leaf
|
||||||
|
// in case maximum depth is reached
|
||||||
$leftRecords = [];
|
$leftRecords = [];
|
||||||
$rightRecords= [];
|
$rightRecords= [];
|
||||||
$remainingTargets = [];
|
$remainingTargets = [];
|
||||||
$prevRecord = null;
|
$prevRecord = null;
|
||||||
$allSame = true;
|
$allSame = true;
|
||||||
|
|
||||||
foreach ($records as $recordNo) {
|
foreach ($records as $recordNo) {
|
||||||
|
// Check if the previous record is the same with the current one
|
||||||
$record = $this->samples[$recordNo];
|
$record = $this->samples[$recordNo];
|
||||||
if ($prevRecord && $prevRecord != $record) {
|
if ($prevRecord && $prevRecord != $record) {
|
||||||
$allSame = false;
|
$allSame = false;
|
||||||
}
|
}
|
||||||
$prevRecord = $record;
|
$prevRecord = $record;
|
||||||
|
|
||||||
|
// According to the split criterion, this record will
|
||||||
|
// belong to either left or the right side in the next split
|
||||||
if ($split->evaluate($record)) {
|
if ($split->evaluate($record)) {
|
||||||
$leftRecords[] = $recordNo;
|
$leftRecords[] = $recordNo;
|
||||||
} else {
|
} else {
|
||||||
$rightRecords[]= $recordNo;
|
$rightRecords[]= $recordNo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Group remaining targets
|
||||||
$target = $this->targets[$recordNo];
|
$target = $this->targets[$recordNo];
|
||||||
if (! in_array($target, $remainingTargets)) {
|
if (! array_key_exists($target, $remainingTargets)) {
|
||||||
$remainingTargets[] = $target;
|
$remainingTargets[$target] = 1;
|
||||||
|
} else {
|
||||||
|
$remainingTargets[$target]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (count($remainingTargets) == 1 || $allSame || $depth >= $this->maxDepth) {
|
if (count($remainingTargets) == 1 || $allSame || $depth >= $this->maxDepth) {
|
||||||
$split->isTerminal = 1;
|
$split->isTerminal = 1;
|
||||||
$classes = array_count_values($remainingTargets);
|
arsort($remainingTargets);
|
||||||
arsort($classes);
|
$split->classValue = key($remainingTargets);
|
||||||
$split->classValue = key($classes);
|
|
||||||
} else {
|
} else {
|
||||||
if ($leftRecords) {
|
if ($leftRecords) {
|
||||||
$split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1);
|
$split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1);
|
||||||
@ -200,15 +217,31 @@ class DecisionTree implements Classifier
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Returns available features/columns to the tree for the decision making
|
||||||
|
* process. <br>
|
||||||
|
*
|
||||||
|
* If a number is given with setNumFeatures() method, then a random selection
|
||||||
|
* of features up to this number is returned. <br>
|
||||||
|
*
|
||||||
|
* If some features are manually selected by use of setSelectedFeatures(),
|
||||||
|
* then only these features are returned <br>
|
||||||
|
*
|
||||||
|
* If any of above methods were not called beforehand, then all features
|
||||||
|
* are returned by default.
|
||||||
|
*
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
protected function getSelectedFeatures()
|
protected function getSelectedFeatures()
|
||||||
{
|
{
|
||||||
$allFeatures = range(0, $this->featureCount - 1);
|
$allFeatures = range(0, $this->featureCount - 1);
|
||||||
if ($this->numUsableFeatures == 0) {
|
if ($this->numUsableFeatures == 0 && ! $this->selectedFeatures) {
|
||||||
return $allFeatures;
|
return $allFeatures;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($this->selectedFeatures) {
|
||||||
|
return $this->selectedFeatures;
|
||||||
|
}
|
||||||
|
|
||||||
$numFeatures = $this->numUsableFeatures;
|
$numFeatures = $this->numUsableFeatures;
|
||||||
if ($numFeatures > $this->featureCount) {
|
if ($numFeatures > $this->featureCount) {
|
||||||
$numFeatures = $this->featureCount;
|
$numFeatures = $this->featureCount;
|
||||||
@ -323,6 +356,16 @@ class DecisionTree implements Classifier
|
|||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Sets the predefined features (column indices) to consider while
 * deciding which column to use for a split.
 *
 * @param array $selectedFeatures
 */
protected function setSelectedFeatures(array $selectedFeatures)
{
    $this->selectedFeatures = $selectedFeatures;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A string array to represent columns. Useful when HTML output or
|
* A string array to represent columns. Useful when HTML output or
|
||||||
* column importances are desired to be inspected.
|
* column importances are desired to be inspected.
|
||||||
|
148
src/Phpml/Classification/Linear/Adaline.php
Normal file
148
src/Phpml/Classification/Linear/Adaline.php
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace Phpml\Classification\Linear;
|
||||||
|
|
||||||
|
use Phpml\Helper\Predictable;
|
||||||
|
use Phpml\Helper\Trainable;
|
||||||
|
use Phpml\Classification\Classifier;
|
||||||
|
use Phpml\Classification\Linear\Perceptron;
|
||||||
|
use Phpml\Preprocessing\Normalizer;
|
||||||
|
|
||||||
|
class Adaline extends Perceptron
{
    /**
     * Batch training is the default Adaline training algorithm
     */
    const BATCH_TRAINING = 1;

    /**
     * Online training: Stochastic gradient descent learning
     */
    const ONLINE_TRAINING = 2;

    /**
     * The function whose result will be used to calculate the network error
     * for each instance
     *
     * @var string
     */
    protected static $errorFunction = 'output';

    /**
     * Training type: either self::BATCH_TRAINING or self::ONLINE_TRAINING
     *
     * @var int
     */
    protected $trainingType;

    /**
     * Set only when input normalization was requested in the constructor
     *
     * @var Normalizer
     */
    private $normalizer;

    /**
     * Initialize an Adaline (ADAptive LInear NEuron) classifier with given learning rate and maximum
     * number of iterations used while training the classifier <br>
     *
     * Learning rate should be a float value between 0.0 (exclusive) and 1.0 (inclusive) <br>
     * Maximum number of iterations can be an integer value greater than 0 <br>
     * If normalizeInputs is set to true, then every input given to the algorithm will be standardized
     * by use of standard deviation and mean calculation
     *
     * @param float $learningRate
     * @param int   $maxIterations
     * @param bool  $normalizeInputs
     * @param int   $trainingType    self::BATCH_TRAINING or self::ONLINE_TRAINING
     *
     * @throws \Exception if the training type is not one of the two supported constants
     */
    public function __construct(float $learningRate = 0.001, int $maxIterations = 1000,
        bool $normalizeInputs = true, int $trainingType = self::BATCH_TRAINING)
    {
        if ($normalizeInputs) {
            $this->normalizer = new Normalizer(Normalizer::NORM_STD);
        }

        if (! in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING], true)) {
            throw new \Exception("Adaline can only be trained with batch and online/stochastic gradient descent algorithm");
        }
        $this->trainingType = $trainingType;

        parent::__construct($learningRate, $maxIterations);
    }

    /**
     * Normalizes the samples (when a normalizer is configured) and hands
     * them over to the parent training routine.
     *
     * @param array $samples
     * @param array $targets
     */
    public function train(array $samples, array $targets)
    {
        if ($this->normalizer) {
            // transform() works on the local copy by reference
            $this->normalizer->transform($samples);
        }

        parent::train($samples, $targets);
    }

    /**
     * Adapts the weights with respect to given samples and targets
     * by use of gradient descent learning rule
     */
    protected function runTraining()
    {
        // If online training is chosen, then the parent runTraining method
        // will be executed with the 'output' method as the error function
        if ($this->trainingType === self::ONLINE_TRAINING) {
            return parent::runTraining();
        }

        // The samples do not change while training, so the per-feature
        // columns are extracted once instead of once per iteration
        $columns = [];
        for ($i = 0; $i < $this->featureCount; $i++) {
            $columns[$i] = array_column($this->samples, $i);
        }

        // Batch learning is executed:
        for ($currIter = 0; $currIter < $this->maxIterations; $currIter++) {
            $outputs = array_map([$this, 'output'], $this->samples);
            $updates = array_map([$this, 'gradient'], $this->targets, $outputs);

            // Updates all weights at once; index 0 is the bias
            $this->weights[0] += $this->learningRate * array_sum($updates);

            foreach ($columns as $i => $col) {
                $error = 0;
                foreach ($col as $index => $val) {
                    $error += $val * $updates[$index];
                }

                $this->weights[$i + 1] += $this->learningRate * $error;
            }
        }
    }

    /**
     * Returns the direction of gradient given the desired and actual outputs
     *
     * @param int   $desired
     * @param float $output
     *
     * @return float
     */
    protected function gradient($desired, $output)
    {
        return $desired - $output;
    }

    /**
     * Normalizes the sample (when a normalizer is configured) before
     * delegating the prediction to the parent class.
     *
     * @param array $sample
     *
     * @return mixed
     */
    public function predictSample(array $sample)
    {
        if ($this->normalizer) {
            // transform() expects a list of samples and changes it in place
            $samples = [$sample];
            $this->normalizer->transform($samples);
            $sample = $samples[0];
        }

        return parent::predictSample($sample);
    }
}
|
56
src/Phpml/Classification/Linear/DecisionStump.php
Normal file
56
src/Phpml/Classification/Linear/DecisionStump.php
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace Phpml\Classification\Linear;
|
||||||
|
|
||||||
|
use Phpml\Helper\Predictable;
|
||||||
|
use Phpml\Helper\Trainable;
|
||||||
|
use Phpml\Classification\Classifier;
|
||||||
|
use Phpml\Classification\DecisionTree;
|
||||||
|
|
||||||
|
class DecisionStump extends DecisionTree
{
    use Trainable, Predictable;

    /**
     * Index of the column the stump should split on, or -1 to let the
     * tree pick the column itself
     *
     * @var int
     */
    protected $columnIndex;

    /**
     * A DecisionStump is a DecisionTree limited to a single level. It usually
     * serves as the weak classifier inside ensemble algorithms. <br>
     *
     * When a non-negative columnIndex is supplied, the stump tries to build its
     * decision node on that column; with -1 the column is chosen by the tree
     * (default DecisionTree behaviour)
     *
     * @param int $columnIndex
     */
    public function __construct(int $columnIndex = -1)
    {
        $this->columnIndex = $columnIndex;

        parent::__construct(1);
    }

    /**
     * @param array $samples
     * @param array $targets
     */
    public function train(array $samples, array $targets)
    {
        if ($this->columnIndex >= 0) {
            if ($this->columnIndex >= count($samples[0])) {
                // The requested column does not exist in the data set:
                // fall back to automatic column selection
                $this->columnIndex = -1;
            } else {
                $this->setSelectedFeatures([$this->columnIndex]);
            }
        }

        parent::train($samples, $targets);
    }
}
|
174
src/Phpml/Classification/Linear/Perceptron.php
Normal file
174
src/Phpml/Classification/Linear/Perceptron.php
Normal file
@ -0,0 +1,174 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace Phpml\Classification\Linear;
|
||||||
|
|
||||||
|
use Phpml\Helper\Predictable;
|
||||||
|
use Phpml\Helper\Trainable;
|
||||||
|
use Phpml\Classification\Classifier;
|
||||||
|
|
||||||
|
class Perceptron implements Classifier
{
    use Predictable;

    /**
     * Name of the method whose result is compared with the target value to
     * calculate the network error for each instance. It is read through late
     * static binding, so a subclass may redeclare it.
     *
     * @var string
     */
    protected static $errorFunction = 'outputClass';

    /**
     * Training samples collected over all calls to train()
     *
     * @var array
     */
    protected $samples = [];

    /**
     * Training targets encoded as 1 or -1
     *
     * @var array
     */
    protected $targets = [];

    /**
     * Maps the internal class values 1 and -1 to the original labels
     *
     * @var array
     */
    protected $labels = [];

    /**
     * @var int
     */
    protected $featureCount = 0;

    /**
     * Weight vector; index 0 holds the bias
     *
     * @var array
     */
    protected $weights;

    /**
     * @var float
     */
    protected $learningRate;

    /**
     * @var int
     */
    protected $maxIterations;

    /**
     * Initialize a perceptron classifier with given learning rate and maximum
     * number of iterations used while training the perceptron <br>
     *
     * Learning rate should be a float value between 0.0 (exclusive) and 1.0 (inclusive) <br>
     * Maximum number of iterations can be an integer value greater than 0
     *
     * @param float $learningRate
     * @param int   $maxIterations
     *
     * @throws \Exception if one of the arguments is out of range
     */
    public function __construct(float $learningRate = 0.001, int $maxIterations = 1000)
    {
        if ($learningRate <= 0.0 || $learningRate > 1.0) {
            throw new \Exception("Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)");
        }

        if ($maxIterations <= 0) {
            throw new \Exception("Maximum number of iterations should be an integer greater than 0");
        }

        $this->learningRate = $learningRate;
        $this->maxIterations = $maxIterations;
    }

    /**
     * Encodes the two classes as 1 / -1, initializes the weights with
     * random values and runs the training loop.
     *
     * @param array $samples
     * @param array $targets
     *
     * @throws \Exception if the targets do not contain exactly two classes
     */
    public function train(array $samples, array $targets)
    {
        // Validate before touching any state: both labels are read below,
        // so fewer than two classes is as invalid as more than two
        $labels = array_keys(array_count_values($targets));
        if (count($labels) !== 2) {
            throw new \Exception("Perceptron is for only binary (two-class) classification");
        }

        // Set all target values to either -1 or 1
        $this->labels = [1 => $labels[0], -1 => $labels[1]];
        foreach ($targets as $target) {
            // Loose comparison on purpose: array keys turn numeric string
            // labels such as '1' into integers
            $this->targets[] = $target == $this->labels[1] ? 1 : -1;
        }

        // Set samples and feature count vars
        $this->samples = array_merge($this->samples, $samples);
        $this->featureCount = count($this->samples[0]);

        // Init weights (bias + one weight per feature) with random values;
        // built by value so no reference into the array is left behind
        $this->weights = [];
        for ($i = 0; $i <= $this->featureCount; $i++) {
            $this->weights[] = rand() / (float) getrandmax();
        }

        // Do training
        $this->runTraining();
    }

    /**
     * Adapts the weights with respect to given samples and targets
     * by use of perceptron learning rule
     */
    protected function runTraining()
    {
        $currIter = 0;
        while ($this->maxIterations > $currIter++) {
            foreach ($this->samples as $index => $sample) {
                $target = $this->targets[$index];
                $prediction = $this->{static::$errorFunction}($sample);
                $update = $target - $prediction;

                // Update bias
                $this->weights[0] += $update * $this->learningRate;

                // Update other weights
                for ($i = 1; $i <= $this->featureCount; $i++) {
                    $this->weights[$i] += $update * $sample[$i - 1] * $this->learningRate;
                }
            }
        }
    }

    /**
     * Calculates net output of the network as a float value for the given input
     *
     * @param array $sample
     *
     * @return float
     */
    protected function output(array $sample)
    {
        $sum = 0;
        foreach ($this->weights as $index => $w) {
            if ($index == 0) {
                // Bias term
                $sum += $w;
            } else {
                $sum += $w * $sample[$index - 1];
            }
        }

        return $sum;
    }

    /**
     * Returns the class value (either -1 or 1) for the given input
     *
     * @param array $sample
     *
     * @return int
     */
    protected function outputClass(array $sample)
    {
        return $this->output($sample) > 0 ? 1 : -1;
    }

    /**
     * Returns the original label that corresponds to the predicted class
     *
     * @param array $sample
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        $predictedClass = $this->outputClass($sample);

        return $this->labels[$predictedClass];
    }
}
|
@ -5,17 +5,35 @@ declare(strict_types=1);
|
|||||||
namespace Phpml\Preprocessing;
|
namespace Phpml\Preprocessing;
|
||||||
|
|
||||||
use Phpml\Exception\NormalizerException;
|
use Phpml\Exception\NormalizerException;
|
||||||
|
use Phpml\Math\Statistic\StandardDeviation;
|
||||||
|
use Phpml\Math\Statistic\Mean;
|
||||||
|
|
||||||
class Normalizer implements Preprocessor
|
class Normalizer implements Preprocessor
|
||||||
{
|
{
|
||||||
const NORM_L1 = 1;
|
const NORM_L1 = 1;
|
||||||
const NORM_L2 = 2;
|
const NORM_L2 = 2;
|
||||||
|
const NORM_STD= 3;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @var int
|
* @var int
|
||||||
*/
|
*/
|
||||||
private $norm;
|
private $norm;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @var bool
|
||||||
|
*/
|
||||||
|
private $fitted = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @var array
|
||||||
|
*/
|
||||||
|
private $std;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @var array
|
||||||
|
*/
|
||||||
|
private $mean;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param int $norm
|
* @param int $norm
|
||||||
*
|
*
|
||||||
@ -23,7 +41,7 @@ class Normalizer implements Preprocessor
|
|||||||
*/
|
*/
|
||||||
public function __construct(int $norm = self::NORM_L2)
|
public function __construct(int $norm = self::NORM_L2)
|
||||||
{
|
{
|
||||||
if (!in_array($norm, [self::NORM_L1, self::NORM_L2])) {
|
if (!in_array($norm, [self::NORM_L1, self::NORM_L2, self::NORM_STD])) {
|
||||||
throw NormalizerException::unknownNorm();
|
throw NormalizerException::unknownNorm();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -35,7 +53,20 @@ class Normalizer implements Preprocessor
|
|||||||
*/
|
*/
|
||||||
public function fit(array $samples)
|
public function fit(array $samples)
|
||||||
{
|
{
|
||||||
// intentionally not implemented
|
if ($this->fitted) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($this->norm == self::NORM_STD) {
|
||||||
|
$features = range(0, count($samples[0]) - 1);
|
||||||
|
foreach ($features as $i) {
|
||||||
|
$values = array_column($samples, $i);
|
||||||
|
$this->std[$i] = StandardDeviation::population($values);
|
||||||
|
$this->mean[$i] = Mean::arithmetic($values);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->fitted = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -43,7 +74,15 @@ class Normalizer implements Preprocessor
|
|||||||
*/
|
*/
|
||||||
public function transform(array &$samples)
|
public function transform(array &$samples)
|
||||||
{
|
{
|
||||||
$method = sprintf('normalizeL%s', $this->norm);
|
$methods = [
|
||||||
|
self::NORM_L1 => 'normalizeL1',
|
||||||
|
self::NORM_L2 => 'normalizeL2',
|
||||||
|
self::NORM_STD=> 'normalizeSTD'
|
||||||
|
];
|
||||||
|
$method = $methods[$this->norm];
|
||||||
|
|
||||||
|
$this->fit($samples);
|
||||||
|
|
||||||
foreach ($samples as &$sample) {
|
foreach ($samples as &$sample) {
|
||||||
$this->$method($sample);
|
$this->$method($sample);
|
||||||
}
|
}
|
||||||
@ -88,4 +127,14 @@ class Normalizer implements Preprocessor
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Standardizes the sample in place: each value has the fitted mean of its
 * feature subtracted and is divided by the fitted standard deviation.
 *
 * A feature whose fitted standard deviation is zero is mapped to 0
 * instead of being divided by zero.
 *
 * @param array $sample
 */
private function normalizeSTD(array &$sample)
{
    foreach ($sample as $i => $val) {
        if ($this->std[$i] == 0) {
            // Constant feature in the fitted data: no spread to scale by
            $sample[$i] = 0;
            continue;
        }

        $sample[$i] = ($val - $this->mean[$i]) / $this->std[$i];
    }
}
|
||||||
}
|
}
|
||||||
|
55
tests/Phpml/Classification/Linear/AdalineTest.php
Normal file
55
tests/Phpml/Classification/Linear/AdalineTest.php
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace tests\Classification\Linear;
|
||||||
|
|
||||||
|
use Phpml\Classification\Linear\Adaline;
|
||||||
|
use Phpml\ModelManager;
|
||||||
|
use PHPUnit\Framework\TestCase;
|
||||||
|
|
||||||
|
class AdalineTest extends TestCase
{
    /**
     * Trains Adaline on the AND and OR problems and checks the predictions
     * for points close to the training samples.
     */
    public function testPredictSingleSample()
    {
        // AND problem
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1]];
        $targets = [0, 0, 0, 1];
        $classifier = new Adaline();
        $classifier->train($samples, $targets);
        $this->assertEquals(0, $classifier->predict([0.1, 0.2]));
        $this->assertEquals(0, $classifier->predict([0.1, 0.99]));
        $this->assertEquals(1, $classifier->predict([1.1, 0.8]));

        // OR problem
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1]];
        $targets = [0, 1, 1, 1];
        $classifier = new Adaline();
        $classifier->train($samples, $targets);
        $this->assertEquals(0, $classifier->predict([0.1, 0.2]));
        $this->assertEquals(1, $classifier->predict([0.1, 0.99]));
        $this->assertEquals(1, $classifier->predict([1.1, 0.8]));

        return $classifier;
    }

    /**
     * A classifier restored from a file must equal the saved one and
     * give the same predictions.
     */
    public function testSaveAndRestore()
    {
        // Instantiate a new Adaline trained for the OR problem
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1]];
        $targets = [0, 1, 1, 1];
        $classifier = new Adaline();
        $classifier->train($samples, $targets);
        $testSamples = [[0, 1], [1, 1], [0.2, 0.1]];
        $predicted = $classifier->predict($testSamples);

        // tempnam() already creates a uniquely named file
        $filepath = tempnam(sys_get_temp_dir(), 'adaline-test-');
        $modelManager = new ModelManager();

        try {
            $modelManager->saveToFile($classifier, $filepath);

            $restoredClassifier = $modelManager->restoreFromFile($filepath);
            $this->assertEquals($classifier, $restoredClassifier);
            $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
        } finally {
            // Do not leave the model file behind, even when an assertion fails
            if (is_string($filepath) && is_file($filepath)) {
                unlink($filepath);
            }
        }
    }
}
|
59
tests/Phpml/Classification/Linear/DecisionStumpTest.php
Normal file
59
tests/Phpml/Classification/Linear/DecisionStumpTest.php
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace tests\Classification\Linear;
|
||||||
|
|
||||||
|
use Phpml\Classification\Linear\DecisionStump;
|
||||||
|
use Phpml\ModelManager;
|
||||||
|
use PHPUnit\Framework\TestCase;
|
||||||
|
|
||||||
|
class DecisionStumpTest extends TestCase
{
    /**
     * Trains the stump on data sets that are separable by one column only
     * and checks the predictions on both sides of the split.
     */
    public function testPredictSingleSample()
    {
        // Samples should be separable with a line perpendicular to any dimension
        // given in the dataset
        // First: horizontal test
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1]];
        $targets = [0, 0, 1, 1];
        $classifier = new DecisionStump();
        $classifier->train($samples, $targets);
        $this->assertEquals(0, $classifier->predict([0.1, 0.2]));
        $this->assertEquals(0, $classifier->predict([1.1, 0.2]));
        $this->assertEquals(1, $classifier->predict([0.1, 0.99]));
        $this->assertEquals(1, $classifier->predict([1.1, 0.8]));

        // Then: vertical test
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1]];
        $targets = [0, 1, 0, 1];
        $classifier = new DecisionStump();
        $classifier->train($samples, $targets);
        $this->assertEquals(0, $classifier->predict([0.1, 0.2]));
        $this->assertEquals(0, $classifier->predict([0.1, 1.1]));
        $this->assertEquals(1, $classifier->predict([1.0, 0.99]));
        $this->assertEquals(1, $classifier->predict([1.1, 0.1]));

        return $classifier;
    }

    /**
     * A classifier restored from a file must equal the saved one and
     * give the same predictions.
     */
    public function testSaveAndRestore()
    {
        // Instantiate a new DecisionStump trained for the OR problem
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1]];
        $targets = [0, 1, 1, 1];
        $classifier = new DecisionStump();
        $classifier->train($samples, $targets);
        $testSamples = [[0, 1], [1, 1], [0.2, 0.1]];
        $predicted = $classifier->predict($testSamples);

        // tempnam() already creates a uniquely named file
        $filepath = tempnam(sys_get_temp_dir(), 'dstump-test-');
        $modelManager = new ModelManager();

        try {
            $modelManager->saveToFile($classifier, $filepath);

            $restoredClassifier = $modelManager->restoreFromFile($filepath);
            $this->assertEquals($classifier, $restoredClassifier);
            $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
        } finally {
            // Do not leave the model file behind, even when an assertion fails
            if (is_string($filepath) && is_file($filepath)) {
                unlink($filepath);
            }
        }
    }
}
|
55
tests/Phpml/Classification/Linear/PerceptronTest.php
Normal file
55
tests/Phpml/Classification/Linear/PerceptronTest.php
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace tests\Classification\Linear;
|
||||||
|
|
||||||
|
use Phpml\Classification\Linear\Perceptron;
|
||||||
|
use Phpml\ModelManager;
|
||||||
|
use PHPUnit\Framework\TestCase;
|
||||||
|
|
||||||
|
class PerceptronTest extends TestCase
{
    /**
     * Trains the perceptron on the AND and OR problems and checks the
     * predictions for points close to the training samples.
     */
    public function testPredictSingleSample()
    {
        // AND problem
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.9, 0.8]];
        $targets = [0, 0, 0, 1, 1];
        $classifier = new Perceptron(0.001, 5000);
        $classifier->train($samples, $targets);
        $this->assertEquals(0, $classifier->predict([0.1, 0.2]));
        $this->assertEquals(0, $classifier->predict([0.1, 0.99]));
        $this->assertEquals(1, $classifier->predict([1.1, 0.8]));

        // OR problem
        $samples = [[0, 0], [0.1, 0.2], [1, 0], [0, 1], [1, 1]];
        $targets = [0, 0, 1, 1, 1];
        $classifier = new Perceptron(0.001, 5000);
        $classifier->train($samples, $targets);
        $this->assertEquals(0, $classifier->predict([0, 0]));
        $this->assertEquals(1, $classifier->predict([0.1, 0.99]));
        $this->assertEquals(1, $classifier->predict([1.1, 0.8]));

        return $classifier;
    }

    /**
     * A classifier restored from a file must equal the saved one and
     * give the same predictions.
     */
    public function testSaveAndRestore()
    {
        // Instantiate a new Perceptron trained for the OR problem
        $samples = [[0, 0], [1, 0], [0, 1], [1, 1]];
        $targets = [0, 1, 1, 1];
        $classifier = new Perceptron();
        $classifier->train($samples, $targets);
        $testSamples = [[0, 1], [1, 1], [0.2, 0.1]];
        $predicted = $classifier->predict($testSamples);

        // tempnam() already creates a uniquely named file
        $filepath = tempnam(sys_get_temp_dir(), 'perceptron-test-');
        $modelManager = new ModelManager();

        try {
            $modelManager->saveToFile($classifier, $filepath);

            $restoredClassifier = $modelManager->restoreFromFile($filepath);
            $this->assertEquals($classifier, $restoredClassifier);
            $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
        } finally {
            // Do not leave the model file behind, even when an assertion fails
            if (is_string($filepath) && is_file($filepath)) {
                unlink($filepath);
            }
        }
    }
}
|
@ -100,4 +100,32 @@ class NormalizerTest extends TestCase
|
|||||||
|
|
||||||
$this->assertEquals($normalized, $samples, '', $delta = 0.01);
|
$this->assertEquals($normalized, $samples, '', $delta = 0.01);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testStandardNorm()
{
    // Generate 10 pseudo-random vectors of length 3. The seed is fixed so
    // that a failing run can be reproduced with the same data
    srand(42);
    $samples = [];
    for ($i = 0; $i < 10; $i++) {
        $sample = [];
        for ($k = 0; $k < 3; $k++) {
            $sample[] = rand(1, 100);
        }
        $samples[] = $sample;
    }

    // Use standard normalization
    $normalizer = new Normalizer(Normalizer::NORM_STD);
    $normalizer->transform($samples);

    // Values in the vector should be some value between -3 and +3
    $this->assertCount(10, $samples);
    $isOutOfRange = function ($element) {
        return $element < -3 || $element > 3;
    };
    foreach ($samples as $sample) {
        $errors = array_filter($sample, $isOutOfRange);
        $this->assertCount(0, $errors);
    }
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user