LogisticRegression classifier & Optimization methods (#63)

* LogisticRegression classifier & Optimization methods

* Minor fixes to Logistic Regression & Optimizers PR

* Addition for getCostValues() method
Mustafa Karabulut 2017-03-28 00:46:53 +03:00 committed by Arkadiusz Kondas
parent c44f3b2730
commit 49234429f0
10 changed files with 1162 additions and 152 deletions

View File

@ -19,14 +19,6 @@ class Adaline extends Perceptron
*/
const ONLINE_TRAINING = 2;
/**
* The function whose result will be used to calculate the network error
* for each instance
*
* @var string
*/
protected static $errorFunction = 'output';
/**
* Training type may be either 'Batch' or 'Online' learning
*
@ -64,62 +56,19 @@ class Adaline extends Perceptron
*/
Removed:

protected function runTraining()
{
    // Batch learning is executed:
    $currIter = 0;
    while ($this->maxIterations > $currIter++) {
        $weights = $this->weights;
        $outputs = array_map([$this, 'output'], $this->samples);
        $updates = array_map([$this, 'gradient'], $this->targets, $outputs);
        $this->updateWeights($updates);
        if ($this->earlyStop($weights)) {
            break;
        }
    }
}
/**
 * Returns the direction of gradient given the desired and actual outputs
 *
 * @param int $desired
 * @param int $output
 * @return int
 */
protected function gradient($desired, $output)
{
    return $desired - $output;
}
/**
 * Updates the weights of the network given the direction of the
 * gradient for each sample
 *
 * @param array $updates
 */
protected function updateWeights(array $updates)
{
    // Updates all weights at once
    for ($i=0; $i <= $this->featureCount; $i++) {
        if ($i == 0) {
            $this->weights[0] += $this->learningRate * array_sum($updates);
        } else {
            $col = array_column($this->samples, $i - 1);
            $error = 0;
            foreach ($col as $index => $val) {
                $error += $val * $updates[$index];
            }
            $this->weights[$i] += $this->learningRate * $error;
        }
    }
}

Added:

protected function runTraining()
{
    // If online training is chosen, then the parent runTraining method
    // will be executed with the 'output' method as the error function
    if ($this->trainingType == self::ONLINE_TRAINING) {
        return parent::runTraining();
    }
    // The cost function is the sum of squares
    $callback = function ($weights, $sample, $target) {
        $this->weights = $weights;
        $output = $this->output($sample);
        $gradient = $output - $target;
        $error = $gradient ** 2;
        return [$error, $gradient];
    };
    $isBatch = $this->trainingType == self::BATCH_TRAINING;
    return parent::runGradientDescent($callback, $isBatch);
}
}

View File

@ -0,0 +1,276 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Phpml\Classification\Classifier;
use Phpml\Helper\Optimizer\ConjugateGradient;
class LogisticRegression extends Adaline
{
/**
* Batch training: Gradient descent algorithm (default)
*/
const BATCH_TRAINING = 1;
/**
* Online training: Stochastic gradient descent learning
*/
const ONLINE_TRAINING = 2;
/**
* Conjugate Batch: Conjugate Gradient algorithm
*/
const CONJUGATE_GRAD_TRAINING = 3;
/**
* Cost function to optimize: 'log' and 'sse' are supported <br>
* - 'log' : log likelihood <br>
* - 'sse' : sum of squared errors <br>
*
* @var string
*/
protected $costFunction = 'sse';
/**
* Regularization term: only 'L2' is supported
*
* @var string
*/
protected $penalty = 'L2';
/**
* Lambda (λ) parameter of regularization term. If λ is set to 0, then
* regularization term is cancelled.
*
* @var float
*/
protected $lambda = 0.5;
/**
* Initialize a Logistic Regression classifier with maximum number of iterations
* and learning rule to be applied <br>
*
* Maximum number of iterations can be an integer value greater than 0 <br>
* If normalizeInputs is set to true, then every input given to the algorithm will be standardized
* by use of standard deviation and mean calculation <br>
*
* Cost function can be 'log' for log-likelihood and 'sse' for sum of squared errors <br>
*
* Penalty (Regularization term) can be 'L2' or empty string to cancel penalty term
*
* @param int $maxIterations
* @param bool $normalizeInputs
* @param int $trainingType
* @param string $cost
* @param string $penalty
*
* @throws \Exception
*/
public function __construct(int $maxIterations = 500, bool $normalizeInputs = true,
int $trainingType = self::CONJUGATE_GRAD_TRAINING, string $cost = 'sse',
string $penalty = 'L2')
{
$trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING);
if (! in_array($trainingType, $trainingTypes)) {
throw new \Exception("Logistic regression can only be trained with " .
"batch (gradient descent), online (stochastic gradient descent) " .
"or conjugate batch (conjugate gradients) algorithms");
}
if (! in_array($cost, ['log', 'sse'])) {
throw new \Exception("Logistic regression cost function can be one of the following: \n" .
"'log' for log-likelihood and 'sse' for sum of squared errors");
}
if ($penalty != '' && strtoupper($penalty) !== 'L2') {
throw new \Exception("Logistic regression supports only 'L2' regularization");
}
$this->learningRate = 0.001;
parent::__construct($this->learningRate, $maxIterations, $normalizeInputs);
$this->trainingType = $trainingType;
$this->costFunction = $cost;
$this->penalty = $penalty;
}
/**
* Sets the learning rate if gradient descent algorithm is
* selected for training
*
* @param float $learningRate
*/
public function setLearningRate(float $learningRate)
{
$this->learningRate = $learningRate;
}
/**
* Lambda (λ) parameter of regularization term. If 0 is given,
* then the regularization term is cancelled
*
* @param float $lambda
*/
public function setLambda(float $lambda)
{
$this->lambda = $lambda;
}
/**
* Adapts the weights with respect to given samples and targets
* by use of selected solver
*/
protected function runTraining()
{
$callback = $this->getCostFunction();
switch ($this->trainingType) {
case self::BATCH_TRAINING:
return $this->runGradientDescent($callback, true);
case self::ONLINE_TRAINING:
return $this->runGradientDescent($callback, false);
case self::CONJUGATE_GRAD_TRAINING:
return $this->runConjugateGradient($callback);
}
}
/**
* Executes Conjugate Gradient method to optimize the
* weights of the LogReg model
*/
protected function runConjugateGradient(\Closure $gradientFunc)
{
$optimizer = (new ConjugateGradient($this->featureCount))
->setMaxIterations($this->maxIterations);
$this->weights = $optimizer->runOptimization($this->samples, $this->targets, $gradientFunc);
$this->costValues = $optimizer->getCostValues();
}
/**
* Returns the appropriate callback function for the selected cost function
*
* @return \Closure
*/
protected function getCostFunction()
{
$penalty = 0;
if ($this->penalty == 'L2') {
$penalty = $this->lambda;
}
switch ($this->costFunction) {
case 'log':
/*
* Negative of Log-likelihood cost function to be minimized:
* J(x) = ( - y . log(h(x)) - (1 - y) . log(1 - h(x)))
*
* If regularization term is given, then it will be added to the cost:
* for L2 : J(x) = J(x) + λ/m . w
*
* The gradient of the cost function to be used with gradient descent:
* ∇J(x) = -(y - h(x)) = (h(x) - y)
*/
$callback = function ($weights, $sample, $y) use ($penalty) {
$this->weights = $weights;
$hX = $this->output($sample);
// In cases where $hX = 1 or $hX = 0, the log-likelihood
// value will give a NaN, so we fix these values
if ($hX == 1) {
$hX = 1 - 1e-10;
}
if ($hX == 0) {
$hX = 1e-10;
}
$error = -$y * log($hX) - (1 - $y) * log(1 - $hX);
$gradient = $hX - $y;
return [$error, $gradient, $penalty];
};
return $callback;
case 'sse':
/**
* Sum of squared errors or least squared errors cost function:
* J(x) = (y - h(x))^2
*
* If regularization term is given, then it will be added to the cost:
* for L2 : J(x) = J(x) + λ/m . w
*
* The gradient of the cost function:
* ∇J(x) = (h(x) - y) . h(x) . (1 - h(x))
*/
$callback = function ($weights, $sample, $y) use ($penalty) {
$this->weights = $weights;
$hX = $this->output($sample);
$error = ($y - $hX) ** 2;
$gradient = -($y - $hX) * $hX * (1 - $hX);
return [$error, $gradient, $penalty];
};
return $callback;
}
}
/**
* Returns the output of the network, a float value between 0.0 and 1.0
*
* @param array $sample
*
* @return float
*/
protected function output(array $sample)
{
$sum = parent::output($sample);
return 1.0 / (1.0 + exp(-$sum));
}
/**
* Returns the class value (either -1 or 1) for the given input
*
* @param array $sample
* @return int
*/
protected function outputClass(array $sample)
{
$output = $this->output($sample);
if (round($output) > 0.5) {
return 1;
}
return -1;
}
/**
* Returns the probability of the sample of belonging to the given label.
*
* The probability is simply taken as the distance of the sample
* to the decision plane.
*
* @param array $sample
* @param mixed $label
*/
protected function predictProbability(array $sample, $label)
{
$predicted = $this->predictSampleBinary($sample);
if (strval($predicted) == strval($label)) {
$sample = $this->checkNormalizedSample($sample);
return abs($this->output($sample) - 0.5);
}
return 0.0;
}
}
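For orientation, a minimal usage sketch of the new classifier follows. The toy samples, targets and variable names are illustrative only and not part of this commit; train() and predict() come from the existing Perceptron/OneVsRest plumbing (as exercised by the tests below), and getCostValues() is the accessor added in this PR.

use Phpml\Classification\Linear\LogisticRegression;

// Illustrative toy data: two features, two classes
$samples = [[0.1, 0.2], [0.2, 0.1], [0.8, 0.9], [0.9, 0.8]];
$targets = [0, 0, 1, 1];

// 500 iterations, normalized inputs, conjugate gradient solver,
// log-likelihood cost and L2 penalty
$classifier = new LogisticRegression(
    500,
    true,
    LogisticRegression::CONJUGATE_GRAD_TRAINING,
    'log',
    'L2'
);
$classifier->train($samples, $targets);

$prediction  = $classifier->predict([0.85, 0.95]); // predicted label for a new point
$costHistory = $classifier->getCostValues();       // per-iteration cost values recorded during training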

View File

@ -6,6 +6,8 @@ namespace Phpml\Classification\Linear;
use Phpml\Helper\Predictable;
use Phpml\Helper\OneVsRest;
use Phpml\Helper\Optimizer\StochasticGD;
use Phpml\Helper\Optimizer\GD;
use Phpml\Classification\Classifier;
use Phpml\Preprocessing\Normalizer;
@ -13,14 +15,6 @@ class Perceptron implements Classifier
{
use Predictable, OneVsRest;
/**
* The function whose result will be used to calculate the network error
* for each instance
*
* @var string
*/
protected static $errorFunction = 'outputClass';
/**
* @var array
*/
@ -62,12 +56,14 @@ class Perceptron implements Classifier
protected $normalizer;
Removed:

/**
 * Minimum amount of change in the weights between iterations
 * that needs to be obtained to continue the training
 *
 * @var float
 */
protected $threshold = 1e-5;

Added:

/**
 * @var bool
 */
protected $enableEarlyStop = true;
/**
* @var array
*/
protected $costValues = [];
/**
* Initialize a perceptron classifier with given learning rate and maximum
@ -97,20 +93,6 @@ class Perceptron implements Classifier
$this->maxIterations = $maxIterations;
}
/**
* Sets minimum value for the change in the weights
* between iterations to continue the iterations.<br>
*
* If the weight change is less than given value then the
* algorithm will stop training
*
* @param float $threshold
*/
public function setChangeThreshold(float $threshold = 1e-5)
{
$this->threshold = $threshold;
}
/**
* @param array $samples
* @param array $targets
@ -136,82 +118,72 @@ class Perceptron implements Classifier
$this->samples = array_merge($this->samples, $samples);
$this->featureCount = count($this->samples[0]);
// Init weights with random values
$this->weights = array_fill(0, $this->featureCount + 1, 0);
foreach ($this->weights as &$weight) {
$weight = rand() / (float) getrandmax();
}
// Do training
$this->runTraining();
}
Removed:

/**
 * Adapts the weights with respect to given samples and targets
 * by use of perceptron learning rule
 */
protected function runTraining()
{
    $currIter = 0;
    $bestWeights = null;
    $bestScore = count($this->samples);
    $bestWeightIter = 0;
    while ($this->maxIterations > $currIter++) {
        $weights = $this->weights;
        $misClassified = 0;
        foreach ($this->samples as $index => $sample) {
            $target = $this->targets[$index];
            $prediction = $this->{static::$errorFunction}($sample);
            $update = $target - $prediction;
            if ($target != $prediction) {
                $misClassified++;
            }
            // Update bias
            $this->weights[0] += $update * $this->learningRate; // Bias
            // Update other weights
            for ($i=1; $i <= $this->featureCount; $i++) {
                $this->weights[$i] += $update * $sample[$i - 1] * $this->learningRate;
            }
        }
        // Save the best weights in the "pocket" so that
        // any future weights worse than this will be disregarded
        if ($bestWeights == null || $misClassified <= $bestScore) {
            $bestWeights = $weights;
            $bestScore = $misClassified;
            $bestWeightIter = $currIter;
        }
        // Check for early stop
        if ($this->earlyStop($weights)) {
            break;
        }
    }
    // The weights in the pocket are better than or equal to the last state
    // so, we use these weights
    $this->weights = $bestWeights;
}
/**
 * @param array $oldWeights
 *
 * @return boolean
 */
protected function earlyStop($oldWeights)
{
    // Check for early stop: No change larger than 1e-5
    $diff = array_map(
        function ($w1, $w2) {
            return abs($w1 - $w2) > 1e-5 ? 1 : 0;
        },
        $oldWeights, $this->weights);
    if (array_sum($diff) == 0) {
        return true;
    }
    return false;
}

Added:

/**
 * Normally enabling early stopping for the optimization procedure may
 * help saving processing time while in some cases it may result in
 * premature convergence.<br>
 *
 * If "false" is given, the optimization procedure will always be executed
 * for $maxIterations times
 *
 * @param bool $enable
 */
public function setEarlyStop(bool $enable = true)
{
    $this->enableEarlyStop = $enable;
    return $this;
}
/**
 * Returns the cost values obtained during the training.
 *
 * @return array
 */
public function getCostValues()
{
    return $this->costValues;
}
/**
 * Trains the perceptron model with Stochastic Gradient Descent optimization
 * to get the correct set of weights
 */
protected function runTraining()
{
    // The cost function is the sum of squares
    $callback = function ($weights, $sample, $target) {
        $this->weights = $weights;
        $prediction = $this->outputClass($sample);
        $gradient = $prediction - $target;
        $error = $gradient ** 2;
        return [$error, $gradient];
    };
    $this->runGradientDescent($callback);
}
/**
* Executes Stochastic Gradient Descent algorithm for
* the given cost function
*/
protected function runGradientDescent(\Closure $gradientFunc, bool $isBatch = false)
{
$class = $isBatch ? GD::class : StochasticGD::class;
$optimizer = (new $class($this->featureCount))
->setLearningRate($this->learningRate)
->setMaxIterations($this->maxIterations)
->setChangeThreshold(1e-6)
->setEarlyStop($this->enableEarlyStop);
$this->weights = $optimizer->runOptimization($this->samples, $this->targets, $gradientFunc);
$this->costValues = $optimizer->getCostValues();
}
/**

View File

@ -15,7 +15,7 @@ trait OneVsRest
* @var array
*/
protected $targets = [];
/**
* @var array
*/
@ -26,6 +26,11 @@ trait OneVsRest
*/
protected $labels;
/**
* @var array
*/
protected $costValues;
/**
* Train a binary classifier in the OvR style
*
@ -56,6 +61,12 @@ trait OneVsRest
$this->classifiers[$label] = $predictor;
}
}
// If the underlying classifier is capable of giving the cost values
// during the training, then assign it to the relevant variable
if (method_exists($this->classifiers[0], 'getCostValues')) {
$this->costValues = $this->classifiers[0]->getCostValues();
}
}
/**

View File

@ -0,0 +1,359 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
/**
* Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x
* (See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method)
*
* The method applied below is explained in a practical manner in the following document:
* - http://web.cs.iastate.edu/~cs577/handouts/conjugate-gradient.pdf
*
* However, it is compliant with the general Conjugate Gradient method with the
* Fletcher-Reeves update method. Note that f(x) is assumed to be one-dimensional
* and one gradient is utilized for all dimensions in the given data.
*/
class ConjugateGradient extends GD
{
/**
* @param array $samples
* @param array $targets
* @param \Closure $gradientCb
*
* @return array
*/
public function runOptimization(array $samples, array $targets, \Closure $gradientCb)
{
$this->samples = $samples;
$this->targets = $targets;
$this->gradientCb = $gradientCb;
$this->sampleCount = count($samples);
$this->costValues = [];
$d = mp::muls($this->gradient($this->theta), -1);
for ($i=0; $i < $this->maxIterations; $i++) {
// Obtain α that minimizes f(θ + α.d)
$alpha = $this->getAlpha(array_sum($d));
// θ(k+1) = θ(k) + α.d
$thetaNew = $this->getNewTheta($alpha, $d);
// β = ||∇f(x(k+1))||² / ||∇f(x(k))||²
$beta = $this->getBeta($thetaNew);
// d(k+1) = -∇f(x(k+1)) + β(k).d(k)
$d = $this->getNewDirection($thetaNew, $beta, $d);
// Save values for the next iteration
$oldTheta = $this->theta;
$this->costValues[] = $this->cost($thetaNew);
$this->theta = $thetaNew;
if ($this->enableEarlyStop && $this->earlyStop($oldTheta)) {
break;
}
}
return $this->theta;
}
/**
* Executes the callback function for the problem and returns
* the gradient of the cost function for each sample & target.
*
* @param array $theta
*
* @return array
*/
protected function gradient(array $theta)
{
list($_, $gradient, $_) = parent::gradient($theta);
return $gradient;
}
/**
* Returns the value of f(x) for given solution
*
* @param array $theta
*
* @return float
*/
protected function cost(array $theta)
{
list($cost, $_, $_) = parent::gradient($theta);
return array_sum($cost) / $this->sampleCount;
}
/**
* Calculates alpha that minimizes the function f(θ + α.d)
* by performing a line search that does not rely upon derivatives.
*
* There are several alternatives for this function. For now, we
* prefer a method inspired from the bisection method for its simplicity.
* This algorithm attempts to find an optimum alpha value between 0.0001 and 0.01
*
* Algorithm as follows:
* a) Probe a small alpha (0.0001) and calculate cost function
* b) Probe a larger alpha (0.01) and calculate cost function
* b-1) If cost function decreases, continue enlarging alpha
* b-2) If cost function increases, take the midpoint and try again
*
* @param float $d
*
* @return float
*/
protected function getAlpha(float $d)
{
$small = 0.0001 * $d;
$large = 0.01 * $d;
// Obtain θ + α.d for two initial values, x0 and x1
$x0 = mp::adds($this->theta, $small);
$x1 = mp::adds($this->theta, $large);
$epsilon = 0.0001;
$iteration = 0;
do {
$fx1 = $this->cost($x1);
$fx0 = $this->cost($x0);
// If the difference between two values is small enough
// then break the loop
if (abs($fx1 - $fx0) <= $epsilon) {
break;
}
if ($fx1 < $fx0) {
$x0 = $x1;
$x1 = mp::adds($x1, 0.01); // Enlarge second
} else {
$x1 = mp::divs(mp::add($x1, $x0), 2.0); // Get to the midpoint
}
$error = $fx1 / $this->dimensions;
} while ($error <= $epsilon || $iteration++ < 10);
// Return α = θ / d
if ($d == 0) {
return $x1[0] - $this->theta[0];
}
return ($x1[0] - $this->theta[0]) / $d;
}
/**
* Calculates new set of solutions with given alpha (for each θ(k)) and
* gradient direction.
*
* θ(k+1) = θ(k) + α.d
*
* @param float $alpha
* @param array $d
*
* @return array
*/
protected function getNewTheta(float $alpha, array $d)
{
$theta = $this->theta;
for ($i=0; $i < $this->dimensions + 1; $i++) {
if ($i == 0) {
$theta[$i] += $alpha * array_sum($d);
} else {
$sum = 0.0;
foreach ($this->samples as $si => $sample) {
$sum += $sample[$i - 1] * $d[$si] * $alpha;
}
$theta[$i] += $sum;
}
}
return $theta;
}
/**
* Calculates new beta (β) for given set of solutions by using
* Fletcher-Reeves method.
*
* β = ||∇f(x(k+1))||² / ||∇f(x(k))||²
*
* See:
* R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149-154.
*
* @param array $newTheta
*
* @return float
*/
protected function getBeta(array $newTheta)
{
$dNew = array_sum($this->gradient($newTheta));
$dOld = array_sum($this->gradient($this->theta)) + 1e-100;
return $dNew ** 2 / $dOld ** 2;
}
/**
* Calculates the new conjugate direction
*
* d(k+1) = -∇f(x(k+1)) + β(k).d(k)
*
* @param array $theta
* @param float $beta
* @param array $d
*
* @return array
*/
protected function getNewDirection(array $theta, float $beta, array $d)
{
$grad = $this->gradient($theta);
return mp::add(mp::muls($grad, -1), mp::muls($d, $beta));
}
}
/**
* Handles element-wise vector operations between vector-vector
* and vector-scalar variables
*/
class mp
{
/**
* Element-wise <b>multiplication</b> of two vectors of the same size
*
* @param array $m1
* @param array $m2
*
* @return array
*/
public static function mul(array $m1, array $m2)
{
$res = [];
foreach ($m1 as $i => $val) {
$res[] = $val * $m2[$i];
}
return $res;
}
/**
* Element-wise <b>division</b> of two vectors of the same size
*
* @param array $m1
* @param array $m2
*
* @return array
*/
public static function div(array $m1, array $m2)
{
$res = [];
foreach ($m1 as $i => $val) {
$res[] = $val / $m2[$i];
}
return $res;
}
/**
* Element-wise <b>addition</b> of two vectors of the same size
*
* @param array $m1
* @param array $m2
*
* @return array
*/
public static function add(array $m1, array $m2, $mag = 1)
{
$res = [];
foreach ($m1 as $i => $val) {
$res[] = $val + $mag * $m2[$i];
}
return $res;
}
/**
* Element-wise <b>subtraction</b> of two vectors of the same size
*
* @param array $m1
* @param array $m2
*
* @return array
*/
public static function sub(array $m1, array $m2)
{
return self::add($m1, $m2, -1);
}
/**
* Element-wise <b>multiplication</b> of a vector with a scalar
*
* @param array $m1
* @param float $m2
*
* @return array
*/
public static function muls(array $m1, float $m2)
{
$res = [];
foreach ($m1 as $val) {
$res[] = $val * $m2;
}
return $res;
}
/**
* Element-wise <b>division</b> of a vector with a scalar
*
* @param array $m1
* @param float $m2
*
* @return array
*/
public static function divs(array $m1, float $m2)
{
$res = [];
foreach ($m1 as $val) {
$res[] = $val / ($m2 + 1e-32);
}
return $res;
}
/**
* Element-wise <b>addition</b> of a vector with a scalar
*
* @param array $m1
* @param float $m2
*
* @return array
*/
public static function adds(array $m1, float $m2, $mag = 1)
{
$res = [];
foreach ($m1 as $val) {
$res[] = $val + $mag * $m2;
}
return $res;
}
/**
* Element-wise <b>subtraction</b> of a vector with a scalar
*
* @param array $m1
* @param float $m2
*
* @return array
*/
public static function subs(array $m1, float $m2)
{
return self::adds($m1, $m2, -1);
}
}
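Summarized as equations (a restatement of the code and comments above, not text from the commit), one Fletcher-Reeves iteration performed by runOptimization() is:

d_0 = -\nabla f(\theta_0)
\alpha_k \approx \arg\min_\alpha f(\theta_k + \alpha\, d_k)    \text{(the bisection-style line search in getAlpha())}
\theta_{k+1} = \theta_k + \alpha_k\, d_k    \text{(getNewTheta())}
\beta_k = \lVert \nabla f(\theta_{k+1}) \rVert^2 / \lVert \nabla f(\theta_k) \rVert^2    \text{(getBeta())}
d_{k+1} = -\nabla f(\theta_{k+1}) + \beta_k\, d_k    \text{(getNewDirection())}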

View File

@ -0,0 +1,108 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
/**
* Batch version of Gradient Descent to optimize the weights
* of a classifier given samples, targets and the objective function to minimize
*/
class GD extends StochasticGD
{
/**
* Number of samples given
*
* @var int
*/
protected $sampleCount;
/**
* @param array $samples
* @param array $targets
* @param \Closure $gradientCb
*
* @return array
*/
public function runOptimization(array $samples, array $targets, \Closure $gradientCb)
{
$this->samples = $samples;
$this->targets = $targets;
$this->gradientCb = $gradientCb;
$this->sampleCount = count($this->samples);
// Batch learning is executed:
$currIter = 0;
$this->costValues = [];
while ($this->maxIterations > $currIter++) {
$theta = $this->theta;
// Calculate update terms for each sample
list($errors, $updates, $totalPenalty) = $this->gradient($theta);
$this->updateWeightsWithUpdates($updates, $totalPenalty);
$this->costValues[] = array_sum($errors)/$this->sampleCount;
if ($this->earlyStop($theta)) {
break;
}
}
return $this->theta;
}
/**
* Calculates gradient, cost function and penalty term for each sample
* then returns them as an array of values
*
* @param array $theta
*
* @return array
*/
protected function gradient(array $theta)
{
$costs = [];
$gradient= [];
$totalPenalty = 0;
foreach ($this->samples as $index => $sample) {
$target = $this->targets[$index];
$result = ($this->gradientCb)($theta, $sample, $target);
list($cost, $grad, $penalty) = array_pad($result, 3, 0);
$costs[] = $cost;
$gradient[]= $grad;
$totalPenalty += $penalty;
}
$totalPenalty /= $this->sampleCount;
return [$costs, $gradient, $totalPenalty];
}
/**
* @param array $updates
* @param float $penalty
*/
protected function updateWeightsWithUpdates(array $updates, float $penalty)
{
// Updates all weights at once
for ($i=0; $i <= $this->dimensions; $i++) {
if ($i == 0) {
$this->theta[0] -= $this->learningRate * array_sum($updates);
} else {
$col = array_column($this->samples, $i - 1);
$error = 0;
foreach ($col as $index => $val) {
$error += $val * $updates[$index];
}
$this->theta[$i] -= $this->learningRate *
($error + $penalty * $this->theta[$i]);
}
}
}
}
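For illustration only (the linear toy problem and names below are not part of the commit), a gradient callback returns [cost, gradient] per sample (an optional third penalty element is padded with 0 by GD::gradient()), and GD applies one batch update per iteration:

use Phpml\Helper\Optimizer\GD;

// Fit y ≈ θ0 + θ1 * x on illustrative data
$samples = [[1.0], [2.0], [3.0]];
$targets = [3.0, 5.0, 7.0];

$callback = function ($theta, $sample, $target) {
    $estimate = $theta[0] + $theta[1] * $sample[0];
    $cost = ($target - $estimate) ** 2;  // squared error for this sample
    $gradient = $estimate - $target;     // update direction; GD multiplies it by the feature value
    return [$cost, $gradient];           // no penalty term given, padded with 0
};

$optimizer = new GD(1); // one feature; the bias dimension is handled internally
$theta = $optimizer->setLearningRate(0.01)
    ->setMaxIterations(1000)
    ->runOptimization($samples, $targets, $callback);
$costPerIteration = $optimizer->getCostValues();

In equation form, the batch update implemented by updateWeightsWithUpdates(), with learning rate \alpha, per-sample gradients g_i, feature values x_{ij} and averaged penalty p, is:

\theta_0 \leftarrow \theta_0 - \alpha \sum_i g_i
\theta_j \leftarrow \theta_j - \alpha \Big( \sum_i g_i\, x_{ij} + p\,\theta_j \Big), \qquad j = 1,\dots,d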

View File

@ -0,0 +1,61 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
abstract class Optimizer
{
/**
* Unknown variables to be found
*
* @var array
*/
protected $theta;
/**
* Number of dimensions
*
* @var int
*/
protected $dimensions;
/**
* Inits a new instance of Optimizer for the given number of dimensions
*
* @param int $dimensions
*/
public function __construct(int $dimensions)
{
$this->dimensions = $dimensions;
// Inits the weights randomly
$this->theta = [];
for ($i=0; $i < $this->dimensions; $i++) {
$this->theta[] = rand() / (float) getrandmax();
}
}
/**
* Sets the weights manually
*
* @param array $theta
*/
public function setInitialTheta(array $theta)
{
if (count($theta) != $this->dimensions) {
throw new \Exception("Number of values in the weights array should be $this->dimensions");
}
$this->theta = $theta;
return $this;
}
/**
* Executes the optimization with the given samples & targets
* and returns the weights
*
*/
abstract protected function runOptimization(array $samples, array $targets, \Closure $gradientCb);
}

View File

@ -0,0 +1,271 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
/**
* Stochastic Gradient Descent optimization method
* to find a solution for the equation A.ϴ = y where
* A (samples) and y (targets) are known and ϴ is unknown.
*/
class StochasticGD extends Optimizer
{
/**
* A (samples)
*
* @var array
*/
protected $samples;
/**
* y (targets)
*
* @var array
*/
protected $targets;
/**
* Callback function to get the gradient and cost value
* for a specific set of theta (ϴ) and a pair of sample & target
*
* @var \Closure
*/
protected $gradientCb;
/**
* Maximum number of iterations used to train the model
*
* @var int
*/
protected $maxIterations = 1000;
/**
* Learning rate is used to control the speed of the optimization.<br>
*
* Larger values of lr may overshoot the optimum or even cause divergence
* while small values slow down the convergence and increase the time
* required for the training
*
* @var float
*/
protected $learningRate = 0.001;
/**
* Minimum amount of change in the weights and error values
* between iterations that needs to be obtained to continue the training
*
* @var float
*/
protected $threshold = 1e-4;
/**
* Enable/Disable early stopping by checking the weight & cost values
* to see whether they changed large enough to continue the optimization
*
* @var bool
*/
protected $enableEarlyStop = true;
/**
* List of values obtained by evaluating the cost function at each iteration
* of the algorithm
*
* @var array
*/
protected $costValues= [];
/**
* Initializes the SGD optimizer for the given number of dimensions
*
* @param int $dimensions
*/
public function __construct(int $dimensions)
{
// Add one more dimension for the bias
parent::__construct($dimensions + 1);
$this->dimensions = $dimensions;
}
/**
* Sets minimum value for the change in the theta values
* between iterations to continue the iterations.<br>
*
* If change in the theta is less than given value then the
* algorithm will stop training
*
* @param float $threshold
*
* @return $this
*/
public function setChangeThreshold(float $threshold = 1e-5)
{
$this->threshold = $threshold;
return $this;
}
/**
* Enable/Disable early stopping by checking at each iteration
* whether changes in theta or cost value are not large enough
*
* @param bool $enable
*
* @return $this
*/
public function setEarlyStop(bool $enable = true)
{
$this->enableEarlyStop = $enable;
return $this;
}
/**
* @param float $learningRate
*
* @return $this
*/
public function setLearningRate(float $learningRate)
{
$this->learningRate = $learningRate;
return $this;
}
/**
* @param int $maxIterations
*
* @return $this
*/
public function setMaxIterations(int $maxIterations)
{
$this->maxIterations = $maxIterations;
return $this;
}
/**
* Optimization procedure finds the unknown variables for the equation A.ϴ = y
* for the given samples (A) and targets (y).<br>
*
* The cost function to minimize and the gradient of the function are to be
* handled by the callback function provided as the third parameter of the method.
*
* @param array $samples
* @param array $targets
* @param \Closure $gradientCb
*
* @return array
*/
public function runOptimization(array $samples, array $targets, \Closure $gradientCb)
{
$this->samples = $samples;
$this->targets = $targets;
$this->gradientCb = $gradientCb;
$currIter = 0;
$bestTheta = null;
$bestScore = 0.0;
$bestWeightIter = 0;
$this->costValues = [];
while ($this->maxIterations > $currIter++) {
$theta = $this->theta;
// Update the guess
$cost = $this->updateTheta();
// Save the best theta in the "pocket" so that
// any future set of theta worse than this will be disregarded
if ($bestTheta == null || $cost <= $bestScore) {
$bestTheta = $theta;
$bestScore = $cost;
$bestWeightIter = $currIter;
}
// Add the cost value for this iteration to the list
$this->costValues[] = $cost;
// Check for early stop
if ($this->enableEarlyStop && $this->earlyStop($theta)) {
break;
}
}
// Solution in the pocket is better than or equal to the last state
// so, we use this solution
return $this->theta = $bestTheta;
}
/**
* @return float
*/
protected function updateTheta()
{
$jValue = 0.0;
$theta = $this->theta;
foreach ($this->samples as $index => $sample) {
$target = $this->targets[$index];
$result = ($this->gradientCb)($theta, $sample, $target);
list($error, $gradient, $penalty) = array_pad($result, 3, 0);
// Update bias
$this->theta[0] -= $this->learningRate * $gradient;
// Update other values
for ($i=1; $i <= $this->dimensions; $i++) {
$this->theta[$i] -= $this->learningRate *
($gradient * $sample[$i - 1] + $penalty * $this->theta[$i]);
}
// Sum error rate
$jValue += $error;
}
return $jValue / count($this->samples);
}
/**
* Checks if the optimization is not effective enough and can be stopped
* in case large enough changes in the solution do not happen
*
* @param array $oldTheta
*
* @return boolean
*/
protected function earlyStop($oldTheta)
{
// Check for early stop: No change larger than threshold (default 1e-5)
$diff = array_map(
function ($w1, $w2) {
return abs($w1 - $w2) > $this->threshold ? 1 : 0;
},
$oldTheta, $this->theta);
if (array_sum($diff) == 0) {
return true;
}
// Check if the last two cost values are almost the same
$costs = array_slice($this->costValues, -2);
if (count($costs) == 2 && abs($costs[1] - $costs[0]) < $this->threshold) {
return true;
}
return false;
}
/**
* Returns the list of cost values for each iteration executed in
* last run of the optimization
*
* @return array
*/
public function getCostValues()
{
return $this->costValues;
}
}
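For comparison with the batch version above (again a restatement of the code, not commit text), updateTheta() applies the update once per sample, with learning rate \alpha, gradient g, feature values x_j and penalty p returned by the callback:

\theta_0 \leftarrow \theta_0 - \alpha\, g
\theta_j \leftarrow \theta_j - \alpha \big( g\, x_j + p\,\theta_j \big), \qquad j = 1,\dots,d

earlyStop() then halts the optimization when no \theta_j changed by more than the threshold between iterations, or when the last two averaged cost values differ by less than the threshold.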

View File

@ -2,7 +2,7 @@
declare(strict_types=1);
namespace tests\Classification\Linear;
namespace tests\Classification\Ensemble;
use Phpml\Classification\Ensemble\AdaBoost;
use Phpml\ModelManager;

View File

@ -16,6 +16,7 @@ class PerceptronTest extends TestCase
$samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.6, 0.6]];
$targets = [0, 0, 0, 1, 1];
$classifier = new Perceptron(0.001, 5000);
$classifier->setEarlyStop(false);
$classifier->train($samples, $targets);
$this->assertEquals(0, $classifier->predict([0.1, 0.2]));
$this->assertEquals(0, $classifier->predict([0, 1]));
@ -25,6 +26,7 @@ class PerceptronTest extends TestCase
$samples = [[0.1, 0.1], [0.4, 0.], [0., 0.3], [1, 0], [0, 1], [1, 1]];
$targets = [0, 0, 0, 1, 1, 1];
$classifier = new Perceptron(0.001, 5000, false);
$classifier->setEarlyStop(false);
$classifier->train($samples, $targets);
$this->assertEquals(0, $classifier->predict([0., 0.]));
$this->assertEquals(1, $classifier->predict([0.1, 0.99]));
@ -40,11 +42,12 @@ class PerceptronTest extends TestCase
$targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2];
$classifier = new Perceptron();
$classifier->setEarlyStop(false);
$classifier->train($samples, $targets);
$this->assertEquals(0, $classifier->predict([0.5, 0.5]));
$this->assertEquals(1, $classifier->predict([6.0, 5.0]));
$this->assertEquals(2, $classifier->predict([3.0, 9.5]));
return $classifier;
}