diff --git a/src/Phpml/Classification/Linear/Adaline.php b/src/Phpml/Classification/Linear/Adaline.php index 194451a..8d94be4 100644 --- a/src/Phpml/Classification/Linear/Adaline.php +++ b/src/Phpml/Classification/Linear/Adaline.php @@ -19,14 +19,6 @@ class Adaline extends Perceptron */ const ONLINE_TRAINING = 2; - /** - * The function whose result will be used to calculate the network error - * for each instance - * - * @var string - */ - protected static $errorFunction = 'output'; - /** * Training type may be either 'Batch' or 'Online' learning * @@ -64,62 +56,19 @@ class Adaline extends Perceptron */ protected function runTraining() { - // If online training is chosen, then the parent runTraining method - // will be executed with the 'output' method as the error function - if ($this->trainingType == self::ONLINE_TRAINING) { - return parent::runTraining(); - } + // The cost function is the sum of squares + $callback = function ($weights, $sample, $target) { + $this->weights = $weights; - // Batch learning is executed: - $currIter = 0; - while ($this->maxIterations > $currIter++) { - $weights = $this->weights; + $output = $this->output($sample); + $gradient = $output - $target; + $error = $gradient ** 2; - $outputs = array_map([$this, 'output'], $this->samples); - $updates = array_map([$this, 'gradient'], $this->targets, $outputs); + return [$error, $gradient]; + }; - $this->updateWeights($updates); + $isBatch = $this->trainingType == self::BATCH_TRAINING; - if ($this->earlyStop($weights)) { - break; - } - } - } - - /** - * Returns the direction of gradient given the desired and actual outputs - * - * @param int $desired - * @param int $output - * @return int - */ - protected function gradient($desired, $output) - { - return $desired - $output; - } - - /** - * Updates the weights of the network given the direction of the - * gradient for each sample - * - * @param array $updates - */ - protected function updateWeights(array $updates) - { - // Updates all weights at once - for ($i=0; $i <= $this->featureCount; $i++) { - if ($i == 0) { - $this->weights[0] += $this->learningRate * array_sum($updates); - } else { - $col = array_column($this->samples, $i - 1); - - $error = 0; - foreach ($col as $index => $val) { - $error += $val * $updates[$index]; - } - - $this->weights[$i] += $this->learningRate * $error; - } - } + return parent::runGradientDescent($callback, $isBatch); } } diff --git a/src/Phpml/Classification/Linear/LogisticRegression.php b/src/Phpml/Classification/Linear/LogisticRegression.php new file mode 100644 index 0000000..a0ec290 --- /dev/null +++ b/src/Phpml/Classification/Linear/LogisticRegression.php @@ -0,0 +1,276 @@ + + * - 'log' : log likelihood
+ * - 'sse' : sum of squared errors
+ * + * @var string + */ + protected $costFunction = 'sse'; + + /** + * Regularization term: only 'L2' is supported + * + * @var string + */ + protected $penalty = 'L2'; + + /** + * Lambda (λ) parameter of regularization term. If λ is set to 0, then + * regularization term is cancelled. + * + * @var float + */ + protected $lambda = 0.5; + + /** + * Initialize a Logistic Regression classifier with maximum number of iterations + * and learning rule to be applied
+ * + * Maximum number of iterations must be an integer value greater than 0
+ * If normalizeInputs is set to true, then every input given to the algorithm will be standardized + * using the mean and standard deviation
+ * + * Cost function can be 'log' for log-likelihood and 'sse' for sum of squared errors
+ * + * Penalty (Regularization term) can be 'L2' or empty string to cancel penalty term + * + * @param int $maxIterations + * @param bool $normalizeInputs + * @param int $trainingType + * @param string $cost + * @param string $penalty + * + * @throws \Exception + */ + public function __construct(int $maxIterations = 500, bool $normalizeInputs = true, + int $trainingType = self::CONJUGATE_GRAD_TRAINING, string $cost = 'sse', + string $penalty = 'L2') + { + $trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING); + if (! in_array($trainingType, $trainingTypes)) { + throw new \Exception("Logistic regression can only be trained with " . + "batch (gradient descent), online (stochastic gradient descent) " . + "or conjugate batch (conjugate gradients) algorithms"); + } + + if (! in_array($cost, ['log', 'sse'])) { + throw new \Exception("Logistic regression cost function can be one of the following: \n" . + "'log' for log-likelihood and 'sse' for sum of squared errors"); + } + + if ($penalty != '' && strtoupper($penalty) !== 'L2') { + throw new \Exception("Logistic regression supports only 'L2' regularization"); + } + + $this->learningRate = 0.001; + + parent::__construct($this->learningRate, $maxIterations, $normalizeInputs); + + $this->trainingType = $trainingType; + $this->costFunction = $cost; + $this->penalty = $penalty; + } + + /** + * Sets the learning rate if gradient descent algorithm is + * selected for training + * + * @param float $learningRate + */ + public function setLearningRate(float $learningRate) + { + $this->learningRate = $learningRate; + } + + /** + * Lambda (λ) parameter of regularization term. If 0 is given, + * then the regularization term is cancelled + * + * @param float $lambda + */ + public function setLambda(float $lambda) + { + $this->lambda = $lambda; + } + + /** + * Adapts the weights with respect to given samples and targets + * by use of selected solver + */ + protected function runTraining() + { + $callback = $this->getCostFunction(); + + switch ($this->trainingType) { + case self::BATCH_TRAINING: + return $this->runGradientDescent($callback, true); + + case self::ONLINE_TRAINING: + return $this->runGradientDescent($callback, false); + + case self::CONJUGATE_GRAD_TRAINING: + return $this->runConjugateGradient($callback); + } + } + + /** + * Executes Conjugate Gradient method to optimize the + * weights of the LogReg model + */ + protected function runConjugateGradient(\Closure $gradientFunc) + { + $optimizer = (new ConjugateGradient($this->featureCount)) + ->setMaxIterations($this->maxIterations); + + $this->weights = $optimizer->runOptimization($this->samples, $this->targets, $gradientFunc); + $this->costValues = $optimizer->getCostValues(); + } + + /** + * Returns the appropriate callback function for the selected cost function + * + * @return \Closure + */ + protected function getCostFunction() + { + $penalty = 0; + if ($this->penalty == 'L2') { + $penalty = $this->lambda; + } + + switch ($this->costFunction) { + case 'log': + /* + * Negative of Log-likelihood cost function to be minimized: + * J(x) = ∑( - y . log(h(x)) - (1 - y) . log(1 - h(x))) + * + * If regularization term is given, then it will be added to the cost: + * for L2 : J(x) = J(x) + λ/m . 
w + * + * The gradient of the cost function to be used with gradient descent: + * ∇J(x) = -(y - h(x)) = (h(x) - y) + */ + $callback = function ($weights, $sample, $y) use ($penalty) { + $this->weights = $weights; + $hX = $this->output($sample); + + // In cases where $hX = 1 or $hX = 0, the log-likelihood + // value will give a NaN, so we fix these values + if ($hX == 1) { + $hX = 1 - 1e-10; + } + if ($hX == 0) { + $hX = 1e-10; + } + $error = -$y * log($hX) - (1 - $y) * log(1 - $hX); + $gradient = $hX - $y; + + return [$error, $gradient, $penalty]; + }; + + return $callback; + + case 'sse': + /** + * Sum of squared errors or least squared errors cost function: + * J(x) = ∑ (y - h(x))^2 + * + * If regularization term is given, then it will be added to the cost: + * for L2 : J(x) = J(x) + λ/m . w + * + * The gradient of the cost function: + * ∇J(x) = -(h(x) - y) . h(x) . (1 - h(x)) + */ + $callback = function ($weights, $sample, $y) use ($penalty) { + $this->weights = $weights; + $hX = $this->output($sample); + + $error = ($y - $hX) ** 2; + $gradient = -($y - $hX) * $hX * (1 - $hX); + + return [$error, $gradient, $penalty]; + }; + + return $callback; + } + } + + /** + * Returns the output of the network, a float value between 0.0 and 1.0 + * + * @param array $sample + * + * @return float + */ + protected function output(array $sample) + { + $sum = parent::output($sample); + + return 1.0 / (1.0 + exp(-$sum)); + } + + /** + * Returns the class value (either -1 or 1) for the given input + * + * @param array $sample + * @return int + */ + protected function outputClass(array $sample) + { + $output = $this->output($sample); + + if (round($output) > 0.5) { + return 1; + } + + return -1; + } + + /** + * Returns the probability of the sample of belonging to the given label. + * + * The probability is simply taken as the distance of the sample + * to the decision plane. 
+ * + * @param array $sample + * @param mixed $label + */ + protected function predictProbability(array $sample, $label) + { + $predicted = $this->predictSampleBinary($sample); + + if (strval($predicted) == strval($label)) { + $sample = $this->checkNormalizedSample($sample); + return abs($this->output($sample) - 0.5); + } + + return 0.0; + } +} diff --git a/src/Phpml/Classification/Linear/Perceptron.php b/src/Phpml/Classification/Linear/Perceptron.php index 32e41f2..8280bcb 100644 --- a/src/Phpml/Classification/Linear/Perceptron.php +++ b/src/Phpml/Classification/Linear/Perceptron.php @@ -6,6 +6,8 @@ namespace Phpml\Classification\Linear; use Phpml\Helper\Predictable; use Phpml\Helper\OneVsRest; +use Phpml\Helper\Optimizer\StochasticGD; +use Phpml\Helper\Optimizer\GD; use Phpml\Classification\Classifier; use Phpml\Preprocessing\Normalizer; @@ -13,14 +15,6 @@ class Perceptron implements Classifier { use Predictable, OneVsRest; - /** - * The function whose result will be used to calculate the network error - * for each instance - * - * @var string - */ - protected static $errorFunction = 'outputClass'; - /** * @var array */ @@ -62,12 +56,14 @@ class Perceptron implements Classifier protected $normalizer; /** - * Minimum amount of change in the weights between iterations - * that needs to be obtained to continue the training - * - * @var float + * @var bool */ - protected $threshold = 1e-5; + protected $enableEarlyStop = true; + + /** + * @var array + */ + protected $costValues = []; /** * Initalize a perceptron classifier with given learning rate and maximum @@ -97,20 +93,6 @@ class Perceptron implements Classifier $this->maxIterations = $maxIterations; } - /** - * Sets minimum value for the change in the weights - * between iterations to continue the iterations.
- * - * If the weight change is less than given value then the - * algorithm will stop training - * - * @param float $threshold - */ - public function setChangeThreshold(float $threshold = 1e-5) - { - $this->threshold = $threshold; - } - /** * @param array $samples * @param array $targets @@ -136,82 +118,72 @@ class Perceptron implements Classifier $this->samples = array_merge($this->samples, $samples); $this->featureCount = count($this->samples[0]); - // Init weights with random values - $this->weights = array_fill(0, $this->featureCount + 1, 0); - foreach ($this->weights as &$weight) { - $weight = rand() / (float) getrandmax(); - } - // Do training $this->runTraining(); } /** - * Adapts the weights with respect to given samples and targets - * by use of perceptron learning rule + * Normally enabling early stopping for the optimization procedure may + * help saving processing time while in some cases it may result in + * premature convergence.
+ * + * If "false" is given, the optimization procedure will always be executed + * for $maxIterations times + * + * @param bool $enable */ - protected function runTraining() + public function setEarlyStop(bool $enable = true) { - $currIter = 0; - $bestWeights = null; - $bestScore = count($this->samples); - $bestWeightIter = 0; + $this->enableEarlyStop = $enable; - while ($this->maxIterations > $currIter++) { - $weights = $this->weights; - $misClassified = 0; - foreach ($this->samples as $index => $sample) { - $target = $this->targets[$index]; - $prediction = $this->{static::$errorFunction}($sample); - $update = $target - $prediction; - if ($target != $prediction) { - $misClassified++; - } - // Update bias - $this->weights[0] += $update * $this->learningRate; // Bias - // Update other weights - for ($i=1; $i <= $this->featureCount; $i++) { - $this->weights[$i] += $update * $sample[$i - 1] * $this->learningRate; - } - } - - // Save the best weights in the "pocket" so that - // any future weights worse than this will be disregarded - if ($bestWeights == null || $misClassified <= $bestScore) { - $bestWeights = $weights; - $bestScore = $misClassified; - $bestWeightIter = $currIter; - } - - // Check for early stop - if ($this->earlyStop($weights)) { - break; - } - } - - // The weights in the pocket are better than or equal to the last state - // so, we use these weights - $this->weights = $bestWeights; + return $this; } /** - * @param array $oldWeights + * Returns the cost values obtained during the training. * - * @return boolean + * @return array */ - protected function earlyStop($oldWeights) + public function getCostValues() { - // Check for early stop: No change larger than 1e-5 - $diff = array_map( - function ($w1, $w2) { - return abs($w1 - $w2) > 1e-5 ? 1 : 0; - }, - $oldWeights, $this->weights); + return $this->costValues; + } - if (array_sum($diff) == 0) { - return true; - } + /** + * Trains the perceptron model with Stochastic Gradient Descent optimization + * to get the correct set of weights + */ + protected function runTraining() + { + // The cost function is the sum of squares + $callback = function ($weights, $sample, $target) { + $this->weights = $weights; - return false; + $prediction = $this->outputClass($sample); + $gradient = $prediction - $target; + $error = $gradient**2; + + return [$error, $gradient]; + }; + + $this->runGradientDescent($callback); + } + + /** + * Executes Stochastic Gradient Descent algorithm for + * the given cost function + */ + protected function runGradientDescent(\Closure $gradientFunc, bool $isBatch = false) + { + $class = $isBatch ? 
GD::class : StochasticGD::class; + + $optimizer = (new $class($this->featureCount)) + ->setLearningRate($this->learningRate) + ->setMaxIterations($this->maxIterations) + ->setChangeThreshold(1e-6) + ->setEarlyStop($this->enableEarlyStop); + + $this->weights = $optimizer->runOptimization($this->samples, $this->targets, $gradientFunc); + $this->costValues = $optimizer->getCostValues(); } /** diff --git a/src/Phpml/Helper/OneVsRest.php b/src/Phpml/Helper/OneVsRest.php index 1288363..9e7bc82 100644 --- a/src/Phpml/Helper/OneVsRest.php +++ b/src/Phpml/Helper/OneVsRest.php @@ -15,7 +15,7 @@ trait OneVsRest * @var array */ protected $targets = []; - + /** * @var array */ @@ -26,6 +26,11 @@ trait OneVsRest */ protected $labels; + /** + * @var array + */ + protected $costValues; + /** * Train a binary classifier in the OvR style * @@ -56,6 +61,12 @@ trait OneVsRest $this->classifiers[$label] = $predictor; } } + + // If the underlying classifier is capable of giving the cost values + // during the training, then assign it to the relevant variable + if (method_exists($this->classifiers[0], 'getCostValues')) { + $this->costValues = $this->classifiers[0]->getCostValues(); + } } /** diff --git a/src/Phpml/Helper/Optimizer/ConjugateGradient.php b/src/Phpml/Helper/Optimizer/ConjugateGradient.php new file mode 100644 index 0000000..9bcb338 --- /dev/null +++ b/src/Phpml/Helper/Optimizer/ConjugateGradient.php @@ -0,0 +1,359 @@ +samples = $samples; + $this->targets = $targets; + $this->gradientCb = $gradientCb; + $this->sampleCount = count($samples); + $this->costValues = []; + + $d = mp::muls($this->gradient($this->theta), -1); + + for ($i=0; $i < $this->maxIterations; $i++) { + // Obtain α that minimizes f(θ + α.d) + $alpha = $this->getAlpha(array_sum($d)); + + // θ(k+1) = θ(k) + α.d + $thetaNew = $this->getNewTheta($alpha, $d); + + // β = ||∇f(x(k+1))||² ∕ ||∇f(x(k))||² + $beta = $this->getBeta($thetaNew); + + // d(k+1) =–∇f(x(k+1)) + β(k).d(k) + $d = $this->getNewDirection($thetaNew, $beta, $d); + + // Save values for the next iteration + $oldTheta = $this->theta; + $this->costValues[] = $this->cost($thetaNew); + + $this->theta = $thetaNew; + if ($this->enableEarlyStop && $this->earlyStop($oldTheta)) { + break; + } + } + + return $this->theta; + } + + /** + * Executes the callback function for the problem and returns + * sum of the gradient for all samples & targets. + * + * @param array $theta + * + * @return float + */ + protected function gradient(array $theta) + { + list($_, $gradient, $_) = parent::gradient($theta); + + return $gradient; + } + + /** + * Returns the value of f(x) for given solution + * + * @param array $theta + * + * @return float + */ + protected function cost(array $theta) + { + list($cost, $_, $_) = parent::gradient($theta); + + return array_sum($cost) / $this->sampleCount; + } + + /** + * Calculates alpha that minimizes the function f(θ + α.d) + * by performing a line search that does not rely upon the derivation. + * + * There are several alternatives for this function. For now, we + * prefer a method inspired from the bisection method for its simplicity. 
+ * This algorithm attempts to find an optimum alpha value between 0.0001 and 0.01 + * + * Algorithm as follows: + * a) Probe a small alpha (0.0001) and calculate cost function + * b) Probe a larger alpha (0.01) and calculate cost function + * b-1) If cost function decreases, continue enlarging alpha + * b-2) If cost function increases, take the midpoint and try again + * + * @param float $d + * + * @return array + */ + protected function getAlpha(float $d) + { + $small = 0.0001 * $d; + $large = 0.01 * $d; + + // Obtain θ + α.d for two initial values, x0 and x1 + $x0 = mp::adds($this->theta, $small); + $x1 = mp::adds($this->theta, $large); + + $epsilon = 0.0001; + $iteration = 0; + do { + $fx1 = $this->cost($x1); + $fx0 = $this->cost($x0); + + // If the difference between two values is small enough + // then break the loop + if (abs($fx1 - $fx0) <= $epsilon) { + break; + } + + if ($fx1 < $fx0) { + $x0 = $x1; + $x1 = mp::adds($x1, 0.01); // Enlarge second + } else { + $x1 = mp::divs(mp::add($x1, $x0), 2.0); + } // Get to the midpoint + + $error = $fx1 / $this->dimensions; + } while ($error <= $epsilon || $iteration++ < 10); + + // Return α = θ / d + if ($d == 0) { + return $x1[0] - $this->theta[0]; + } + + return ($x1[0] - $this->theta[0]) / $d; + } + + /** + * Calculates new set of solutions with given alpha (for each θ(k)) and + * gradient direction. + * + * θ(k+1) = θ(k) + α.d + * + * @param float $alpha + * @param array $d + * + * return array + */ + protected function getNewTheta(float $alpha, array $d) + { + $theta = $this->theta; + + for ($i=0; $i < $this->dimensions + 1; $i++) { + if ($i == 0) { + $theta[$i] += $alpha * array_sum($d); + } else { + $sum = 0.0; + foreach ($this->samples as $si => $sample) { + $sum += $sample[$i - 1] * $d[$si] * $alpha; + } + + $theta[$i] += $sum; + } + } + + return $theta; + } + + /** + * Calculates new beta (β) for given set of solutions by using + * Fletcher–Reeves method. + * + * β = ||f(x(k+1))||² ∕ ||f(x(k))||² + * + * See: + * R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154. 
+ * + * @param array $newTheta + * + * @return float + */ + protected function getBeta(array $newTheta) + { + $dNew = array_sum($this->gradient($newTheta)); + $dOld = array_sum($this->gradient($this->theta)) + 1e-100; + + return $dNew ** 2 / $dOld ** 2; + } + + /** + * Calculates the new conjugate direction + * + * d(k+1) =–∇f(x(k+1)) + β(k).d(k) + * + * @param array $theta + * @param float $beta + * @param array $d + * + * @return array + */ + protected function getNewDirection(array $theta, float $beta, array $d) + { + $grad = $this->gradient($theta); + + return mp::add(mp::muls($grad, -1), mp::muls($d, $beta)); + } +} + +/** + * Handles element-wise vector operations between vector-vector + * and vector-scalar variables + */ +class mp +{ + /** + * Element-wise multiplication of two vectors of the same size + * + * @param array $m1 + * @param array $m2 + * + * @return array + */ + public static function mul(array $m1, array $m2) + { + $res = []; + foreach ($m1 as $i => $val) { + $res[] = $val * $m2[$i]; + } + + return $res; + } + + /** + * Element-wise division of two vectors of the same size + * + * @param array $m1 + * @param array $m2 + * + * @return array + */ + public static function div(array $m1, array $m2) + { + $res = []; + foreach ($m1 as $i => $val) { + $res[] = $val / $m2[$i]; + } + + return $res; + } + + /** + * Element-wise addition of two vectors of the same size + * + * @param array $m1 + * @param array $m2 + * + * @return array + */ + public static function add(array $m1, array $m2, $mag = 1) + { + $res = []; + foreach ($m1 as $i => $val) { + $res[] = $val + $mag * $m2[$i]; + } + + return $res; + } + + /** + * Element-wise subtraction of two vectors of the same size + * + * @param array $m1 + * @param array $m2 + * + * @return array + */ + public static function sub(array $m1, array $m2) + { + return self::add($m1, $m2, -1); + } + + /** + * Element-wise multiplication of a vector with a scalar + * + * @param array $m1 + * @param float $m2 + * + * @return array + */ + public static function muls(array $m1, float $m2) + { + $res = []; + foreach ($m1 as $val) { + $res[] = $val * $m2; + } + + return $res; + } + + /** + * Element-wise division of a vector with a scalar + * + * @param array $m1 + * @param float $m2 + * + * @return array + */ + public static function divs(array $m1, float $m2) + { + $res = []; + foreach ($m1 as $val) { + $res[] = $val / ($m2 + 1e-32); + } + + return $res; + } + + /** + * Element-wise addition of a vector with a scalar + * + * @param array $m1 + * @param float $m2 + * + * @return array + */ + public static function adds(array $m1, float $m2, $mag = 1) + { + $res = []; + foreach ($m1 as $val) { + $res[] = $val + $mag * $m2; + } + + return $res; + } + + /** + * Element-wise subtraction of a vector with a scalar + * + * @param array $m1 + * @param float $m2 + * + * @return array + */ + public static function subs(array $m1, array $m2) + { + return self::adds($m1, $m2, -1); + } +} diff --git a/src/Phpml/Helper/Optimizer/GD.php b/src/Phpml/Helper/Optimizer/GD.php new file mode 100644 index 0000000..1402930 --- /dev/null +++ b/src/Phpml/Helper/Optimizer/GD.php @@ -0,0 +1,108 @@ +samples = $samples; + $this->targets = $targets; + $this->gradientCb = $gradientCb; + $this->sampleCount = count($this->samples); + + // Batch learning is executed: + $currIter = 0; + $this->costValues = []; + while ($this->maxIterations > $currIter++) { + $theta = $this->theta; + + // Calculate update terms for each sample + list($errors, $updates, $totalPenalty) = 
$this->gradient($theta); + + $this->updateWeightsWithUpdates($updates, $totalPenalty); + + $this->costValues[] = array_sum($errors)/$this->sampleCount; + + if ($this->earlyStop($theta)) { + break; + } + } + + return $this->theta; + } + + /** + * Calculates gradient, cost function and penalty term for each sample + * then returns them as an array of values + * + * @param array $theta + * + * @return array + */ + protected function gradient(array $theta) + { + $costs = []; + $gradient= []; + $totalPenalty = 0; + + foreach ($this->samples as $index => $sample) { + $target = $this->targets[$index]; + + $result = ($this->gradientCb)($theta, $sample, $target); + list($cost, $grad, $penalty) = array_pad($result, 3, 0); + + $costs[] = $cost; + $gradient[]= $grad; + $totalPenalty += $penalty; + } + + $totalPenalty /= $this->sampleCount; + + return [$costs, $gradient, $totalPenalty]; + } + + /** + * @param array $updates + * @param float $penalty + */ + protected function updateWeightsWithUpdates(array $updates, float $penalty) + { + // Updates all weights at once + for ($i=0; $i <= $this->dimensions; $i++) { + if ($i == 0) { + $this->theta[0] -= $this->learningRate * array_sum($updates); + } else { + $col = array_column($this->samples, $i - 1); + + $error = 0; + foreach ($col as $index => $val) { + $error += $val * $updates[$index]; + } + + $this->theta[$i] -= $this->learningRate * + ($error + $penalty * $this->theta[$i]); + } + } + } +} diff --git a/src/Phpml/Helper/Optimizer/Optimizer.php b/src/Phpml/Helper/Optimizer/Optimizer.php new file mode 100644 index 0000000..9ef4c4d --- /dev/null +++ b/src/Phpml/Helper/Optimizer/Optimizer.php @@ -0,0 +1,61 @@ +dimensions = $dimensions; + + // Inits the weights randomly + $this->theta = []; + for ($i=0; $i < $this->dimensions; $i++) { + $this->theta[] = rand() / (float) getrandmax(); + } + } + + /** + * Sets the weights manually + * + * @param array $theta + */ + public function setInitialTheta(array $theta) + { + if (count($theta) != $this->dimensions) { + throw new \Exception("Number of values in the weights array should be $this->dimensions"); + } + + $this->theta = $theta; + + return $this; + } + + /** + * Executes the optimization with the given samples & targets + * and returns the weights + * + */ + abstract protected function runOptimization(array $samples, array $targets, \Closure $gradientCb); +} diff --git a/src/Phpml/Helper/Optimizer/StochasticGD.php b/src/Phpml/Helper/Optimizer/StochasticGD.php new file mode 100644 index 0000000..5379a28 --- /dev/null +++ b/src/Phpml/Helper/Optimizer/StochasticGD.php @@ -0,0 +1,271 @@ + + * + * Larger values of lr may overshoot the optimum or even cause divergence + * while small values slows down the convergence and increases the time + * required for the training + * + * @var float + */ + protected $learningRate = 0.001; + + /** + * Minimum amount of change in the weights and error values + * between iterations that needs to be obtained to continue the training + * + * @var float + */ + protected $threshold = 1e-4; + + /** + * Enable/Disable early stopping by checking the weight & cost values + * to see whether they changed large enough to continue the optimization + * + * @var bool + */ + protected $enableEarlyStop = true; + /** + * List of values obtained by evaluating the cost function at each iteration + * of the algorithm + * + * @var array + */ + protected $costValues= []; + + /** + * Initializes the SGD optimizer for the given number of dimensions + * + * @param int $dimensions + */ + public function 
__construct(int $dimensions) + { + // Add one more dimension for the bias + parent::__construct($dimensions + 1); + + $this->dimensions = $dimensions; + } + + /** + * Sets the minimum amount of change in the theta values + * between iterations that is required to continue the iterations.
+ * + * If the change in theta is less than the given value then the + * algorithm will stop training + * + * @param float $threshold + * + * @return $this + */ + public function setChangeThreshold(float $threshold = 1e-5) + { + $this->threshold = $threshold; + + return $this; + } + + /** + * Enable/Disable early stopping by checking at each iteration + * whether changes in theta or cost value are not large enough + * + * @param bool $enable + * + * @return $this + */ + public function setEarlyStop(bool $enable = true) + { + $this->enableEarlyStop = $enable; + + return $this; + } + + /** + * @param float $learningRate + * + * @return $this + */ + public function setLearningRate(float $learningRate) + { + $this->learningRate = $learningRate; + + return $this; + } + + /** + * @param int $maxIterations + * + * @return $this + */ + public function setMaxIterations(int $maxIterations) + { + $this->maxIterations = $maxIterations; + + return $this; + } + + /** + * Optimization procedure finds the unknown variables for the equation A.ϴ = y + * for the given samples (A) and targets (y).
+ * + * The cost function to minimize and the gradient of the function are to be + * handled by the callback function provided as the third parameter of the method. + * + * @param array $samples + * @param array $targets + * @param \Closure $gradientCb + * + * @return array + */ + public function runOptimization(array $samples, array $targets, \Closure $gradientCb) + { + $this->samples = $samples; + $this->targets = $targets; + $this->gradientCb = $gradientCb; + + $currIter = 0; + $bestTheta = null; + $bestScore = 0.0; + $bestWeightIter = 0; + $this->costValues = []; + + while ($this->maxIterations > $currIter++) { + $theta = $this->theta; + + // Update the guess + $cost = $this->updateTheta(); + + // Save the best theta in the "pocket" so that + // any future set of theta worse than this will be disregarded + if ($bestTheta == null || $cost <= $bestScore) { + $bestTheta = $theta; + $bestScore = $cost; + $bestWeightIter = $currIter; + } + + // Add the cost value for this iteration to the list + $this->costValues[] = $cost; + + // Check for early stop + if ($this->enableEarlyStop && $this->earlyStop($theta)) { + break; + } + } + + // Solution in the pocket is better than or equal to the last state + // so, we use this solution + return $this->theta = $bestTheta; + } + + /** + * @return float + */ + protected function updateTheta() + { + $jValue = 0.0; + $theta = $this->theta; + + foreach ($this->samples as $index => $sample) { + $target = $this->targets[$index]; + + $result = ($this->gradientCb)($theta, $sample, $target); + + list($error, $gradient, $penalty) = array_pad($result, 3, 0); + + // Update bias + $this->theta[0] -= $this->learningRate * $gradient; + + // Update other values + for ($i=1; $i <= $this->dimensions; $i++) { + $this->theta[$i] -= $this->learningRate * + ($gradient * $sample[$i - 1] + $penalty * $this->theta[$i]); + } + + // Sum error rate + $jValue += $error; + } + + return $jValue / count($this->samples); + } + + /** + * Checks if the optimization is not effective enough and can be stopped + * in case large enough changes in the solution do not happen + * + * @param array $oldTheta + * + * @return boolean + */ + protected function earlyStop($oldTheta) + { + // Check for early stop: No change larger than threshold (default 1e-5) + $diff = array_map( + function ($w1, $w2) { + return abs($w1 - $w2) > $this->threshold ? 
1 : 0; + }, + $oldTheta, $this->theta); + + if (array_sum($diff) == 0) { + return true; + } + + // Check if the last two cost values are almost the same + $costs = array_slice($this->costValues, -2); + if (count($costs) == 2 && abs($costs[1] - $costs[0]) < $this->threshold) { + return true; + } + + return false; + } + + /** + * Returns the list of cost values for each iteration executed in + * last run of the optimization + * + * @return array + */ + public function getCostValues() + { + return $this->costValues; + } +} diff --git a/tests/Phpml/Classification/Ensemble/AdaBoostTest.php b/tests/Phpml/Classification/Ensemble/AdaBoostTest.php index c9e4d86..8c17752 100644 --- a/tests/Phpml/Classification/Ensemble/AdaBoostTest.php +++ b/tests/Phpml/Classification/Ensemble/AdaBoostTest.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace tests\Classification\Linear; +namespace tests\Classification\Ensemble; use Phpml\Classification\Ensemble\AdaBoost; use Phpml\ModelManager; diff --git a/tests/Phpml/Classification/Linear/PerceptronTest.php b/tests/Phpml/Classification/Linear/PerceptronTest.php index ef820fc..1f40c46 100644 --- a/tests/Phpml/Classification/Linear/PerceptronTest.php +++ b/tests/Phpml/Classification/Linear/PerceptronTest.php @@ -16,6 +16,7 @@ class PerceptronTest extends TestCase $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.6, 0.6]]; $targets = [0, 0, 0, 1, 1]; $classifier = new Perceptron(0.001, 5000); + $classifier->setEarlyStop(false); $classifier->train($samples, $targets); $this->assertEquals(0, $classifier->predict([0.1, 0.2])); $this->assertEquals(0, $classifier->predict([0, 1])); @@ -25,6 +26,7 @@ class PerceptronTest extends TestCase $samples = [[0.1, 0.1], [0.4, 0.], [0., 0.3], [1, 0], [0, 1], [1, 1]]; $targets = [0, 0, 0, 1, 1, 1]; $classifier = new Perceptron(0.001, 5000, false); + $classifier->setEarlyStop(false); $classifier->train($samples, $targets); $this->assertEquals(0, $classifier->predict([0., 0.])); $this->assertEquals(1, $classifier->predict([0.1, 0.99])); @@ -40,11 +42,12 @@ class PerceptronTest extends TestCase $targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]; $classifier = new Perceptron(); + $classifier->setEarlyStop(false); $classifier->train($samples, $targets); $this->assertEquals(0, $classifier->predict([0.5, 0.5])); $this->assertEquals(1, $classifier->predict([6.0, 5.0])); $this->assertEquals(2, $classifier->predict([3.0, 9.5])); - + return $classifier; }
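
The patch itself ships no usage example, so the following is a minimal, hypothetical sketch of how the public API added above could be exercised once the diff is applied. The sample data, the lambda value and the chosen hyperparameters are made up for illustration; the class name, constructor signature, training-type constant and the methods used (setLambda, train, predict, getCostValues) are taken from the diff.

<?php

use Phpml\Classification\Linear\LogisticRegression;

// Toy AND-style data set (illustrative only).
$samples = [[0, 0], [0, 1], [1, 0], [1, 1]];
$targets = [0, 0, 0, 1];

// 500 iterations, input normalization enabled, conjugate gradient solver,
// log-likelihood cost and the L2 penalty introduced by this patch.
$classifier = new LogisticRegression(
    500,
    true,
    LogisticRegression::CONJUGATE_GRAD_TRAINING,
    'log',
    'L2'
);
$classifier->setLambda(0.1);             // λ of the regularization term (illustrative value)
$classifier->train($samples, $targets);

$prediction = $classifier->predict([0.9, 0.9]);  // returns one of the trained labels
$costs      = $classifier->getCostValues();      // per-iteration cost collected during training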
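Since the new optimizer classes can also be driven directly (StochasticGD exposes a public runOptimization()), here is a second hypothetical sketch of the callback contract the optimizers expect: the closure receives the current theta, one sample and its target, and returns [error, gradient], with an optional third element used as the penalty term. The linear toy problem and the hyperparameter values are assumptions for illustration only.

<?php

use Phpml\Helper\Optimizer\StochasticGD;

// Fit y = w0 + w1 * x on a tiny, made-up data set (y = 1 + 2x).
$samples = [[1.0], [2.0], [3.0], [4.0]];
$targets = [3.0, 5.0, 7.0, 9.0];

// Per-sample cost callback: squared error and its gradient with respect to the output.
$callback = function (array $theta, array $sample, $target) {
    $output   = $theta[0] + $theta[1] * $sample[0]; // bias + weight * feature
    $gradient = $output - $target;
    $error    = $gradient ** 2;

    return [$error, $gradient]; // a third element would be treated as the penalty term
};

$optimizer = (new StochasticGD(1))       // 1 feature; the optimizer adds the bias dimension itself
    ->setLearningRate(0.01)
    ->setMaxIterations(2000)
    ->setEarlyStop(true);

$theta = $optimizer->runOptimization($samples, $targets, $callback);
// $theta[0] should approximate the bias (1.0) and $theta[1] the slope (2.0);
// $optimizer->getCostValues() holds the cost recorded at each iteration.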