LogisticRegression classifier & Optimization methods (#63)

* LogisticRegression classifier & Optimization methods

* Minor fixes to Logistic Regression & Optimizers PR

* Addition for getCostValues() method
Mustafa Karabulut 2017-03-28 00:46:53 +03:00 committed by Arkadiusz Kondas
parent c44f3b2730
commit 49234429f0
10 changed files with 1162 additions and 152 deletions

View File

@ -19,14 +19,6 @@ class Adaline extends Perceptron
*/
const ONLINE_TRAINING = 2;
/**
* The function whose result will be used to calculate the network error
* for each instance
*
* @var string
*/
protected static $errorFunction = 'output';
/**
* Training type may be either 'Batch' or 'Online' learning
*
@ -64,62 +56,19 @@ class Adaline extends Perceptron
*/
Removed:

protected function runTraining()
{
    // Batch learning is executed:
    $currIter = 0;
    while ($this->maxIterations > $currIter++) {
        $weights = $this->weights;
        $outputs = array_map([$this, 'output'], $this->samples);
        $updates = array_map([$this, 'gradient'], $this->targets, $outputs);
        $this->updateWeights($updates);
        if ($this->earlyStop($weights)) {
            break;
        }
    }
}
/**
 * Returns the direction of gradient given the desired and actual outputs
 *
 * @param int $desired
 * @param int $output
 * @return int
 */
protected function gradient($desired, $output)
{
    return $desired - $output;
}
/**
 * Updates the weights of the network given the direction of the
 * gradient for each sample
 *
 * @param array $updates
 */
protected function updateWeights(array $updates)
{
    // Updates all weights at once
    for ($i=0; $i <= $this->featureCount; $i++) {
        if ($i == 0) {
            $this->weights[0] += $this->learningRate * array_sum($updates);
        } else {
            $col = array_column($this->samples, $i - 1);
            $error = 0;
            foreach ($col as $index => $val) {
                $error += $val * $updates[$index];
            }
            $this->weights[$i] += $this->learningRate * $error;
        }
    }
}

Added:

protected function runTraining()
{
    // If online training is chosen, then the parent runTraining method
    // will be executed with the 'output' method as the error function
    if ($this->trainingType == self::ONLINE_TRAINING) {
        return parent::runTraining();
    }
    // The cost function is the sum of squares
    $callback = function ($weights, $sample, $target) {
        $this->weights = $weights;
        $output = $this->output($sample);
        $gradient = $output - $target;
        $error = $gradient ** 2;
        return [$error, $gradient];
    };
    $isBatch = $this->trainingType == self::BATCH_TRAINING;
    return parent::runGradientDescent($callback, $isBatch);
}
}

View File

@ -0,0 +1,276 @@
<?php
declare(strict_types=1);
namespace Phpml\Classification\Linear;
use Phpml\Classification\Classifier;
use Phpml\Helper\Optimizer\ConjugateGradient;
class LogisticRegression extends Adaline
{
/**
* Batch training: Gradient descent algorithm (default)
*/
const BATCH_TRAINING = 1;
/**
* Online training: Stochastic gradient descent learning
*/
const ONLINE_TRAINING = 2;
/**
* Conjugate Batch: Conjugate Gradient algorithm
*/
const CONJUGATE_GRAD_TRAINING = 3;
/**
* Cost function to optimize: 'log' and 'sse' are supported <br>
* - 'log' : log likelihood <br>
* - 'sse' : sum of squared errors <br>
*
* @var string
*/
protected $costFunction = 'sse';
/**
* Regularization term: only 'L2' is supported
*
* @var string
*/
protected $penalty = 'L2';
/**
* Lambda (λ) parameter of regularization term. If λ is set to 0, then
* regularization term is cancelled.
*
* @var float
*/
protected $lambda = 0.5;
/**
* Initialize a Logistic Regression classifier with maximum number of iterations
* and learning rule to be applied <br>
*
* Maximum number of iterations can be an integer value greater than 0 <br>
* If normalizeInputs is set to true, then every input given to the algorithm will be standardized
* by use of standard deviation and mean calculation <br>
*
* Cost function can be 'log' for log-likelihood and 'sse' for sum of squared errors <br>
*
* Penalty (Regularization term) can be 'L2' or empty string to cancel penalty term
*
* @param int $maxIterations
* @param bool $normalizeInputs
* @param int $trainingType
* @param string $cost
* @param string $penalty
*
* @throws \Exception
*/
public function __construct(int $maxIterations = 500, bool $normalizeInputs = true,
int $trainingType = self::CONJUGATE_GRAD_TRAINING, string $cost = 'sse',
string $penalty = 'L2')
{
$trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING);
if (! in_array($trainingType, $trainingTypes)) {
throw new \Exception("Logistic regression can only be trained with " .
"batch (gradient descent), online (stochastic gradient descent) " .
"or conjugate batch (conjugate gradients) algorithms");
}
if (! in_array($cost, ['log', 'sse'])) {
throw new \Exception("Logistic regression cost function can be one of the following: \n" .
"'log' for log-likelihood and 'sse' for sum of squared errors");
}
if ($penalty != '' && strtoupper($penalty) !== 'L2') {
throw new \Exception("Logistic regression supports only 'L2' regularization");
}
$this->learningRate = 0.001;
parent::__construct($this->learningRate, $maxIterations, $normalizeInputs);
$this->trainingType = $trainingType;
$this->costFunction = $cost;
$this->penalty = $penalty;
}
/**
* Sets the learning rate if gradient descent algorithm is
* selected for training
*
* @param float $learningRate
*/
public function setLearningRate(float $learningRate)
{
$this->learningRate = $learningRate;
}
/**
* Lambda (λ) parameter of regularization term. If 0 is given,
* then the regularization term is cancelled
*
* @param float $lambda
*/
public function setLambda(float $lambda)
{
$this->lambda = $lambda;
}
/**
* Adapts the weights with respect to given samples and targets
* by use of selected solver
*/
protected function runTraining()
{
$callback = $this->getCostFunction();
switch ($this->trainingType) {
case self::BATCH_TRAINING:
return $this->runGradientDescent($callback, true);
case self::ONLINE_TRAINING:
return $this->runGradientDescent($callback, false);
case self::CONJUGATE_GRAD_TRAINING:
return $this->runConjugateGradient($callback);
}
}
/**
* Executes Conjugate Gradient method to optimize the
* weights of the LogReg model
*/
protected function runConjugateGradient(\Closure $gradientFunc)
{
$optimizer = (new ConjugateGradient($this->featureCount))
->setMaxIterations($this->maxIterations);
$this->weights = $optimizer->runOptimization($this->samples, $this->targets, $gradientFunc);
$this->costValues = $optimizer->getCostValues();
}
/**
* Returns the appropriate callback function for the selected cost function
*
* @return \Closure
*/
protected function getCostFunction()
{
$penalty = 0;
if ($this->penalty == 'L2') {
$penalty = $this->lambda;
}
switch ($this->costFunction) {
case 'log':
/*
* Negative of Log-likelihood cost function to be minimized:
* J(x) = ( - y . log(h(x)) - (1 - y) . log(1 - h(x)))
*
* If regularization term is given, then it will be added to the cost:
* for L2 : J(x) = J(x) + λ/m . w
*
* The gradient of the cost function to be used with gradient descent:
* ∇J(x) = -(y - h(x)) = (h(x) - y)
*/
$callback = function ($weights, $sample, $y) use ($penalty) {
$this->weights = $weights;
$hX = $this->output($sample);
// In cases where $hX = 1 or $hX = 0, the log-likelihood
// value will give a NaN, so we fix these values
if ($hX == 1) {
$hX = 1 - 1e-10;
}
if ($hX == 0) {
$hX = 1e-10;
}
$error = -$y * log($hX) - (1 - $y) * log(1 - $hX);
$gradient = $hX - $y;
return [$error, $gradient, $penalty];
};
return $callback;
case 'sse':
/**
* Sum of squared errors or least squared errors cost function:
* J(x) = (y - h(x))^2
*
* If regularization term is given, then it will be added to the cost:
* for L2 : J(x) = J(x) + λ/m . w
*
* The gradient of the cost function:
* ∇J(x) = (h(x) - y) . h(x) . (1 - h(x))
*/
$callback = function ($weights, $sample, $y) use ($penalty) {
$this->weights = $weights;
$hX = $this->output($sample);
$error = ($y - $hX) ** 2;
$gradient = -($y - $hX) * $hX * (1 - $hX);
return [$error, $gradient, $penalty];
};
return $callback;
}
}
/**
* Returns the output of the network, a float value between 0.0 and 1.0
*
* @param array $sample
*
* @return float
*/
protected function output(array $sample)
{
$sum = parent::output($sample);
return 1.0 / (1.0 + exp(-$sum));
}
/**
* Returns the class value (either -1 or 1) for the given input
*
* @param array $sample
* @return int
*/
protected function outputClass(array $sample)
{
$output = $this->output($sample);
if (round($output) > 0.5) {
return 1;
}
return -1;
}
/**
* Returns the probability of the sample of belonging to the given label.
*
* The probability is simply taken as the distance of the sample
* to the decision plane.
*
* @param array $sample
* @param mixed $label
*/
protected function predictProbability(array $sample, $label)
{
$predicted = $this->predictSampleBinary($sample);
if (strval($predicted) == strval($label)) {
$sample = $this->checkNormalizedSample($sample);
return abs($this->output($sample) - 0.5);
}
return 0.0;
}
}
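For orientation, a minimal usage sketch of the new classifier follows. The toy samples, targets and variable names are illustrative only and not part of this commit; train() and predict() come from the existing Perceptron/OneVsRest plumbing (as exercised by the tests below), and getCostValues() is the accessor added in this PR.

use Phpml\Classification\Linear\LogisticRegression;

// Illustrative toy data: two features, two classes
$samples = [[0.1, 0.2], [0.2, 0.1], [0.8, 0.9], [0.9, 0.8]];
$targets = [0, 0, 1, 1];

// 500 iterations, normalized inputs, conjugate gradient solver,
// log-likelihood cost and L2 penalty
$classifier = new LogisticRegression(
    500,
    true,
    LogisticRegression::CONJUGATE_GRAD_TRAINING,
    'log',
    'L2'
);
$classifier->train($samples, $targets);

$prediction  = $classifier->predict([0.85, 0.95]); // predicted label for a new point
$costHistory = $classifier->getCostValues();       // per-iteration cost values recorded during training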

View File

@ -6,6 +6,8 @@ namespace Phpml\Classification\Linear;
use Phpml\Helper\Predictable;
use Phpml\Helper\OneVsRest;
use Phpml\Helper\Optimizer\StochasticGD;
use Phpml\Helper\Optimizer\GD;
use Phpml\Classification\Classifier;
use Phpml\Preprocessing\Normalizer;
@ -13,14 +15,6 @@ class Perceptron implements Classifier
{
use Predictable, OneVsRest;
/**
* The function whose result will be used to calculate the network error
* for each instance
*
* @var string
*/
protected static $errorFunction = 'outputClass';
/**
* @var array
*/
@ -62,12 +56,14 @@ class Perceptron implements Classifier
protected $normalizer;
Removed:

/**
 * Minimum amount of change in the weights between iterations
 * that needs to be obtained to continue the training
 *
 * @var float
 */
protected $threshold = 1e-5;

Added:

/**
 * @var bool
 */
protected $enableEarlyStop = true;
/**
* @var array
*/
protected $costValues = [];
/**
* Initialize a perceptron classifier with given learning rate and maximum
@ -97,20 +93,6 @@ class Perceptron implements Classifier
$this->maxIterations = $maxIterations;
}
/**
* Sets minimum value for the change in the weights
* between iterations to continue the iterations.<br>
*
* If the weight change is less than given value then the
* algorithm will stop training
*
* @param float $threshold
*/
public function setChangeThreshold(float $threshold = 1e-5)
{
$this->threshold = $threshold;
}
/**
* @param array $samples
* @param array $targets
@ -136,82 +118,72 @@ class Perceptron implements Classifier
$this->samples = array_merge($this->samples, $samples);
$this->featureCount = count($this->samples[0]);
// Init weights with random values
$this->weights = array_fill(0, $this->featureCount + 1, 0);
foreach ($this->weights as &$weight) {
$weight = rand() / (float) getrandmax();
}
// Do training
$this->runTraining();
}
Removed:

/**
 * Adapts the weights with respect to given samples and targets
 * by use of perceptron learning rule
 */
protected function runTraining()
{
    $currIter = 0;
    $bestWeights = null;
    $bestScore = count($this->samples);
    $bestWeightIter = 0;
    while ($this->maxIterations > $currIter++) {
        $weights = $this->weights;
        $misClassified = 0;
        foreach ($this->samples as $index => $sample) {
            $target = $this->targets[$index];
            $prediction = $this->{static::$errorFunction}($sample);
            $update = $target - $prediction;
            if ($target != $prediction) {
                $misClassified++;
            }
            // Update bias
            $this->weights[0] += $update * $this->learningRate; // Bias
            // Update other weights
            for ($i=1; $i <= $this->featureCount; $i++) {
                $this->weights[$i] += $update * $sample[$i - 1] * $this->learningRate;
            }
        }
        // Save the best weights in the "pocket" so that
        // any future weights worse than this will be disregarded
        if ($bestWeights == null || $misClassified <= $bestScore) {
            $bestWeights = $weights;
            $bestScore = $misClassified;
            $bestWeightIter = $currIter;
        }
        // Check for early stop
        if ($this->earlyStop($weights)) {
            break;
        }
    }
    // The weights in the pocket are better than or equal to the last state
    // so, we use these weights
    $this->weights = $bestWeights;
}
/**
 * @param array $oldWeights
 *
 * @return boolean
 */
protected function earlyStop($oldWeights)
{
    // Check for early stop: No change larger than 1e-5
    $diff = array_map(
        function ($w1, $w2) {
            return abs($w1 - $w2) > 1e-5 ? 1 : 0;
        },
        $oldWeights, $this->weights);
    if (array_sum($diff) == 0) {
        return true;
    }
    return false;
}

Added:

/**
 * Normally enabling early stopping for the optimization procedure may
 * help saving processing time while in some cases it may result in
 * premature convergence.<br>
 *
 * If "false" is given, the optimization procedure will always be executed
 * for $maxIterations times
 *
 * @param bool $enable
 */
public function setEarlyStop(bool $enable = true)
{
    $this->enableEarlyStop = $enable;
    return $this;
}
/**
 * Returns the cost values obtained during the training.
 *
 * @return array
 */
public function getCostValues()
{
    return $this->costValues;
}
/**
 * Trains the perceptron model with Stochastic Gradient Descent optimization
 * to get the correct set of weights
 */
protected function runTraining()
{
    // The cost function is the sum of squares
    $callback = function ($weights, $sample, $target) {
        $this->weights = $weights;
        $prediction = $this->outputClass($sample);
        $gradient = $prediction - $target;
        $error = $gradient ** 2;
        return [$error, $gradient];
    };
    $this->runGradientDescent($callback);
}
/**
* Executes Stochastic Gradient Descent algorithm for
* the given cost function
*/
protected function runGradientDescent(\Closure $gradientFunc, bool $isBatch = false)
{
$class = $isBatch ? GD::class : StochasticGD::class;
$optimizer = (new $class($this->featureCount))
->setLearningRate($this->learningRate)
->setMaxIterations($this->maxIterations)
->setChangeThreshold(1e-6)
->setEarlyStop($this->enableEarlyStop);
$this->weights = $optimizer->runOptimization($this->samples, $this->targets, $gradientFunc);
$this->costValues = $optimizer->getCostValues();
}
/**

View File

@ -15,7 +15,7 @@ trait OneVsRest
* @var array
*/
protected $targets = [];
/**
* @var array
*/
@ -26,6 +26,11 @@ trait OneVsRest
*/
protected $labels;
/**
* @var array
*/
protected $costValues;
/**
* Train a binary classifier in the OvR style
*
@ -56,6 +61,12 @@ trait OneVsRest
$this->classifiers[$label] = $predictor;
}
}
// If the underlying classifier is capable of giving the cost values
// during the training, then assign it to the relevant variable
if (method_exists($this->classifiers[0], 'getCostValues')) {
$this->costValues = $this->classifiers[0]->getCostValues();
}
}
/**

View File

@ -0,0 +1,359 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
/**
* Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x
* (See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method)
*
* The method applied below is explained in a practical manner in the following document:
* - http://web.cs.iastate.edu/~cs577/handouts/conjugate-gradient.pdf
*
* However, it is compliant with the general Conjugate Gradient method with the
* Fletcher-Reeves update method. Note that f(x) is assumed to be one-dimensional
* and one gradient is utilized for all dimensions in the given data.
*/
class ConjugateGradient extends GD
{
/**
* @param array $samples
* @param array $targets
* @param \Closure $gradientCb
*
* @return array
*/
public function runOptimization(array $samples, array $targets, \Closure $gradientCb)
{
$this->samples = $samples;
$this->targets = $targets;
$this->gradientCb = $gradientCb;
$this->sampleCount = count($samples);
$this->costValues = [];
$d = mp::muls($this->gradient($this->theta), -1);
for ($i=0; $i < $this->maxIterations; $i++) {
// Obtain α that minimizes f(θ + α.d)
$alpha = $this->getAlpha(array_sum($d));
// θ(k+1) = θ(k) + α.d
$thetaNew = $this->getNewTheta($alpha, $d);
// β = ||∇f(x(k+1))||² / ||∇f(x(k))||²
$beta = $this->getBeta($thetaNew);
// d(k+1) = -∇f(x(k+1)) + β(k).d(k)
$d = $this->getNewDirection($thetaNew, $beta, $d);
// Save values for the next iteration
$oldTheta = $this->theta;
$this->costValues[] = $this->cost($thetaNew);
$this->theta = $thetaNew;
if ($this->enableEarlyStop && $this->earlyStop($oldTheta)) {
break;
}
}
return $this->theta;
}
/**
* Executes the callback function for the problem and returns
* the gradient of the cost function for each sample & target.
*
* @param array $theta
*
* @return array
*/
protected function gradient(array $theta)
{
list($_, $gradient, $_) = parent::gradient($theta);
return $gradient;
}
/**
* Returns the value of f(x) for given solution
*
* @param array $theta
*
* @return float
*/
protected function cost(array $theta)
{
list($cost, $_, $_) = parent::gradient($theta);
return array_sum($cost) / $this->sampleCount;
}
/**
* Calculates alpha that minimizes the function f(θ + α.d)
* by performing a line search that does not rely upon derivatives.
*
* There are several alternatives for this function. For now, we
* prefer a method inspired from the bisection method for its simplicity.
* This algorithm attempts to find an optimum alpha value between 0.0001 and 0.01
*
* Algorithm as follows:
* a) Probe a small alpha (0.0001) and calculate cost function
* b) Probe a larger alpha (0.01) and calculate cost function
* b-1) If cost function decreases, continue enlarging alpha
* b-2) If cost function increases, take the midpoint and try again
*
* @param float $d
*
* @return float
*/
protected function getAlpha(float $d)
{
$small = 0.0001 * $d;
$large = 0.01 * $d;
// Obtain θ + α.d for two initial values, x0 and x1
$x0 = mp::adds($this->theta, $small);
$x1 = mp::adds($this->theta, $large);
$epsilon = 0.0001;
$iteration = 0;
do {
$fx1 = $this->cost($x1);
$fx0 = $this->cost($x0);
// If the difference between two values is small enough
// then break the loop
if (abs($fx1 - $fx0) <= $epsilon) {
break;
}
if ($fx1 < $fx0) {
$x0 = $x1;
$x1 = mp::adds($x1, 0.01); // Enlarge second
} else {
$x1 = mp::divs(mp::add($x1, $x0), 2.0); // Get to the midpoint
}
$error = $fx1 / $this->dimensions;
} while ($error <= $epsilon || $iteration++ < 10);
// Return α = θ / d
if ($d == 0) {
return $x1[0] - $this->theta[0];
}
return ($x1[0] - $this->theta[0]) / $d;
}
/**
* Calculates new set of solutions with given alpha (for each θ(k)) and
* gradient direction.
*
* θ(k+1) = θ(k) + α.d
*
* @param float $alpha
* @param array $d
*
* @return array
*/
protected function getNewTheta(float $alpha, array $d)
{
$theta = $this->theta;
for ($i=0; $i < $this->dimensions + 1; $i++) {
if ($i == 0) {
$theta[$i] += $alpha * array_sum($d);
} else {
$sum = 0.0;
foreach ($this->samples as $si => $sample) {
$sum += $sample[$i - 1] * $d[$si] * $alpha;
}
$theta[$i] += $sum;
}
}
return $theta;
}
/**
* Calculates new beta (β) for given set of solutions by using
* Fletcher-Reeves method.
*
* β = ||∇f(x(k+1))||² / ||∇f(x(k))||²
*
* See:
* R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149-154.
*
* @param array $newTheta
*
* @return float
*/
protected function getBeta(array $newTheta)
{
$dNew = array_sum($this->gradient($newTheta));
$dOld = array_sum($this->gradient($this->theta)) + 1e-100;
return $dNew ** 2 / $dOld ** 2;
}
/**
* Calculates the new conjugate direction
*
* d(k+1) = -∇f(x(k+1)) + β(k).d(k)
*
* @param array $theta
* @param float $beta
* @param array $d
*
* @return array
*/
protected function getNewDirection(array $theta, float $beta, array $d)
{
$grad = $this->gradient($theta);
return mp::add(mp::muls($grad, -1), mp::muls($d, $beta));
}
}
/**
* Handles element-wise vector operations between vector-vector
* and vector-scalar variables
*/
class mp
{
/**
* Element-wise <b>multiplication</b> of two vectors of the same size
*
* @param array $m1
* @param array $m2
*
* @return array
*/
public static function mul(array $m1, array $m2)
{
$res = [];
foreach ($m1 as $i => $val) {
$res[] = $val * $m2[$i];
}
return $res;
}
/**
* Element-wise <b>division</b> of two vectors of the same size
*
* @param array $m1
* @param array $m2
*
* @return array
*/
public static function div(array $m1, array $m2)
{
$res = [];
foreach ($m1 as $i => $val) {
$res[] = $val / $m2[$i];
}
return $res;
}
/**
* Element-wise <b>addition</b> of two vectors of the same size
*
* @param array $m1
* @param array $m2
*
* @return array
*/
public static function add(array $m1, array $m2, $mag = 1)
{
$res = [];
foreach ($m1 as $i => $val) {
$res[] = $val + $mag * $m2[$i];
}
return $res;
}
/**
* Element-wise <b>subtraction</b> of two vectors of the same size
*
* @param array $m1
* @param array $m2
*
* @return array
*/
public static function sub(array $m1, array $m2)
{
return self::add($m1, $m2, -1);
}
/**
* Element-wise <b>multiplication</b> of a vector with a scalar
*
* @param array $m1
* @param float $m2
*
* @return array
*/
public static function muls(array $m1, float $m2)
{
$res = [];
foreach ($m1 as $val) {
$res[] = $val * $m2;
}
return $res;
}
/**
* Element-wise <b>division</b> of a vector with a scalar
*
* @param array $m1
* @param float $m2
*
* @return array
*/
public static function divs(array $m1, float $m2)
{
$res = [];
foreach ($m1 as $val) {
$res[] = $val / ($m2 + 1e-32);
}
return $res;
}
/**
* Element-wise <b>addition</b> of a vector with a scalar
*
* @param array $m1
* @param float $m2
*
* @return array
*/
public static function adds(array $m1, float $m2, $mag = 1)
{
$res = [];
foreach ($m1 as $val) {
$res[] = $val + $mag * $m2;
}
return $res;
}
/**
* Element-wise <b>subtraction</b> of a vector with a scalar
*
* @param array $m1
* @param float $m2
*
* @return array
*/
public static function subs(array $m1, float $m2)
{
return self::adds($m1, $m2, -1);
}
}
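Summarized as equations (a restatement of the code and comments above, not text from the commit), one Fletcher-Reeves iteration performed by runOptimization() is:

d_0 = -\nabla f(\theta_0)
\alpha_k \approx \arg\min_\alpha f(\theta_k + \alpha\, d_k)    \text{(the bisection-style line search in getAlpha())}
\theta_{k+1} = \theta_k + \alpha_k\, d_k    \text{(getNewTheta())}
\beta_k = \lVert \nabla f(\theta_{k+1}) \rVert^2 / \lVert \nabla f(\theta_k) \rVert^2    \text{(getBeta())}
d_{k+1} = -\nabla f(\theta_{k+1}) + \beta_k\, d_k    \text{(getNewDirection())}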

View File

@ -0,0 +1,108 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
/**
* Batch version of Gradient Descent to optimize the weights
* of a classifier given samples, targets and the objective function to minimize
*/
class GD extends StochasticGD
{
/**
* Number of samples given
*
* @var int
*/
protected $sampleCount;
/**
* @param array $samples
* @param array $targets
* @param \Closure $gradientCb
*
* @return array
*/
public function runOptimization(array $samples, array $targets, \Closure $gradientCb)
{
$this->samples = $samples;
$this->targets = $targets;
$this->gradientCb = $gradientCb;
$this->sampleCount = count($this->samples);
// Batch learning is executed:
$currIter = 0;
$this->costValues = [];
while ($this->maxIterations > $currIter++) {
$theta = $this->theta;
// Calculate update terms for each sample
list($errors, $updates, $totalPenalty) = $this->gradient($theta);
$this->updateWeightsWithUpdates($updates, $totalPenalty);
$this->costValues[] = array_sum($errors)/$this->sampleCount;
if ($this->earlyStop($theta)) {
break;
}
}
return $this->theta;
}
/**
* Calculates gradient, cost function and penalty term for each sample
* then returns them as an array of values
*
* @param array $theta
*
* @return array
*/
protected function gradient(array $theta)
{
$costs = [];
$gradient= [];
$totalPenalty = 0;
foreach ($this->samples as $index => $sample) {
$target = $this->targets[$index];
$result = ($this->gradientCb)($theta, $sample, $target);
list($cost, $grad, $penalty) = array_pad($result, 3, 0);
$costs[] = $cost;
$gradient[]= $grad;
$totalPenalty += $penalty;
}
$totalPenalty /= $this->sampleCount;
return [$costs, $gradient, $totalPenalty];
}
/**
* @param array $updates
* @param float $penalty
*/
protected function updateWeightsWithUpdates(array $updates, float $penalty)
{
// Updates all weights at once
for ($i=0; $i <= $this->dimensions; $i++) {
if ($i == 0) {
$this->theta[0] -= $this->learningRate * array_sum($updates);
} else {
$col = array_column($this->samples, $i - 1);
$error = 0;
foreach ($col as $index => $val) {
$error += $val * $updates[$index];
}
$this->theta[$i] -= $this->learningRate *
($error + $penalty * $this->theta[$i]);
}
}
}
}
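For illustration only (the linear toy problem and names below are not part of the commit), a gradient callback returns [cost, gradient] per sample (an optional third penalty element is padded with 0 by GD::gradient()), and GD applies one batch update per iteration:

use Phpml\Helper\Optimizer\GD;

// Fit y ≈ θ0 + θ1 * x on illustrative data
$samples = [[1.0], [2.0], [3.0]];
$targets = [3.0, 5.0, 7.0];

$callback = function ($theta, $sample, $target) {
    $estimate = $theta[0] + $theta[1] * $sample[0];
    $cost = ($target - $estimate) ** 2;  // squared error for this sample
    $gradient = $estimate - $target;     // update direction; GD multiplies it by the feature value
    return [$cost, $gradient];           // no penalty term given, padded with 0
};

$optimizer = new GD(1); // one feature; the bias dimension is handled internally
$theta = $optimizer->setLearningRate(0.01)
    ->setMaxIterations(1000)
    ->runOptimization($samples, $targets, $callback);
$costPerIteration = $optimizer->getCostValues();

In equation form, the batch update implemented by updateWeightsWithUpdates(), with learning rate \alpha, per-sample gradients g_i, feature values x_{ij} and averaged penalty p, is:

\theta_0 \leftarrow \theta_0 - \alpha \sum_i g_i
\theta_j \leftarrow \theta_j - \alpha \Big( \sum_i g_i\, x_{ij} + p\,\theta_j \Big), \qquad j = 1,\dots,d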

View File

@ -0,0 +1,61 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
abstract class Optimizer
{
/**
* Unknown variables to be found
*
* @var array
*/
protected $theta;
/**
* Number of dimensions
*
* @var int
*/
protected $dimensions;
/**
* Inits a new instance of Optimizer for the given number of dimensions
*
* @param int $dimensions
*/
public function __construct(int $dimensions)
{
$this->dimensions = $dimensions;
// Inits the weights randomly
$this->theta = [];
for ($i=0; $i < $this->dimensions; $i++) {
$this->theta[] = rand() / (float) getrandmax();
}
}
/**
* Sets the weights manually
*
* @param array $theta
*/
public function setInitialTheta(array $theta)
{
if (count($theta) != $this->dimensions) {
throw new \Exception("Number of values in the weights array should be $this->dimensions");
}
$this->theta = $theta;
return $this;
}
/**
* Executes the optimization with the given samples & targets
* and returns the weights
*
*/
abstract protected function runOptimization(array $samples, array $targets, \Closure $gradientCb);
}

View File

@ -0,0 +1,271 @@
<?php
declare(strict_types=1);
namespace Phpml\Helper\Optimizer;
/**
* Stochastic Gradient Descent optimization method
* to find a solution for the equation A.ϴ = y where
* A (samples) and y (targets) are known and ϴ is unknown.
*/
class StochasticGD extends Optimizer
{
/**
* A (samples)
*
* @var array
*/
protected $samples;
/**
* y (targets)
*
* @var array
*/
protected $targets;
/**
* Callback function to get the gradient and cost value
* for a specific set of theta (ϴ) and a pair of sample & target
*
* @var \Closure
*/
protected $gradientCb;
/**
* Maximum number of iterations used to train the model
*
* @var int
*/
protected $maxIterations = 1000;
/**
* Learning rate is used to control the speed of the optimization.<br>
*
* Larger values of lr may overshoot the optimum or even cause divergence
* while small values slow down the convergence and increase the time
* required for the training
*
* @var float
*/
protected $learningRate = 0.001;
/**
* Minimum amount of change in the weights and error values
* between iterations that needs to be obtained to continue the training
*
* @var float
*/
protected $threshold = 1e-4;
/**
* Enable/Disable early stopping by checking the weight & cost values
* to see whether they changed large enough to continue the optimization
*
* @var bool
*/
protected $enableEarlyStop = true;
/**
* List of values obtained by evaluating the cost function at each iteration
* of the algorithm
*
* @var array
*/
protected $costValues= [];
/**
* Initializes the SGD optimizer for the given number of dimensions
*
* @param int $dimensions
*/
public function __construct(int $dimensions)
{
// Add one more dimension for the bias
parent::__construct($dimensions + 1);
$this->dimensions = $dimensions;
}
/**
* Sets minimum value for the change in the theta values
* between iterations to continue the iterations.<br>
*
* If change in the theta is less than given value then the
* algorithm will stop training
*
* @param float $threshold
*
* @return $this
*/
public function setChangeThreshold(float $threshold = 1e-5)
{
$this->threshold = $threshold;
return $this;
}
/**
* Enable/Disable early stopping by checking at each iteration
* whether changes in theta or cost value are not large enough
*
* @param bool $enable
*
* @return $this
*/
public function setEarlyStop(bool $enable = true)
{
$this->enableEarlyStop = $enable;
return $this;
}
/**
* @param float $learningRate
*
* @return $this
*/
public function setLearningRate(float $learningRate)
{
$this->learningRate = $learningRate;
return $this;
}
/**
* @param int $maxIterations
*
* @return $this
*/
public function setMaxIterations(int $maxIterations)
{
$this->maxIterations = $maxIterations;
return $this;
}
/**
* Optimization procedure finds the unknown variables for the equation A.ϴ = y
* for the given samples (A) and targets (y).<br>
*
* The cost function to minimize and the gradient of the function are to be
* handled by the callback function provided as the third parameter of the method.
*
* @param array $samples
* @param array $targets
* @param \Closure $gradientCb
*
* @return array
*/
public function runOptimization(array $samples, array $targets, \Closure $gradientCb)
{
$this->samples = $samples;
$this->targets = $targets;
$this->gradientCb = $gradientCb;
$currIter = 0;
$bestTheta = null;
$bestScore = 0.0;
$bestWeightIter = 0;
$this->costValues = [];
while ($this->maxIterations > $currIter++) {
$theta = $this->theta;
// Update the guess
$cost = $this->updateTheta();
// Save the best theta in the "pocket" so that
// any future set of theta worse than this will be disregarded
if ($bestTheta == null || $cost <= $bestScore) {
$bestTheta = $theta;
$bestScore = $cost;
$bestWeightIter = $currIter;
}
// Add the cost value for this iteration to the list
$this->costValues[] = $cost;
// Check for early stop
if ($this->enableEarlyStop && $this->earlyStop($theta)) {
break;
}
}
// Solution in the pocket is better than or equal to the last state
// so, we use this solution
return $this->theta = $bestTheta;
}
/**
* @return float
*/
protected function updateTheta()
{
$jValue = 0.0;
$theta = $this->theta;
foreach ($this->samples as $index => $sample) {
$target = $this->targets[$index];
$result = ($this->gradientCb)($theta, $sample, $target);
list($error, $gradient, $penalty) = array_pad($result, 3, 0);
// Update bias
$this->theta[0] -= $this->learningRate * $gradient;
// Update other values
for ($i=1; $i <= $this->dimensions; $i++) {
$this->theta[$i] -= $this->learningRate *
($gradient * $sample[$i - 1] + $penalty * $this->theta[$i]);
}
// Sum error rate
$jValue += $error;
}
return $jValue / count($this->samples);
}
/**
* Checks if the optimization is not effective enough and can be stopped
* in case large enough changes in the solution do not happen
*
* @param array $oldTheta
*
* @return boolean
*/
protected function earlyStop($oldTheta)
{
// Check for early stop: No change larger than threshold (default 1e-5)
$diff = array_map(
function ($w1, $w2) {
return abs($w1 - $w2) > $this->threshold ? 1 : 0;
},
$oldTheta, $this->theta);
if (array_sum($diff) == 0) {
return true;
}
// Check if the last two cost values are almost the same
$costs = array_slice($this->costValues, -2);
if (count($costs) == 2 && abs($costs[1] - $costs[0]) < $this->threshold) {
return true;
}
return false;
}
/**
* Returns the list of cost values for each iteration executed in
* last run of the optimization
*
* @return array
*/
public function getCostValues()
{
return $this->costValues;
}
}
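For comparison with the batch version above (again a restatement of the code, not commit text), updateTheta() applies the update once per sample, with learning rate \alpha, gradient g, feature values x_j and penalty p returned by the callback:

\theta_0 \leftarrow \theta_0 - \alpha\, g
\theta_j \leftarrow \theta_j - \alpha \big( g\, x_j + p\,\theta_j \big), \qquad j = 1,\dots,d

earlyStop() then halts the optimization when no \theta_j changed by more than the threshold between iterations, or when the last two averaged cost values differ by less than the threshold.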

View File

@ -2,7 +2,7 @@
declare(strict_types=1);
namespace tests\Classification\Linear;
namespace tests\Classification\Ensemble;
use Phpml\Classification\Ensemble\AdaBoost;
use Phpml\ModelManager;

View File

@ -16,6 +16,7 @@ class PerceptronTest extends TestCase
$samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.6, 0.6]];
$targets = [0, 0, 0, 1, 1];
$classifier = new Perceptron(0.001, 5000);
$classifier->setEarlyStop(false);
$classifier->train($samples, $targets);
$this->assertEquals(0, $classifier->predict([0.1, 0.2]));
$this->assertEquals(0, $classifier->predict([0, 1]));
@ -25,6 +26,7 @@ class PerceptronTest extends TestCase
$samples = [[0.1, 0.1], [0.4, 0.], [0., 0.3], [1, 0], [0, 1], [1, 1]];
$targets = [0, 0, 0, 1, 1, 1];
$classifier = new Perceptron(0.001, 5000, false);
$classifier->setEarlyStop(false);
$classifier->train($samples, $targets);
$this->assertEquals(0, $classifier->predict([0., 0.]));
$this->assertEquals(1, $classifier->predict([0.1, 0.99]));
@ -40,11 +42,12 @@ class PerceptronTest extends TestCase
$targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2];
$classifier = new Perceptron();
$classifier->setEarlyStop(false);
$classifier->train($samples, $targets);
$this->assertEquals(0, $classifier->predict([0.5, 0.5]));
$this->assertEquals(1, $classifier->predict([6.0, 5.0]));
$this->assertEquals(2, $classifier->predict([3.0, 9.5]));
return $classifier;
}