From 4b837fae8e55f4464856399fd9c825a6c9f838ec Mon Sep 17 00:00:00 2001 From: Pol Dellaiera Date: Wed, 6 Feb 2019 08:00:17 +0100 Subject: [PATCH] Improve distance performance and reduce duplication in distance classes. (#348) * Issue #347: Reduce duplicated code. * Issue #347: Replace array_* with regular loops for better performance. --- src/Math/Distance/Chebyshev.php | 23 ++++--------- src/Math/Distance/Distance.php | 61 +++++++++++++++++++++++++++++++++ src/Math/Distance/Euclidean.php | 28 ++++++--------- src/Math/Distance/Manhattan.php | 22 +++++------- src/Math/Distance/Minkowski.php | 38 ++++---------------- 5 files changed, 93 insertions(+), 79 deletions(-) create mode 100644 src/Math/Distance/Distance.php diff --git a/src/Math/Distance/Chebyshev.php b/src/Math/Distance/Chebyshev.php index 0ccd29a..3c7dbc2 100644 --- a/src/Math/Distance/Chebyshev.php +++ b/src/Math/Distance/Chebyshev.php @@ -4,27 +4,16 @@ declare(strict_types=1); namespace Phpml\Math\Distance; -use Phpml\Exception\InvalidArgumentException; -use Phpml\Math\Distance; - -class Chebyshev implements Distance +/** + * Class Chebyshev + */ +class Chebyshev extends Distance { /** - * @throws InvalidArgumentException + * {@inheritdoc} */ public function distance(array $a, array $b): float { - if (count($a) !== count($b)) { - throw new InvalidArgumentException('Size of given arrays does not match'); - } - - $differences = []; - $count = count($a); - - for ($i = 0; $i < $count; ++$i) { - $differences[] = abs($a[$i] - $b[$i]); - } - - return max($differences); + return max($this->deltas($a, $b)); } } diff --git a/src/Math/Distance/Distance.php b/src/Math/Distance/Distance.php new file mode 100644 index 0000000..ad9cdb9 --- /dev/null +++ b/src/Math/Distance/Distance.php @@ -0,0 +1,61 @@ +norm = $norm; + } + + /** + * @throws InvalidArgumentException + */ + public function distance(array $a, array $b): float + { + $distance = 0; + + foreach ($this->deltas($a, $b) as $delta) { + $distance += $delta ** 
$this->norm; + } + + return $distance ** (1 / $this->norm); + } + + /** + * @throws InvalidArgumentException + */ + protected function deltas(array $a, array $b): array + { + $count = count($a); + + if ($count !== count($b)) { + throw new InvalidArgumentException('Size of given arrays does not match'); + } + + $deltas = []; + + for ($i = 0; $i < $count; $i++) { + $deltas[] = abs($a[$i] - $b[$i]); + } + + return $deltas; + } +} diff --git a/src/Math/Distance/Euclidean.php b/src/Math/Distance/Euclidean.php index 4f437dc..4b7abc4 100644 --- a/src/Math/Distance/Euclidean.php +++ b/src/Math/Distance/Euclidean.php @@ -4,31 +4,25 @@ declare(strict_types=1); namespace Phpml\Math\Distance; -use Phpml\Exception\InvalidArgumentException; -use Phpml\Math\Distance; - -class Euclidean implements Distance +/** + * Class Euclidean + * + * L^2 Metric. + */ +class Euclidean extends Distance { /** - * @throws InvalidArgumentException + * Euclidean constructor. */ - public function distance(array $a, array $b): float + public function __construct() { - if (count($a) !== count($b)) { - throw new InvalidArgumentException('Size of given arrays does not match'); - } - - $distance = 0; - - foreach ($a as $i => $val) { - $distance += ($val - $b[$i]) ** 2; - } - - return sqrt((float) $distance); + parent::__construct(2.0); } /** * Square of Euclidean distance + * + * @throws \Phpml\Exception\InvalidArgumentException */ public function sqDistance(array $a, array $b): float { diff --git a/src/Math/Distance/Manhattan.php b/src/Math/Distance/Manhattan.php index 459a5ec..21ddee2 100644 --- a/src/Math/Distance/Manhattan.php +++ b/src/Math/Distance/Manhattan.php @@ -4,22 +4,18 @@ declare(strict_types=1); namespace Phpml\Math\Distance; -use Phpml\Exception\InvalidArgumentException; -use Phpml\Math\Distance; - -class Manhattan implements Distance +/** + * Class Manhattan + * + * L^1 Metric. + */ +class Manhattan extends Distance { /** - * @throws InvalidArgumentException + * Manhattan constructor. 
*/ - public function distance(array $a, array $b): float + public function __construct() { - if (count($a) !== count($b)) { - throw new InvalidArgumentException('Size of given arrays does not match'); - } - - return array_sum(array_map(function ($m, $n) { - return abs($m - $n); - }, $a, $b)); + parent::__construct(1.0); } } diff --git a/src/Math/Distance/Minkowski.php b/src/Math/Distance/Minkowski.php index 36edf9b..0ed5829 100644 --- a/src/Math/Distance/Minkowski.php +++ b/src/Math/Distance/Minkowski.php @@ -4,37 +4,11 @@ declare(strict_types=1); namespace Phpml\Math\Distance; -use Phpml\Exception\InvalidArgumentException; -use Phpml\Math\Distance; - -class Minkowski implements Distance +/** + * Class Minkowski + * + * L^n Metric. + */ +class Minkowski extends Distance { - /** - * @var float - */ - private $lambda; - - public function __construct(float $lambda = 3.0) - { - $this->lambda = $lambda; - } - - /** - * @throws InvalidArgumentException - */ - public function distance(array $a, array $b): float - { - if (count($a) !== count($b)) { - throw new InvalidArgumentException('Size of given arrays does not match'); - } - - $distance = 0; - $count = count($a); - - for ($i = 0; $i < $count; ++$i) { - $distance += pow(abs($a[$i] - $b[$i]), $this->lambda); - } - - return (float) pow($distance, 1 / $this->lambda); - } }