From f6a561158654ea8689fd3155f586ccb76106ab27 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Mon, 4 Apr 2016 21:50:30 +0200 Subject: [PATCH 01/27] move functions definitions --- src/Phpml/Classifier/NaiveBayes.php | 16 ---------------- .../NaiveBayes/GaussianNaiveBayes.php | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Phpml/Classifier/NaiveBayes.php b/src/Phpml/Classifier/NaiveBayes.php index e852645..7207409 100644 --- a/src/Phpml/Classifier/NaiveBayes.php +++ b/src/Phpml/Classifier/NaiveBayes.php @@ -5,22 +5,6 @@ namespace Phpml\Classifier; abstract class NaiveBayes implements Classifier { - /** - * @param array $features - * @param array $labels - */ - public function train($features, $labels) - { - } - - /** - * @param mixed $feature - * @return mixed - */ - public function predict($feature) - { - - } } diff --git a/src/Phpml/Classifier/NaiveBayes/GaussianNaiveBayes.php b/src/Phpml/Classifier/NaiveBayes/GaussianNaiveBayes.php index df9772f..33117fa 100644 --- a/src/Phpml/Classifier/NaiveBayes/GaussianNaiveBayes.php +++ b/src/Phpml/Classifier/NaiveBayes/GaussianNaiveBayes.php @@ -5,4 +5,22 @@ use Phpml\Classifier\NaiveBayes; class GaussianNaiveBayes extends NaiveBayes { + /** + * @param array $features + * @param array $labels + */ + public function train($features, $labels) + { + + } + + /** + * @param mixed $feature + * @return mixed + */ + public function predict($feature) + { + + } + } From ce1653a5a768bd96d7f0c265d63419e299db05db Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Mon, 4 Apr 2016 22:25:27 +0200 Subject: [PATCH 02/27] create euclidean distance function --- src/Phpml/Classifier/Classifier.php | 3 +- src/Phpml/Classifier/KNearestNeighbors.php | 54 +++++++++++++++++++ src/Phpml/Classifier/NaiveBayes.php | 19 ++++++- .../NaiveBayes/GaussianNaiveBayes.php | 26 --------- .../Exception/InvalidArgumentException.php | 17 ++++++ src/Phpml/Metric/Distance.php | 36 +++++++++++++ 6 files changed, 127 
insertions(+), 28 deletions(-) create mode 100644 src/Phpml/Classifier/KNearestNeighbors.php delete mode 100644 src/Phpml/Classifier/NaiveBayes/GaussianNaiveBayes.php create mode 100644 src/Phpml/Exception/InvalidArgumentException.php create mode 100644 src/Phpml/Metric/Distance.php diff --git a/src/Phpml/Classifier/Classifier.php b/src/Phpml/Classifier/Classifier.php index ea2bbf1..1d2362e 100644 --- a/src/Phpml/Classifier/Classifier.php +++ b/src/Phpml/Classifier/Classifier.php @@ -1,4 +1,5 @@ k = $k; + $this->features = []; + $this->labels = []; + } + + + /** + * @param array $features + * @param array $labels + */ + public function train(array $features, array $labels) + { + $this->features = $features; + $this->labels = $labels; + } + + /** + * @param mixed $feature + * @return mixed + */ + public function predict($feature) + { + + } + +} diff --git a/src/Phpml/Classifier/NaiveBayes.php b/src/Phpml/Classifier/NaiveBayes.php index 7207409..8e31eff 100644 --- a/src/Phpml/Classifier/NaiveBayes.php +++ b/src/Phpml/Classifier/NaiveBayes.php @@ -1,10 +1,27 @@ Date: Mon, 4 Apr 2016 22:38:51 +0200 Subject: [PATCH 03/27] create phpunit configuration and first tests --- composer.json | 5 +++++ phpunit.xml | 14 ++++++++++++++ .../Exception/InvalidArgumentException.php | 4 ++-- src/Phpml/Metric/Distance.php | 4 ++-- tests/Phpml/Metric/DistanceTest.php | 18 ++++++++++++++++++ 5 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 phpunit.xml create mode 100644 tests/Phpml/Metric/DistanceTest.php diff --git a/composer.json b/composer.json index 7f6a744..d8d5026 100644 --- a/composer.json +++ b/composer.json @@ -11,6 +11,11 @@ "email": "arkadiusz.kondas@gmail.com" } ], + "autoload": { + "psr-4": { + "": "src/" + } + }, "config": { "bin-dir": "bin" }, diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..d31e033 --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,14 @@ + + + + + tests/* + + ​ + diff --git 
a/src/Phpml/Exception/InvalidArgumentException.php b/src/Phpml/Exception/InvalidArgumentException.php index c88ab50..4ee194b 100644 --- a/src/Phpml/Exception/InvalidArgumentException.php +++ b/src/Phpml/Exception/InvalidArgumentException.php @@ -9,9 +9,9 @@ class InvalidArgumentException extends \Exception /** * @return InvalidArgumentException */ - public static function parametersSizeNotMatch() + public static function sizeNotMatch() { - return new self('Size of parameters not match'); + return new self('Size of given arguments not match'); } } diff --git a/src/Phpml/Metric/Distance.php b/src/Phpml/Metric/Distance.php index e1e64c6..581b73c 100644 --- a/src/Phpml/Metric/Distance.php +++ b/src/Phpml/Metric/Distance.php @@ -16,11 +16,11 @@ class Distance * * @throws InvalidArgumentException */ - public static function euclidean(array $a, array $b): float + public static function euclidean(array $a, array $b): float { if(count($a) != count($b)) { - throw InvalidArgumentException::parametersSizeNotMatch(); + throw InvalidArgumentException::sizeNotMatch(); } $distance = 0; diff --git a/tests/Phpml/Metric/DistanceTest.php b/tests/Phpml/Metric/DistanceTest.php new file mode 100644 index 0000000..947bb81 --- /dev/null +++ b/tests/Phpml/Metric/DistanceTest.php @@ -0,0 +1,18 @@ + Date: Mon, 4 Apr 2016 22:48:07 +0200 Subject: [PATCH 04/27] add more Euclidean distance tests --- tests/Phpml/Metric/DistanceTest.php | 37 +++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/Phpml/Metric/DistanceTest.php b/tests/Phpml/Metric/DistanceTest.php index 947bb81..aad4c05 100644 --- a/tests/Phpml/Metric/DistanceTest.php +++ b/tests/Phpml/Metric/DistanceTest.php @@ -10,9 +10,42 @@ class DistanceTest extends \PHPUnit_Framework_TestCase /** * @expectedException \Phpml\Exception\InvalidArgumentException */ - public function testThrowExceptionOnInvalidArguments() + public function testThrowExceptionOnInvalidArgumentsInEuclidean() { - 
Distance::euclidean([0, 1, 2], [0, 2]); + $a = [0, 1, 2]; + $b = [0, 2]; + + Distance::euclidean($a, $b); + } + + public function testCalculateEuclideanDistanceForOneDimension() + { + $a = [4]; + $b = [2]; + + $expectedDistance = 2; + $actualDistance = Distance::euclidean($a, $b); + + \PHPUnit_Framework_Assert::assertEquals($expectedDistance, $actualDistance); + } + + public function testCalculateEuclideanDistanceForTwoAndMoreDimension() + { + $a = [4, 6]; + $b = [2, 5]; + + $expectedDistance = 2.2360679774998; + $actualDistance = Distance::euclidean($a, $b); + + \PHPUnit_Framework_Assert::assertEquals($expectedDistance, $actualDistance); + + $a = [6, 10, 3]; + $b = [2, 5, 5]; + + $expectedDistance = 6.7082039324993694; + $actualDistance = Distance::euclidean($a, $b); + + \PHPUnit_Framework_Assert::assertEquals($expectedDistance, $actualDistance); } } From bd31f9a025932b1f53864f425c96c6ab99a06465 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Mon, 4 Apr 2016 22:49:54 +0200 Subject: [PATCH 05/27] php-cs-fixer --- src/Phpml/Classifier/Classifier.php | 6 +++--- src/Phpml/Classifier/KNearestNeighbors.php | 8 +++----- src/Phpml/Classifier/NaiveBayes.php | 8 +++----- src/Phpml/Dataset/Dataset.php | 1 - src/Phpml/Dataset/Iris.php | 1 - src/Phpml/Exception/InvalidArgumentException.php | 5 ++--- src/Phpml/Metric/Distance.php | 11 ++++------- tests/Phpml/Metric/DistanceTest.php | 4 ++-- 8 files changed, 17 insertions(+), 27 deletions(-) diff --git a/src/Phpml/Classifier/Classifier.php b/src/Phpml/Classifier/Classifier.php index 1d2362e..face7b2 100644 --- a/src/Phpml/Classifier/Classifier.php +++ b/src/Phpml/Classifier/Classifier.php @@ -1,11 +1,11 @@ labels = []; } - /** * @param array $features * @param array $labels @@ -44,11 +43,10 @@ class KNearestNeighbors implements Classifier /** * @param mixed $feature + * * @return mixed */ public function predict($feature) { - } - } diff --git a/src/Phpml/Classifier/NaiveBayes.php b/src/Phpml/Classifier/NaiveBayes.php index 
8e31eff..ed6bb8c 100644 --- a/src/Phpml/Classifier/NaiveBayes.php +++ b/src/Phpml/Classifier/NaiveBayes.php @@ -1,27 +1,25 @@ Date: Mon, 4 Apr 2016 22:50:14 +0200 Subject: [PATCH 06/27] create php-cs-fixer start script --- tools/php-cs-fixer.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100755 tools/php-cs-fixer.sh diff --git a/tools/php-cs-fixer.sh b/tools/php-cs-fixer.sh new file mode 100755 index 0000000..dbf66e4 --- /dev/null +++ b/tools/php-cs-fixer.sh @@ -0,0 +1,6 @@ +#!/bin/bash +echo "Fixing src/ folder" +php-cs-fixer fix src/ --level=symfony + +echo "Fixing tests/ folder" +php-cs-fixer fix tests/ --level=symfony From 4235f143bf148e7899ff9d080056decc1a31b172 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Tue, 5 Apr 2016 20:46:57 +0200 Subject: [PATCH 07/27] update composer php required version to 7 --- composer.json | 2 +- composer.lock | 183 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 141 insertions(+), 44 deletions(-) diff --git a/composer.json b/composer.json index d8d5026..66083b2 100644 --- a/composer.json +++ b/composer.json @@ -20,7 +20,7 @@ "bin-dir": "bin" }, "require": { - "php": ">=5.5.0" + "php": ">=7.0.5" }, "require-dev": { "phpunit/phpunit": "^5.2" diff --git a/composer.lock b/composer.lock index 1117546..0b9287b 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,8 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "hash": "640f762012a359b150ce245491743448", - "content-hash": "5efa8db5a672e2128d20c80c18746c72", + "hash": "54ea009d9b09c1886c0789dadc484a36", + "content-hash": "52c31f7df9edd7063e2489ed47e091a7", "packages": [], "packages-dev": [ { @@ -155,22 +155,24 @@ }, { "name": "phpspec/prophecy", - "version": "v1.5.0", + "version": "v1.6.0", "source": { "type": "git", "url": "https://github.com/phpspec/prophecy.git", - "reference": "4745ded9307786b730d7a60df5cb5a6c43cf95f7" + "reference": 
"3c91bdf81797d725b14cb62906f9a4ce44235972" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/phpspec/prophecy/zipball/4745ded9307786b730d7a60df5cb5a6c43cf95f7", - "reference": "4745ded9307786b730d7a60df5cb5a6c43cf95f7", + "url": "https://api.github.com/repos/phpspec/prophecy/zipball/3c91bdf81797d725b14cb62906f9a4ce44235972", + "reference": "3c91bdf81797d725b14cb62906f9a4ce44235972", "shasum": "" }, "require": { "doctrine/instantiator": "^1.0.2", + "php": "^5.3|^7.0", "phpdocumentor/reflection-docblock": "~2.0", - "sebastian/comparator": "~1.1" + "sebastian/comparator": "~1.1", + "sebastian/recursion-context": "~1.0" }, "require-dev": { "phpspec/phpspec": "~2.0" @@ -178,7 +180,7 @@ "type": "library", "extra": { "branch-alias": { - "dev-master": "1.4.x-dev" + "dev-master": "1.5.x-dev" } }, "autoload": { @@ -211,27 +213,28 @@ "spy", "stub" ], - "time": "2015-08-13 10:07:40" + "time": "2016-02-15 07:46:21" }, { "name": "phpunit/php-code-coverage", - "version": "3.1.1", + "version": "3.3.0", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/php-code-coverage.git", - "reference": "92f5c61b5c64159faec5298325ffab0c7e59dcc8" + "reference": "fe33716763b604ade4cb442c0794f5bd5ad73004" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/92f5c61b5c64159faec5298325ffab0c7e59dcc8", - "reference": "92f5c61b5c64159faec5298325ffab0c7e59dcc8", + "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/fe33716763b604ade4cb442c0794f5bd5ad73004", + "reference": "fe33716763b604ade4cb442c0794f5bd5ad73004", "shasum": "" }, "require": { - "php": ">=5.6", + "php": "^5.6 || ^7.0", "phpunit/php-file-iterator": "~1.3", "phpunit/php-text-template": "~1.2", - "phpunit/php-token-stream": "~1.3", + "phpunit/php-token-stream": "^1.4.2", + "sebastian/code-unit-reverse-lookup": "~1.0", "sebastian/environment": "^1.3.2", "sebastian/version": "~1.0|~2.0" }, @@ -247,7 +250,7 @@ "type": 
"library", "extra": { "branch-alias": { - "dev-master": "3.1.x-dev" + "dev-master": "3.3.x-dev" } }, "autoload": { @@ -273,7 +276,7 @@ "testing", "xunit" ], - "time": "2016-02-04 13:05:19" + "time": "2016-03-03 08:49:08" }, { "name": "phpunit/php-file-iterator", @@ -455,16 +458,16 @@ }, { "name": "phpunit/phpunit", - "version": "5.2.3", + "version": "5.3.0", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/phpunit.git", - "reference": "6fdb1d3004ebc7071c4ac62f2881d67c5c11fb59" + "reference": "dd3001822b2df8f5add266020e3d2fd3c5db3ae9" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/6fdb1d3004ebc7071c4ac62f2881d67c5c11fb59", - "reference": "6fdb1d3004ebc7071c4ac62f2881d67c5c11fb59", + "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/dd3001822b2df8f5add266020e3d2fd3c5db3ae9", + "reference": "dd3001822b2df8f5add266020e3d2fd3c5db3ae9", "shasum": "" }, "require": { @@ -474,18 +477,19 @@ "ext-reflection": "*", "ext-spl": "*", "myclabs/deep-copy": "~1.3", - "php": ">=5.6", + "php": "^5.6 || ^7.0", "phpspec/prophecy": "^1.3.1", - "phpunit/php-code-coverage": "~3.0", + "phpunit/php-code-coverage": "^3.3.0", "phpunit/php-file-iterator": "~1.4", "phpunit/php-text-template": "~1.2", "phpunit/php-timer": ">=1.0.6", - "phpunit/phpunit-mock-objects": ">=3.0.5", + "phpunit/phpunit-mock-objects": "^3.1", "sebastian/comparator": "~1.1", "sebastian/diff": "~1.2", "sebastian/environment": "~1.3", "sebastian/exporter": "~1.2", "sebastian/global-state": "~1.0", + "sebastian/object-enumerator": "~1.0", "sebastian/resource-operations": "~1.0", "sebastian/version": "~1.0|~2.0", "symfony/yaml": "~2.1|~3.0" @@ -499,7 +503,7 @@ "type": "library", "extra": { "branch-alias": { - "dev-master": "5.2.x-dev" + "dev-master": "5.3.x-dev" } }, "autoload": { @@ -525,20 +529,20 @@ "testing", "xunit" ], - "time": "2016-02-08 12:15:53" + "time": "2016-03-31 21:35:50" }, { "name": 
"phpunit/phpunit-mock-objects", - "version": "3.0.6", + "version": "3.1.2", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/phpunit-mock-objects.git", - "reference": "49bc700750196c04dd6bc2c4c99cb632b893836b" + "reference": "7c34c9bdde4131b824086457a3145e27dba10ca1" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/49bc700750196c04dd6bc2c4c99cb632b893836b", - "reference": "49bc700750196c04dd6bc2c4c99cb632b893836b", + "url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/7c34c9bdde4131b824086457a3145e27dba10ca1", + "reference": "7c34c9bdde4131b824086457a3145e27dba10ca1", "shasum": "" }, "require": { @@ -556,7 +560,7 @@ "type": "library", "extra": { "branch-alias": { - "dev-master": "3.0.x-dev" + "dev-master": "3.1.x-dev" } }, "autoload": { @@ -581,7 +585,52 @@ "mock", "xunit" ], - "time": "2015-12-08 08:47:06" + "time": "2016-03-24 05:58:25" + }, + { + "name": "sebastian/code-unit-reverse-lookup", + "version": "1.0.0", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/code-unit-reverse-lookup.git", + "reference": "c36f5e7cfce482fde5bf8d10d41a53591e0198fe" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/code-unit-reverse-lookup/zipball/c36f5e7cfce482fde5bf8d10d41a53591e0198fe", + "reference": "c36f5e7cfce482fde5bf8d10d41a53591e0198fe", + "shasum": "" + }, + "require": { + "php": ">=5.6" + }, + "require-dev": { + "phpunit/phpunit": "~5" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + } + ], + "description": "Looks up which function or method a line of code belongs to", + "homepage": 
"https://github.com/sebastianbergmann/code-unit-reverse-lookup/", + "time": "2016-02-13 06:45:14" }, { "name": "sebastian/comparator", @@ -701,16 +750,16 @@ }, { "name": "sebastian/environment", - "version": "1.3.3", + "version": "1.3.5", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/environment.git", - "reference": "6e7133793a8e5a5714a551a8324337374be209df" + "reference": "dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/6e7133793a8e5a5714a551a8324337374be209df", - "reference": "6e7133793a8e5a5714a551a8324337374be209df", + "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf", + "reference": "dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf", "shasum": "" }, "require": { @@ -747,7 +796,7 @@ "environment", "hhvm" ], - "time": "2015-12-02 08:37:27" + "time": "2016-02-26 18:40:46" }, { "name": "sebastian/exporter", @@ -866,6 +915,52 @@ ], "time": "2015-10-12 03:26:01" }, + { + "name": "sebastian/object-enumerator", + "version": "1.0.0", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/object-enumerator.git", + "reference": "d4ca2fb70344987502567bc50081c03e6192fb26" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/object-enumerator/zipball/d4ca2fb70344987502567bc50081c03e6192fb26", + "reference": "d4ca2fb70344987502567bc50081c03e6192fb26", + "shasum": "" + }, + "require": { + "php": ">=5.6", + "sebastian/recursion-context": "~1.0" + }, + "require-dev": { + "phpunit/phpunit": "~5" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + } + ], + 
"description": "Traverses array structures and object graphs to enumerate all referenced objects", + "homepage": "https://github.com/sebastianbergmann/object-enumerator/", + "time": "2016-01-28 13:25:10" + }, { "name": "sebastian/recursion-context", "version": "1.0.2", @@ -1006,16 +1101,16 @@ }, { "name": "symfony/yaml", - "version": "v3.0.2", + "version": "v3.0.4", "source": { "type": "git", "url": "https://github.com/symfony/yaml.git", - "reference": "3cf0709d7fe936e97bee9e954382e449003f1d9a" + "reference": "0047c8366744a16de7516622c5b7355336afae96" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/yaml/zipball/3cf0709d7fe936e97bee9e954382e449003f1d9a", - "reference": "3cf0709d7fe936e97bee9e954382e449003f1d9a", + "url": "https://api.github.com/repos/symfony/yaml/zipball/0047c8366744a16de7516622c5b7355336afae96", + "reference": "0047c8366744a16de7516622c5b7355336afae96", "shasum": "" }, "require": { @@ -1051,7 +1146,7 @@ ], "description": "Symfony Yaml Component", "homepage": "https://symfony.com", - "time": "2016-02-02 13:44:19" + "time": "2016-03-04 07:55:57" } ], "aliases": [], @@ -1059,6 +1154,8 @@ "stability-flags": [], "prefer-stable": false, "prefer-lowest": false, - "platform": [], + "platform": { + "php": ">=7.0.5" + }, "platform-dev": [] } From 469848ff4936772b03b7eaf86c6a52425649bc49 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Tue, 5 Apr 2016 21:06:53 +0200 Subject: [PATCH 08/27] implement k nearest neighbors classifier --- src/Phpml/Classifier/Classifier.php | 8 ++-- src/Phpml/Classifier/KNearestNeighbors.php | 47 ++++++++++++++++++---- src/Phpml/Classifier/NaiveBayes.php | 8 ++-- 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/src/Phpml/Classifier/Classifier.php b/src/Phpml/Classifier/Classifier.php index face7b2..6fad67a 100644 --- a/src/Phpml/Classifier/Classifier.php +++ b/src/Phpml/Classifier/Classifier.php @@ -7,15 +7,15 @@ namespace Phpml\Classifier; interface Classifier { /** - * @param array 
$features + * @param array $samples * @param array $labels */ - public function train(array $features, array $labels); + public function train(array $samples, array $labels); /** - * @param mixed $feature + * @param array $sample * * @return mixed */ - public function predict($feature); + public function predict(array $sample); } diff --git a/src/Phpml/Classifier/KNearestNeighbors.php b/src/Phpml/Classifier/KNearestNeighbors.php index e028d61..f454f9d 100644 --- a/src/Phpml/Classifier/KNearestNeighbors.php +++ b/src/Phpml/Classifier/KNearestNeighbors.php @@ -4,6 +4,8 @@ declare (strict_types = 1); namespace Phpml\Classifier; +use Phpml\Metric\Distance; + class KNearestNeighbors implements Classifier { /** @@ -14,7 +16,7 @@ class KNearestNeighbors implements Classifier /** * @var array */ - private $features; + private $samples; /** * @var array @@ -27,26 +29,57 @@ class KNearestNeighbors implements Classifier public function __construct(int $k = 3) { $this->k = $k; - $this->features = []; + $this->samples = []; $this->labels = []; } /** - * @param array $features + * @param array $samples * @param array $labels */ - public function train(array $features, array $labels) + public function train(array $samples, array $labels) { - $this->features = $features; + $this->samples = $samples; $this->labels = $labels; } /** - * @param mixed $feature + * @param array $sample * * @return mixed */ - public function predict($feature) + public function predict(array $sample) { + $distances = $this->kNeighborsDistances($sample); + + $predictions = []; + foreach ($distances as $index => $distance) { + $predictions[$this->labels[$index]]++; + } + + arsort($predictions); + + return array_shift(array_keys($predictions)); + } + + /** + * @param array $sample + * + * @return array + * + * @throws \Phpml\Exception\InvalidArgumentException + */ + private function kNeighborsDistances(array $sample): array + { + $distances = []; + foreach($this->samples as $index => $neighbor) { + 
$distances[$index] = Distance::euclidean($sample, $neighbor); + if(count($distances)==$this->k) { + break; + } + } + asort($distances); + + return $distances; } } diff --git a/src/Phpml/Classifier/NaiveBayes.php b/src/Phpml/Classifier/NaiveBayes.php index ed6bb8c..c1cc902 100644 --- a/src/Phpml/Classifier/NaiveBayes.php +++ b/src/Phpml/Classifier/NaiveBayes.php @@ -7,19 +7,19 @@ namespace Phpml\Classifier; class NaiveBayes implements Classifier { /** - * @param array $features + * @param array $samples * @param array $labels */ - public function train(array $features, array $labels) + public function train(array $samples, array $labels) { } /** - * @param mixed $feature + * @param array $sample * * @return mixed */ - public function predict($feature) + public function predict(array $sample) { } } From 7cbeaecffb76fbc7cbd76f8ddaca24b801c53ff2 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Tue, 5 Apr 2016 21:35:06 +0200 Subject: [PATCH 09/27] simple test for knn classifier --- src/Phpml/Classifier/KNearestNeighbors.php | 17 ++++++----- .../Classifier/KNearestNeighborsTest.php | 29 +++++++++++++++++++ tests/Phpml/Metric/DistanceTest.php | 6 ++-- 3 files changed, 41 insertions(+), 11 deletions(-) create mode 100644 tests/Phpml/Classifier/KNearestNeighborsTest.php diff --git a/src/Phpml/Classifier/KNearestNeighbors.php b/src/Phpml/Classifier/KNearestNeighbors.php index f454f9d..5a998d3 100644 --- a/src/Phpml/Classifier/KNearestNeighbors.php +++ b/src/Phpml/Classifier/KNearestNeighbors.php @@ -52,14 +52,16 @@ class KNearestNeighbors implements Classifier { $distances = $this->kNeighborsDistances($sample); - $predictions = []; + $predictions = array_combine(array_values($this->labels), array_fill(0, count($this->labels), 0)); + foreach ($distances as $index => $distance) { - $predictions[$this->labels[$index]]++; + ++$predictions[$this->labels[$index]]; } arsort($predictions); + reset($predictions); - return array_shift(array_keys($predictions)); + return 
key($predictions); } /** @@ -72,14 +74,13 @@ class KNearestNeighbors implements Classifier private function kNeighborsDistances(array $sample): array { $distances = []; - foreach($this->samples as $index => $neighbor) { + + foreach ($this->samples as $index => $neighbor) { $distances[$index] = Distance::euclidean($sample, $neighbor); - if(count($distances)==$this->k) { - break; - } } + asort($distances); - return $distances; + return array_slice($distances, 0, $this->k, true); } } diff --git a/tests/Phpml/Classifier/KNearestNeighborsTest.php b/tests/Phpml/Classifier/KNearestNeighborsTest.php new file mode 100644 index 0000000..2786ade --- /dev/null +++ b/tests/Phpml/Classifier/KNearestNeighborsTest.php @@ -0,0 +1,29 @@ +train($samples, $labels); + + $this->assertEquals('b', $classifier->predict([3, 2])); + $this->assertEquals('b', $classifier->predict([5, 1])); + $this->assertEquals('b', $classifier->predict([4, 3])); + $this->assertEquals('b', $classifier->predict([4, -5])); + + $this->assertEquals('a', $classifier->predict([2, 3])); + $this->assertEquals('a', $classifier->predict([1, 2])); + $this->assertEquals('a', $classifier->predict([1, 5])); + $this->assertEquals('a', $classifier->predict([3, 10])); + } +} diff --git a/tests/Phpml/Metric/DistanceTest.php b/tests/Phpml/Metric/DistanceTest.php index 249a9f8..b5fdc75 100644 --- a/tests/Phpml/Metric/DistanceTest.php +++ b/tests/Phpml/Metric/DistanceTest.php @@ -27,7 +27,7 @@ class DistanceTest extends \PHPUnit_Framework_TestCase $expectedDistance = 2; $actualDistance = Distance::euclidean($a, $b); - \PHPUnit_Framework_Assert::assertEquals($expectedDistance, $actualDistance); + $this->assertEquals($expectedDistance, $actualDistance); } public function testCalculateEuclideanDistanceForTwoAndMoreDimension() @@ -38,7 +38,7 @@ class DistanceTest extends \PHPUnit_Framework_TestCase $expectedDistance = 2.2360679774998; $actualDistance = Distance::euclidean($a, $b); - 
\PHPUnit_Framework_Assert::assertEquals($expectedDistance, $actualDistance); + $this->assertEquals($expectedDistance, $actualDistance); $a = [6, 10, 3]; $b = [2, 5, 5]; @@ -46,6 +46,6 @@ class DistanceTest extends \PHPUnit_Framework_TestCase $expectedDistance = 6.7082039324993694; $actualDistance = Distance::euclidean($a, $b); - \PHPUnit_Framework_Assert::assertEquals($expectedDistance, $actualDistance); + $this->assertEquals($expectedDistance, $actualDistance); } } From e521fb8f803b1855f24336f95328e99b98c24eab Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Wed, 6 Apr 2016 21:46:17 +0200 Subject: [PATCH 10/27] iris dataset loader --- src/Phpml/Dataset/Dataset.php | 58 ++++++++++++++++++++++++ src/Phpml/Dataset/Iris.php | 11 +++++ src/Phpml/Exception/DatasetException.php | 22 +++++++++ tests/Phpml/Dataset/IrisTest.php | 23 ++++++++++ 4 files changed, 114 insertions(+) create mode 100644 src/Phpml/Exception/DatasetException.php create mode 100644 tests/Phpml/Dataset/IrisTest.php diff --git a/src/Phpml/Dataset/Dataset.php b/src/Phpml/Dataset/Dataset.php index 316ee1d..cf1574e 100644 --- a/src/Phpml/Dataset/Dataset.php +++ b/src/Phpml/Dataset/Dataset.php @@ -1,9 +1,67 @@ filepath; + + if(!file_exists($filepath)) { + throw DatasetException::missingFile(basename($filepath)); + } + + $row = 0; + if (($handle = fopen($filepath, "r")) !== FALSE) { + while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { + $row++; + if($row==1) { + continue; + } + $this->samples[] = array_slice($data, 0, 4); + $this->lables[] = $data[4]; + } + fclose($handle); + } else { + throw DatasetException::cantOpenFile(basename($filepath)); + } + + } + + /** + * @return array + */ + public function getSamples() + { + return $this->samples; + } + + /** + * @return array + */ + public function getLabels() + { + return $this->lables; + } + } diff --git a/src/Phpml/Dataset/Iris.php b/src/Phpml/Dataset/Iris.php index 0565558..b862a74 100644 --- a/src/Phpml/Dataset/Iris.php +++ 
b/src/Phpml/Dataset/Iris.php @@ -1,9 +1,20 @@ assertEquals(150, count($iris->getSamples())); + $this->assertEquals(150, count($iris->getLabels())); + + // one sample features count + $this->assertEquals(4, count($iris->getSamples()[0])); + } + +} \ No newline at end of file From 649cbdb9a609bb125d6a3d87be81111bfdde618b Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Wed, 6 Apr 2016 22:38:08 +0200 Subject: [PATCH 11/27] prepare cross validation random splitter --- src/Phpml/Dataset/CsvDataset.php | 65 +++++++++++++++++++ src/Phpml/Dataset/Dataset.php | 57 ++-------------- src/Phpml/Dataset/Iris.php | 5 +- src/Phpml/Exception/DatasetException.php | 1 - .../Exception/InvalidArgumentException.php | 10 +++ tests/Phpml/Dataset/IrisTest.php | 7 +- 6 files changed, 84 insertions(+), 61 deletions(-) create mode 100644 src/Phpml/Dataset/CsvDataset.php diff --git a/src/Phpml/Dataset/CsvDataset.php b/src/Phpml/Dataset/CsvDataset.php new file mode 100644 index 0000000..6fa6b42 --- /dev/null +++ b/src/Phpml/Dataset/CsvDataset.php @@ -0,0 +1,65 @@ +filepath; + + if (!file_exists($filepath)) { + throw DatasetException::missingFile(basename($filepath)); + } + + $row = 0; + if (($handle = fopen($filepath, 'r')) !== false) { + while (($data = fgetcsv($handle, 1000, ',')) !== false) { + ++$row; + if ($row == 1) { + continue; + } + $this->samples[] = array_slice($data, 0, 4); + $this->lables[] = $data[4]; + } + fclose($handle); + } else { + throw DatasetException::cantOpenFile(basename($filepath)); + } + } + + /** + * @return array + */ + public function getSamples(): array + { + return $this->samples; + } + + /** + * @return array + */ + public function getLabels(): array + { + return $this->lables; + } +} diff --git a/src/Phpml/Dataset/Dataset.php b/src/Phpml/Dataset/Dataset.php index cf1574e..4e04931 100644 --- a/src/Phpml/Dataset/Dataset.php +++ b/src/Phpml/Dataset/Dataset.php @@ -4,64 +4,15 @@ declare (strict_types = 1); namespace Phpml\Dataset; -use 
Phpml\Exception\DatasetException; - -abstract class Dataset +interface Dataset { /** - * @var string + * @return array */ - protected $filepath; - - /** - * @var array - */ - private $samples = []; - - /** - * @var array - */ - private $lables = []; - - public function __construct() - { - $filepath = dirname(__FILE__) . '/../../../data/' . $this->filepath; - - if(!file_exists($filepath)) { - throw DatasetException::missingFile(basename($filepath)); - } - - $row = 0; - if (($handle = fopen($filepath, "r")) !== FALSE) { - while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { - $row++; - if($row==1) { - continue; - } - $this->samples[] = array_slice($data, 0, 4); - $this->lables[] = $data[4]; - } - fclose($handle); - } else { - throw DatasetException::cantOpenFile(basename($filepath)); - } - - } + public function getSamples(): array; /** * @return array */ - public function getSamples() - { - return $this->samples; - } - - /** - * @return array - */ - public function getLabels() - { - return $this->lables; - } - + public function getLabels(): array; } diff --git a/src/Phpml/Dataset/Iris.php b/src/Phpml/Dataset/Iris.php index b862a74..1353989 100644 --- a/src/Phpml/Dataset/Iris.php +++ b/src/Phpml/Dataset/Iris.php @@ -8,13 +8,12 @@ namespace Phpml\Dataset; * Classes: 3 * Samples per class: 50 * Samples total: 150 - * Features per sample: 4 + * Features per sample: 4. 
*/ -class Iris extends Dataset +class Iris extends CsvDataset { /** * @var string */ protected $filepath = 'iris.csv'; - } diff --git a/src/Phpml/Exception/DatasetException.php b/src/Phpml/Exception/DatasetException.php index f12c979..7b99e64 100644 --- a/src/Phpml/Exception/DatasetException.php +++ b/src/Phpml/Exception/DatasetException.php @@ -18,5 +18,4 @@ class DatasetException extends \Exception { return new self(sprintf('Dataset file %s can\'t be open.', $filepath)); } - } diff --git a/src/Phpml/Exception/InvalidArgumentException.php b/src/Phpml/Exception/InvalidArgumentException.php index 70bf918..48006fa 100644 --- a/src/Phpml/Exception/InvalidArgumentException.php +++ b/src/Phpml/Exception/InvalidArgumentException.php @@ -13,4 +13,14 @@ class InvalidArgumentException extends \Exception { return new self('Size of given arguments not match'); } + + /** + * @param $name + * + * @return InvalidArgumentException + */ + public static function percentNotInRange($name) + { + return new self(sprintf('%s must be between 0.0 and 1.0', $name)); + } } diff --git a/tests/Phpml/Dataset/IrisTest.php b/tests/Phpml/Dataset/IrisTest.php index 6bce532..99e19ad 100644 --- a/tests/Phpml/Dataset/IrisTest.php +++ b/tests/Phpml/Dataset/IrisTest.php @@ -1,5 +1,6 @@ assertEquals(4, count($iris->getSamples()[0])); } - -} \ No newline at end of file +} From c3f98e409382213ca3b7c66064403951d98b6487 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Wed, 6 Apr 2016 22:38:27 +0200 Subject: [PATCH 12/27] random splitter skeleton --- src/Phpml/CrossValidation/RandomSplit.php | 72 +++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 src/Phpml/CrossValidation/RandomSplit.php diff --git a/src/Phpml/CrossValidation/RandomSplit.php b/src/Phpml/CrossValidation/RandomSplit.php new file mode 100644 index 0000000..3cc6c05 --- /dev/null +++ b/src/Phpml/CrossValidation/RandomSplit.php @@ -0,0 +1,72 @@ + $testSize || 1 < $testSize) { + throw 
InvalidArgumentException::percentNotInRange('testSize'); + } + + // TODO: implement this ! + } + + /** + * @return array + */ + public function getTrainSamples() + { + return $this->trainSamples; + } + + /** + * @return array + */ + public function getTestSamples() + { + return $this->testSamples; + } + + /** + * @return array + */ + public function getTrainLabels() + { + return $this->trainLabels; + } + + /** + * @return array + */ + public function getTestLabels() + { + return $this->testLabels; + } +} From bbcc8a3e685290bea034186d0458fb9c77846fd6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Thu, 7 Apr 2016 22:12:36 +0200 Subject: [PATCH 13/27] random split implementation and tests --- src/Phpml/CrossValidation/RandomSplit.php | 39 +++++++- src/Phpml/Dataset/ArrayDataset.php | 46 +++++++++ src/Phpml/Dataset/CsvDataset.php | 6 +- src/Phpml/Dataset/{ => Demo}/Iris.php | 4 +- .../Phpml/CrossValidation/RandomSplitTest.php | 95 +++++++++++++++++++ tests/Phpml/Dataset/{ => Demo}/IrisTest.php | 4 +- 6 files changed, 185 insertions(+), 9 deletions(-) create mode 100644 src/Phpml/Dataset/ArrayDataset.php rename src/Phpml/Dataset/{ => Demo}/Iris.php (79%) create mode 100644 tests/Phpml/CrossValidation/RandomSplitTest.php rename tests/Phpml/Dataset/{ => Demo}/IrisTest.php (86%) diff --git a/src/Phpml/CrossValidation/RandomSplit.php b/src/Phpml/CrossValidation/RandomSplit.php index 3cc6c05..4e2ecc9 100644 --- a/src/Phpml/CrossValidation/RandomSplit.php +++ b/src/Phpml/CrossValidation/RandomSplit.php @@ -29,13 +29,34 @@ class RandomSplit */ private $testLabels = []; - public function __construct(Dataset $dataset, float $testSize = 0.3) + /** + * @param Dataset $dataset + * @param float $testSize + * @param int $seed + * + * @throws InvalidArgumentException + */ + public function __construct(Dataset $dataset, float $testSize = 0.3, int $seed = null) { - if (0 > $testSize || 1 < $testSize) { + if (0 >= $testSize || 1 <= $testSize) { throw 
InvalidArgumentException::percentNotInRange('testSize'); } + $this->seedGenerator($seed); - // TODO: implement this ! + $samples = $dataset->getSamples(); + $labels = $dataset->getLabels(); + $datasetSize = count($samples); + + for($i=$datasetSize; $i>0; $i--) { + $key = mt_rand(0, $datasetSize-1); + $setName = count($this->testSamples) / $datasetSize >= $testSize ? 'train' : 'test'; + + $this->{$setName.'Samples'}[] = $samples[$key]; + $this->{$setName.'Labels'}[] = $labels[$key]; + + $samples = array_values($samples); + $labels = array_values($labels); + } } /** @@ -69,4 +90,16 @@ class RandomSplit { return $this->testLabels; } + + /** + * @param int|null $seed + */ + private function seedGenerator(int $seed = null) + { + if (null === $seed) { + mt_srand(); + } else { + mt_srand($seed); + } + } } diff --git a/src/Phpml/Dataset/ArrayDataset.php b/src/Phpml/Dataset/ArrayDataset.php new file mode 100644 index 0000000..85f9914 --- /dev/null +++ b/src/Phpml/Dataset/ArrayDataset.php @@ -0,0 +1,46 @@ +samples = $samples; + $this->labels = $labels; + } + + + /** + * @return array + */ + public function getSamples(): array + { + return $this->samples; + } + + /** + * @return array + */ + public function getLabels(): array + { + return $this->labels; + } + +} diff --git a/src/Phpml/Dataset/CsvDataset.php b/src/Phpml/Dataset/CsvDataset.php index 6fa6b42..c21ac97 100644 --- a/src/Phpml/Dataset/CsvDataset.php +++ b/src/Phpml/Dataset/CsvDataset.php @@ -21,7 +21,7 @@ abstract class CsvDataset implements Dataset /** * @var array */ - private $lables = []; + private $labels = []; public function __construct() { @@ -39,7 +39,7 @@ abstract class CsvDataset implements Dataset continue; } $this->samples[] = array_slice($data, 0, 4); - $this->lables[] = $data[4]; + $this->labels[] = $data[4]; } fclose($handle); } else { @@ -60,6 +60,6 @@ abstract class CsvDataset implements Dataset */ public function getLabels(): array { - return $this->lables; + return $this->labels; } } diff --git 
a/src/Phpml/Dataset/Iris.php b/src/Phpml/Dataset/Demo/Iris.php similarity index 79% rename from src/Phpml/Dataset/Iris.php rename to src/Phpml/Dataset/Demo/Iris.php index 1353989..d544a55 100644 --- a/src/Phpml/Dataset/Iris.php +++ b/src/Phpml/Dataset/Demo/Iris.php @@ -2,7 +2,9 @@ declare (strict_types = 1); -namespace Phpml\Dataset; +namespace Phpml\Dataset\Demo; + +use Phpml\Dataset\CsvDataset; /** * Classes: 3 diff --git a/tests/Phpml/CrossValidation/RandomSplitTest.php b/tests/Phpml/CrossValidation/RandomSplitTest.php new file mode 100644 index 0000000..cbbd497 --- /dev/null +++ b/tests/Phpml/CrossValidation/RandomSplitTest.php @@ -0,0 +1,95 @@ +assertEquals(2, count($randomSplit1->getTestSamples())); + $this->assertEquals(2, count($randomSplit1->getTrainSamples())); + + $randomSplit2 = new RandomSplit($dataset, 0.25); + + $this->assertEquals(1, count($randomSplit2->getTestSamples())); + $this->assertEquals(3, count($randomSplit2->getTrainSamples())); + } + + public function testDatasetRandomSplitWithSameSeed() + { + $dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4], [5], [6], [7], [8]], + $labels = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'] + ); + + $seed = 123; + + $randomSplit1 = new RandomSplit($dataset, 0.5, $seed); + $randomSplit2 = new RandomSplit($dataset, 0.5, $seed); + + $this->assertEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels()); + $this->assertEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples()); + $this->assertEquals($randomSplit1->getTrainLabels(), $randomSplit2->getTrainLabels()); + $this->assertEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples()); + } + + public function testDatasetRandomSplitWithDifferentSeed() + { + $dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4], [5], [6], [7], [8]], + $labels = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'] + ); + + $randomSplit1 = new RandomSplit($dataset, 0.5, 4321); + $randomSplit2 = new RandomSplit($dataset, 0.5, 1234); + 
+ $this->assertNotEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels()); + $this->assertNotEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples()); + $this->assertNotEquals($randomSplit1->getTrainLabels(), $randomSplit2->getTrainLabels()); + $this->assertNotEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples()); + } + + public function testRandomSplitCorrectSampleAndLabelPosition() + { + $dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4]], + $labels = [1, 2, 3, 4] + ); + + $randomSplit = new RandomSplit($dataset, 0.5); + + $this->assertEquals($randomSplit->getTestSamples()[0][0], $randomSplit->getTestLabels()[0]); + $this->assertEquals($randomSplit->getTestSamples()[1][0], $randomSplit->getTestLabels()[1]); + $this->assertEquals($randomSplit->getTrainSamples()[0][0], $randomSplit->getTrainLabels()[0]); + $this->assertEquals($randomSplit->getTrainSamples()[1][0], $randomSplit->getTrainLabels()[1]); + } + +} \ No newline at end of file diff --git a/tests/Phpml/Dataset/IrisTest.php b/tests/Phpml/Dataset/Demo/IrisTest.php similarity index 86% rename from tests/Phpml/Dataset/IrisTest.php rename to tests/Phpml/Dataset/Demo/IrisTest.php index 99e19ad..1f0da90 100644 --- a/tests/Phpml/Dataset/IrisTest.php +++ b/tests/Phpml/Dataset/Demo/IrisTest.php @@ -2,9 +2,9 @@ declare (strict_types = 1); -namespace tests\Phpml\Dataset; +namespace tests\Phpml\Dataset\Demo; -use Phpml\Dataset\Iris; +use Phpml\Dataset\Demo\Iris; class IrisTest extends \PHPUnit_Framework_TestCase { From d3247ebccb855a87118816c8a8d19b93bcd94e4d Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Thu, 7 Apr 2016 22:13:31 +0200 Subject: [PATCH 14/27] random split implementation and tests --- src/Phpml/Dataset/ArrayDataset.php | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Phpml/Dataset/ArrayDataset.php b/src/Phpml/Dataset/ArrayDataset.php index 85f9914..b2e09b7 100644 --- a/src/Phpml/Dataset/ArrayDataset.php +++ 
b/src/Phpml/Dataset/ArrayDataset.php @@ -3,6 +3,8 @@ declare(strict_types = 1); namespace Phpml\Dataset; +use Phpml\Exception\InvalidArgumentException; + class ArrayDataset implements Dataset { @@ -19,9 +21,15 @@ class ArrayDataset implements Dataset /** * @param array $samples * @param array $labels + * + * @throws InvalidArgumentException */ public function __construct(array $samples, array $labels) { + if (count($samples) != count($labels)) { + throw InvalidArgumentException::sizeNotMatch(); + } + $this->samples = $samples; $this->labels = $labels; } From a20f474324b4239753616d0527d02f8cf0d4de4b Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Thu, 7 Apr 2016 22:19:04 +0200 Subject: [PATCH 15/27] refactor csv dataset definition --- src/Phpml/Dataset/ArrayDataset.php | 4 ++-- src/Phpml/Dataset/CsvDataset.php | 33 +++++------------------------- src/Phpml/Dataset/Demo/Iris.php | 8 ++++++-- 3 files changed, 13 insertions(+), 32 deletions(-) diff --git a/src/Phpml/Dataset/ArrayDataset.php b/src/Phpml/Dataset/ArrayDataset.php index b2e09b7..580df0a 100644 --- a/src/Phpml/Dataset/ArrayDataset.php +++ b/src/Phpml/Dataset/ArrayDataset.php @@ -11,12 +11,12 @@ class ArrayDataset implements Dataset /** * @var array */ - private $samples = []; + protected $samples = []; /** * @var array */ - private $labels = []; + protected $labels = []; /** * @param array $samples diff --git a/src/Phpml/Dataset/CsvDataset.php b/src/Phpml/Dataset/CsvDataset.php index c21ac97..de540c9 100644 --- a/src/Phpml/Dataset/CsvDataset.php +++ b/src/Phpml/Dataset/CsvDataset.php @@ -6,7 +6,7 @@ namespace Phpml\Dataset; use Phpml\Exception\DatasetException; -abstract class CsvDataset implements Dataset +class CsvDataset extends ArrayDataset { /** * @var string @@ -14,19 +14,12 @@ abstract class CsvDataset implements Dataset protected $filepath; /** - * @var array + * @param string|null $filepath + * + * @throws DatasetException */ - private $samples = []; - - /** - * @var array - */ - private 
$labels = []; - - public function __construct() + public function __construct(string $filepath = null) { - $filepath = dirname(__FILE__).'/../../../data/'.$this->filepath; - if (!file_exists($filepath)) { throw DatasetException::missingFile(basename($filepath)); } @@ -46,20 +39,4 @@ abstract class CsvDataset implements Dataset throw DatasetException::cantOpenFile(basename($filepath)); } } - - /** - * @return array - */ - public function getSamples(): array - { - return $this->samples; - } - - /** - * @return array - */ - public function getLabels(): array - { - return $this->labels; - } } diff --git a/src/Phpml/Dataset/Demo/Iris.php b/src/Phpml/Dataset/Demo/Iris.php index d544a55..5a4789e 100644 --- a/src/Phpml/Dataset/Demo/Iris.php +++ b/src/Phpml/Dataset/Demo/Iris.php @@ -15,7 +15,11 @@ use Phpml\Dataset\CsvDataset; class Iris extends CsvDataset { /** - * @var string + * @param string|null $filepath */ - protected $filepath = 'iris.csv'; + public function __construct(string $filepath = null) + { + $filepath = dirname(__FILE__).'/../../../../data/iris.csv'; + parent::__construct($filepath); + } } From 9c18a5a22d67937a79b834fd325c8cb4b389879e Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Thu, 7 Apr 2016 22:35:49 +0200 Subject: [PATCH 16/27] add tests for datasets --- src/Phpml/CrossValidation/RandomSplit.php | 8 ++++---- src/Phpml/Dataset/ArrayDataset.php | 6 ++---- src/Phpml/Dataset/CsvDataset.php | 12 +++++++----- src/Phpml/Dataset/Demo/Iris.php | 7 ++----- tests/Phpml/CrossValidation/RandomSplitTest.php | 7 +++---- 5 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/Phpml/CrossValidation/RandomSplit.php b/src/Phpml/CrossValidation/RandomSplit.php index 4e2ecc9..c5a24bd 100644 --- a/src/Phpml/CrossValidation/RandomSplit.php +++ b/src/Phpml/CrossValidation/RandomSplit.php @@ -31,8 +31,8 @@ class RandomSplit /** * @param Dataset $dataset - * @param float $testSize - * @param int $seed + * @param float $testSize + * @param int $seed * * 
@throws InvalidArgumentException */ @@ -47,8 +47,8 @@ class RandomSplit $labels = $dataset->getLabels(); $datasetSize = count($samples); - for($i=$datasetSize; $i>0; $i--) { - $key = mt_rand(0, $datasetSize-1); + for ($i = $datasetSize; $i > 0; --$i) { + $key = mt_rand(0, $datasetSize - 1); $setName = count($this->testSamples) / $datasetSize >= $testSize ? 'train' : 'test'; $this->{$setName.'Samples'}[] = $samples[$key]; diff --git a/src/Phpml/Dataset/ArrayDataset.php b/src/Phpml/Dataset/ArrayDataset.php index 580df0a..d117122 100644 --- a/src/Phpml/Dataset/ArrayDataset.php +++ b/src/Phpml/Dataset/ArrayDataset.php @@ -1,5 +1,6 @@ labels = $labels; } - /** * @return array */ @@ -50,5 +49,4 @@ class ArrayDataset implements Dataset { return $this->labels; } - } diff --git a/src/Phpml/Dataset/CsvDataset.php b/src/Phpml/Dataset/CsvDataset.php index de540c9..e6dafd2 100644 --- a/src/Phpml/Dataset/CsvDataset.php +++ b/src/Phpml/Dataset/CsvDataset.php @@ -14,11 +14,13 @@ class CsvDataset extends ArrayDataset protected $filepath; /** - * @param string|null $filepath + * @param string $filepath + * @param int $features + * @param bool $headingRow * * @throws DatasetException */ - public function __construct(string $filepath = null) + public function __construct(string $filepath, int $features, bool $headingRow = true) { if (!file_exists($filepath)) { throw DatasetException::missingFile(basename($filepath)); @@ -28,11 +30,11 @@ class CsvDataset extends ArrayDataset if (($handle = fopen($filepath, 'r')) !== false) { while (($data = fgetcsv($handle, 1000, ',')) !== false) { ++$row; - if ($row == 1) { + if ($headingRow && $row == 1) { continue; } - $this->samples[] = array_slice($data, 0, 4); - $this->labels[] = $data[4]; + $this->samples[] = array_slice($data, 0, $features); + $this->labels[] = $data[$features]; } fclose($handle); } else { diff --git a/src/Phpml/Dataset/Demo/Iris.php b/src/Phpml/Dataset/Demo/Iris.php index 5a4789e..923f0ba 100644 --- 
a/src/Phpml/Dataset/Demo/Iris.php +++ b/src/Phpml/Dataset/Demo/Iris.php @@ -14,12 +14,9 @@ use Phpml\Dataset\CsvDataset; */ class Iris extends CsvDataset { - /** - * @param string|null $filepath - */ - public function __construct(string $filepath = null) + public function __construct() { $filepath = dirname(__FILE__).'/../../../../data/iris.csv'; - parent::__construct($filepath); + parent::__construct($filepath, 4, true); } } diff --git a/tests/Phpml/CrossValidation/RandomSplitTest.php b/tests/Phpml/CrossValidation/RandomSplitTest.php index cbbd497..e6ae30e 100644 --- a/tests/Phpml/CrossValidation/RandomSplitTest.php +++ b/tests/Phpml/CrossValidation/RandomSplitTest.php @@ -1,5 +1,6 @@ assertEquals($randomSplit->getTrainSamples()[0][0], $randomSplit->getTrainLabels()[0]); $this->assertEquals($randomSplit->getTrainSamples()[1][0], $randomSplit->getTrainLabels()[1]); } - -} \ No newline at end of file +} From db9d57cd221e182aa086d69598b91ffbe9873072 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Thu, 7 Apr 2016 22:36:02 +0200 Subject: [PATCH 17/27] add tests for datasets --- tests/Phpml/Dataset/ArrayDatasetTest.php | 29 +++++++++++++++++++++++ tests/Phpml/Dataset/CsvDatasetTest.php | 28 ++++++++++++++++++++++ tests/Phpml/Dataset/Resources/dataset.csv | 11 +++++++++ 3 files changed, 68 insertions(+) create mode 100644 tests/Phpml/Dataset/ArrayDatasetTest.php create mode 100644 tests/Phpml/Dataset/CsvDatasetTest.php create mode 100644 tests/Phpml/Dataset/Resources/dataset.csv diff --git a/tests/Phpml/Dataset/ArrayDatasetTest.php b/tests/Phpml/Dataset/ArrayDatasetTest.php new file mode 100644 index 0000000..7244b3e --- /dev/null +++ b/tests/Phpml/Dataset/ArrayDatasetTest.php @@ -0,0 +1,29 @@ +assertEquals($samples, $dataset->getSamples()); + $this->assertEquals($labels, $dataset->getLabels()); + } +} diff --git a/tests/Phpml/Dataset/CsvDatasetTest.php b/tests/Phpml/Dataset/CsvDatasetTest.php new file mode 100644 index 0000000..db87d62 --- /dev/null +++ 
b/tests/Phpml/Dataset/CsvDatasetTest.php @@ -0,0 +1,28 @@ +assertEquals(10, count($dataset->getSamples())); + $this->assertEquals(10, count($dataset->getLabels())); + } +} diff --git a/tests/Phpml/Dataset/Resources/dataset.csv b/tests/Phpml/Dataset/Resources/dataset.csv new file mode 100644 index 0000000..fcf0121 --- /dev/null +++ b/tests/Phpml/Dataset/Resources/dataset.csv @@ -0,0 +1,11 @@ +feature1,feature2,label +1,1,a +2,1,b +3,1,c +4,5,a +2,4,a +1,5,a +2,6,b +3,7,c +4,4,a +2,0,a \ No newline at end of file From 9899da7143965f1dfe49c4f3ebc317297fef7ce2 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Thu, 7 Apr 2016 23:08:04 +0200 Subject: [PATCH 18/27] remove count from test condition --- composer.json | 2 +- src/Phpml/Metric/Distance.php | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index 66083b2..d3569d0 100644 --- a/composer.json +++ b/composer.json @@ -1,7 +1,7 @@ { "name": "php-ai/php-ml", "type": "library", - "description": "PHP Machine learning library", + "description": "PHP Machine Learning library", "license": "MIT", "keywords": ["machine learning","pattern recognition","computational learning theory","artificial intelligence"], "homepage": "https://github.com/php-ai/php-ml", diff --git a/src/Phpml/Metric/Distance.php b/src/Phpml/Metric/Distance.php index e0dce2d..1b92cef 100644 --- a/src/Phpml/Metric/Distance.php +++ b/src/Phpml/Metric/Distance.php @@ -23,8 +23,9 @@ class Distance } $distance = 0; + $count = count($a); - for ($i = 0; $i < count($a); ++$i) { + for ($i = 0; $i < $count; ++$i) { $distance += pow($a[$i] - $b[$i], 2); } From bd34266ae1d4d32bba0fd6f89c6e04f64d4adddd Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Thu, 7 Apr 2016 23:13:50 +0200 Subject: [PATCH 19/27] change min php version to 7.0.0 --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index d3569d0..340f242 100644 --- a/composer.json +++ 
b/composer.json @@ -20,7 +20,7 @@ "bin-dir": "bin" }, "require": { - "php": ">=7.0.5" + "php": ">=7.0.0" }, "require-dev": { "phpunit/phpunit": "^5.2" From 63d49616d8e4d2b3d9788f1ea739fd2edd15ad65 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Thu, 7 Apr 2016 23:25:02 +0200 Subject: [PATCH 20/27] update composer --- composer.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/composer.lock b/composer.lock index 0b9287b..667ba6a 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,8 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "hash": "54ea009d9b09c1886c0789dadc484a36", - "content-hash": "52c31f7df9edd7063e2489ed47e091a7", + "hash": "7c34eebd6b8749a1cd09df57e5d1f47a", + "content-hash": "087091d0c339e9fa3a551a189ea658bf", "packages": [], "packages-dev": [ { @@ -458,16 +458,16 @@ }, { "name": "phpunit/phpunit", - "version": "5.3.0", + "version": "5.3.1", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/phpunit.git", - "reference": "dd3001822b2df8f5add266020e3d2fd3c5db3ae9" + "reference": "34a3acb401ae79deb37bc6e5f5ec3d325b369b4c" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/dd3001822b2df8f5add266020e3d2fd3c5db3ae9", - "reference": "dd3001822b2df8f5add266020e3d2fd3c5db3ae9", + "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/34a3acb401ae79deb37bc6e5f5ec3d325b369b4c", + "reference": "34a3acb401ae79deb37bc6e5f5ec3d325b369b4c", "shasum": "" }, "require": { @@ -482,7 +482,7 @@ "phpunit/php-code-coverage": "^3.3.0", "phpunit/php-file-iterator": "~1.4", "phpunit/php-text-template": "~1.2", - "phpunit/php-timer": ">=1.0.6", + "phpunit/php-timer": "^1.0.6", "phpunit/phpunit-mock-objects": "^3.1", "sebastian/comparator": "~1.1", "sebastian/diff": "~1.2", @@ -529,7 +529,7 @@ "testing", "xunit" ], - "time": "2016-03-31 21:35:50" + 
"time": "2016-04-07 07:04:34" }, { "name": "phpunit/phpunit-mock-objects", @@ -1155,7 +1155,7 @@ "prefer-stable": false, "prefer-lowest": false, "platform": { - "php": ">=7.0.5" + "php": ">=7.0.0" }, "platform-dev": [] } From 0b6dc42807a2dd8e2fdcdb79690ebd6f8f5f585b Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Fri, 8 Apr 2016 21:37:26 +0200 Subject: [PATCH 21/27] change autoloader to psr-0 --- composer.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index 340f242..4f0dd26 100644 --- a/composer.json +++ b/composer.json @@ -12,8 +12,8 @@ } ], "autoload": { - "psr-4": { - "": "src/" + "psr-0": { + "Phpml": "src/" } }, "config": { From f1c81638d63ffa891f266a891683334158ffa5e8 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Fri, 8 Apr 2016 22:11:59 +0200 Subject: [PATCH 22/27] accuracy score with test --- src/Phpml/Metric/Accuracy.php | 39 +++++++++++++++++++++++++++++ tests/Phpml/Metric/AccuracyTest.php | 38 ++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 src/Phpml/Metric/Accuracy.php create mode 100644 tests/Phpml/Metric/AccuracyTest.php diff --git a/src/Phpml/Metric/Accuracy.php b/src/Phpml/Metric/Accuracy.php new file mode 100644 index 0000000..878cadd --- /dev/null +++ b/src/Phpml/Metric/Accuracy.php @@ -0,0 +1,39 @@ + $label) { + if($label===$predictedLabels[$index]) { + $score++; + } + } + + if($normalize) { + $score = $score / count($actualLabels); + } + + return $score; + } +} diff --git a/tests/Phpml/Metric/AccuracyTest.php b/tests/Phpml/Metric/AccuracyTest.php new file mode 100644 index 0000000..31bb0fc --- /dev/null +++ b/tests/Phpml/Metric/AccuracyTest.php @@ -0,0 +1,38 @@ +assertEquals(0.5, Accuracy::score($actualLabels, $predictedLabels)); + } + + public function testCalculateNotNormalizedScore() + { + $actualLabels = ['a', 'b', 'a', 'b']; + $predictedLabels = ['a', 'b', 'b', 'b']; + + $this->assertEquals(3, Accuracy::score($actualLabels, 
$predictedLabels, false)); + } + +} From e7d27801503136cd4450bbc3a8014ef13595f5ba Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Fri, 8 Apr 2016 22:25:15 +0200 Subject: [PATCH 23/27] classifier predict array of samples or one sample --- src/Phpml/Classifier/Classifier.php | 4 ++-- src/Phpml/Classifier/KNearestNeighbors.php | 21 ++++++++++++++++++- src/Phpml/Classifier/NaiveBayes.php | 4 ++-- .../Classifier/KNearestNeighborsTest.php | 15 +++++++++++++ 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/Phpml/Classifier/Classifier.php b/src/Phpml/Classifier/Classifier.php index 6fad67a..90250a9 100644 --- a/src/Phpml/Classifier/Classifier.php +++ b/src/Phpml/Classifier/Classifier.php @@ -13,9 +13,9 @@ interface Classifier public function train(array $samples, array $labels); /** - * @param array $sample + * @param array $samples * * @return mixed */ - public function predict(array $sample); + public function predict(array $samples); } diff --git a/src/Phpml/Classifier/KNearestNeighbors.php b/src/Phpml/Classifier/KNearestNeighbors.php index 5a998d3..53c66b0 100644 --- a/src/Phpml/Classifier/KNearestNeighbors.php +++ b/src/Phpml/Classifier/KNearestNeighbors.php @@ -43,12 +43,31 @@ class KNearestNeighbors implements Classifier $this->labels = $labels; } + /** + * @param array $samples + * + * @return mixed + */ + public function predict(array $samples) + { + if(!is_array($samples[0])) { + $predicted = $this->predictSample($samples); + } else { + $predicted = []; + foreach ($samples as $index => $sample) { + $predicted[$index] = $this->predictSample($sample); + } + } + + return $predicted; + } + /** * @param array $sample * * @return mixed */ - public function predict(array $sample) + private function predictSample(array $sample) { $distances = $this->kNeighborsDistances($sample); diff --git a/src/Phpml/Classifier/NaiveBayes.php b/src/Phpml/Classifier/NaiveBayes.php index c1cc902..7324d79 100644 --- a/src/Phpml/Classifier/NaiveBayes.php +++ 
b/src/Phpml/Classifier/NaiveBayes.php @@ -15,11 +15,11 @@ class NaiveBayes implements Classifier } /** - * @param array $sample + * @param array $samples * * @return mixed */ - public function predict(array $sample) + public function predict(array $samples) { } } diff --git a/tests/Phpml/Classifier/KNearestNeighborsTest.php b/tests/Phpml/Classifier/KNearestNeighborsTest.php index 2786ade..06ae42f 100644 --- a/tests/Phpml/Classifier/KNearestNeighborsTest.php +++ b/tests/Phpml/Classifier/KNearestNeighborsTest.php @@ -26,4 +26,19 @@ class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase $this->assertEquals('a', $classifier->predict([1, 5])); $this->assertEquals('a', $classifier->predict([3, 10])); } + + public function testPredictArrayOfSamples() + { + $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]; + $testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a',]; + + $classifier = new KNearestNeighbors(); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + $this->assertEquals($testLabels, $predicted); + } } From 62ec4ec2f240781ac0e82778144699ff364ddb15 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Fri, 8 Apr 2016 22:49:17 +0200 Subject: [PATCH 24/27] integration tests for knn classifier --- src/Phpml/Classifier/KNearestNeighbors.php | 2 +- src/Phpml/Metric/Accuracy.php | 12 ++++++------ .../Phpml/Classifier/KNearestNeighborsTest.php | 18 ++++++++++++++++-- tests/Phpml/Metric/AccuracyTest.php | 5 ++--- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/Phpml/Classifier/KNearestNeighbors.php b/src/Phpml/Classifier/KNearestNeighbors.php index 53c66b0..f913488 100644 --- a/src/Phpml/Classifier/KNearestNeighbors.php +++ b/src/Phpml/Classifier/KNearestNeighbors.php @@ -50,7 +50,7 @@ class KNearestNeighbors implements Classifier */ public 
function predict(array $samples) { - if(!is_array($samples[0])) { + if (!is_array($samples[0])) { $predicted = $this->predictSample($samples); } else { $predicted = []; diff --git a/src/Phpml/Metric/Accuracy.php b/src/Phpml/Metric/Accuracy.php index 878cadd..d871e85 100644 --- a/src/Phpml/Metric/Accuracy.php +++ b/src/Phpml/Metric/Accuracy.php @@ -1,5 +1,6 @@ $label) { - if($label===$predictedLabels[$index]) { - $score++; + if ($label === $predictedLabels[$index]) { + ++$score; } } - if($normalize) { + if ($normalize) { $score = $score / count($actualLabels); } diff --git a/tests/Phpml/Classifier/KNearestNeighborsTest.php b/tests/Phpml/Classifier/KNearestNeighborsTest.php index 06ae42f..1050607 100644 --- a/tests/Phpml/Classifier/KNearestNeighborsTest.php +++ b/tests/Phpml/Classifier/KNearestNeighborsTest.php @@ -5,10 +5,13 @@ declare (strict_types = 1); namespace tests\Classifier; use Phpml\Classifier\KNearestNeighbors; +use Phpml\CrossValidation\RandomSplit; +use Phpml\Dataset\Demo\Iris; +use Phpml\Metric\Accuracy; class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase { - public function testPredictSimpleSampleWithDefaultK() + public function testPredictSingleSampleWithDefaultK() { $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; $labels = ['a', 'a', 'a', 'b', 'b', 'b']; @@ -33,7 +36,7 @@ class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]; - $testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a',]; + $testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a']; $classifier = new KNearestNeighbors(); $classifier->train($trainSamples, $trainLabels); @@ -41,4 +44,15 @@ class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase $this->assertEquals($testLabels, $predicted); } + + public function testAccuracyOnIrisDataset() + { + $dataset = new RandomSplit(new Iris(), $testSize = 0.5, $seed = 123); + 
$classifier = new KNearestNeighbors($k = 4); + $classifier->train($dataset->getTrainSamples(), $dataset->getTrainLabels()); + $predicted = $classifier->predict($dataset->getTestSamples()); + $score = Accuracy::score($dataset->getTestLabels(), $predicted); + + $this->assertEquals(0.96, $score); + } } diff --git a/tests/Phpml/Metric/AccuracyTest.php b/tests/Phpml/Metric/AccuracyTest.php index 31bb0fc..aa68b22 100644 --- a/tests/Phpml/Metric/AccuracyTest.php +++ b/tests/Phpml/Metric/AccuracyTest.php @@ -1,5 +1,6 @@ assertEquals(3, Accuracy::score($actualLabels, $predictedLabels, false)); } - } From 83b4a9e19c3a0bbdaa6871a2edc4b5c44fe002ee Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Fri, 8 Apr 2016 23:22:55 +0200 Subject: [PATCH 25/27] add instal info and badges to readme --- README.md | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 457f3c1..568c3ab 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,23 @@ # PHP Machine learning library +[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop) +[![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml) +[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml) +[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop) + Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ... -## Available Algorithms +## Documentation -### Classification - -Identifying to which category an object belongs to. 
- -* **Naive Bayes** - algorithm based on applying Bayes’ theorem with the “naive” assumption of independence between every pair of features +The documentation will be available on `Read the Docs` soon ... ## Installation -This repo will be published do packagist.org soon... +Currently this library is in the process of developing, but You can install it with Composer: + +``` +composer require php-ai/php-ml +``` ## To-Do @@ -21,7 +26,7 @@ This repo will be published do packagist.org soon... ## Testing -After installation, you can launch the test suite in project root directory (you will need to install dev requiremnts with composer) +After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer) ``` bin/phpunit @@ -33,3 +38,4 @@ PHP-ML is released under the MIT Licence. See the bundled LICENSE file for detai ## Author +Arkadiusz Kondas (@ArkadiuszKondas) \ No newline at end of file From 5be21477840b64336be8b6e5e4713074fc2159fc Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Sat, 9 Apr 2016 00:36:48 +0200 Subject: [PATCH 26/27] creat docs files --- docs/index.md | 37 +++++++++++++++++++ .../classification/knearestneighbors.md | 35 ++++++++++++++++++ .../cross-validation/randomsplit.md | 29 +++++++++++++++ .../datasets/array-dataset.md | 21 +++++++++++ docs/machine-learning/datasets/csv-dataset.md | 15 ++++++++ docs/machine-learning/datasets/demo/iris.md | 34 +++++++++++++++++ docs/machine-learning/metric/accuracy.md | 24 ++++++++++++ docs/machine-learning/metric/distance.md | 17 +++++++++ mkdocs.yml | 17 +++++++++ .../Phpml/CrossValidation/RandomSplitTest.php | 6 +-- 10 files changed, 232 insertions(+), 3 deletions(-) create mode 100644 docs/index.md create mode 100644 docs/machine-learning/classification/knearestneighbors.md create mode 100644 docs/machine-learning/cross-validation/randomsplit.md create mode 100644 docs/machine-learning/datasets/array-dataset.md create mode 100644 
docs/machine-learning/datasets/csv-dataset.md create mode 100644 docs/machine-learning/datasets/demo/iris.md create mode 100644 docs/machine-learning/metric/accuracy.md create mode 100644 docs/machine-learning/metric/distance.md create mode 100644 mkdocs.yml diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..c3e2703 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,37 @@ +# PHP Machine Learning (PHP-ML) + +[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop) +[![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml) +[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml) +[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop) + +Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ... + +## Installation + +Currently this library is in the process of developing, but You can install it with Composer: + +``` +composer require php-ai/php-ml +``` + +## To-Do + +* implements more algorithms +* integration with Lavacharts for data visualization + +## Testing + +After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer) + +``` +bin/phpunit +``` + +## License + +PHP-ML is released under the MIT Licence. See the bundled LICENSE file for details. 
+ +## Author + +Arkadiusz Kondas (@ArkadiuszKondas) \ No newline at end of file diff --git a/docs/machine-learning/classification/knearestneighbors.md b/docs/machine-learning/classification/knearestneighbors.md new file mode 100644 index 0000000..569c48b --- /dev/null +++ b/docs/machine-learning/classification/knearestneighbors.md @@ -0,0 +1,35 @@ +# KNearestNeighbors Classifier + +Classifier implementing the k-nearest neighbors algorithm. + +### Constructor Parameters + +* $k - number of nearest neighbors to scan (default: 3) + +``` +$classifier = new KNearestNeighbors($k=4); +``` + +### Train + +To train a classifier simply provide train samples and labels (as `array`): + +``` +$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; +$labels = ['a', 'a', 'a', 'b', 'b', 'b']; + +$classifier = new KNearestNeighbors(); +$classifier->train($samples, $labels); +``` + +### Predict + +To predict the class of a sample, use the `predict` method. You can provide one sample or an array of samples: + +``` +$classifier->predict([3, 2]); +// return 'b' + +$classifier->predict([[3, 2], [1, 5]]); +// return ['b', 'a'] +``` diff --git a/docs/machine-learning/cross-validation/randomsplit.md b/docs/machine-learning/cross-validation/randomsplit.md new file mode 100644 index 0000000..464f0db --- /dev/null +++ b/docs/machine-learning/cross-validation/randomsplit.md @@ -0,0 +1,29 @@ +# RandomSplit + +One of the simplest cross-validation methods is implemented as the `RandomSplit` class. Samples are split into two groups: a train group and a test group. You can adjust the number of samples in each group. 
+ +### Constructor Parameters + +* $dataset - object that implements `Dataset` interface +* $testSize - a fraction of test split (float, from 0 to 1, default: 0.3) +* $seed - seed for random generator (for tests) + +``` +$randomSplit = new RandomSplit($dataset, 0.2); +``` + +### Samples and labels groups + +To get samples or labels from test and train group you can use getters: + +``` +$dataset = new RandomSplit($dataset, 0.3, 1234); + +// train group +$dataset->getTrainSamples(); +$dataset->getTrainLabels(); + +// test group +$dataset->getTestSamples(); +$dataset->getTestLabels(); +``` diff --git a/docs/machine-learning/datasets/array-dataset.md b/docs/machine-learning/datasets/array-dataset.md new file mode 100644 index 0000000..5081ed8 --- /dev/null +++ b/docs/machine-learning/datasets/array-dataset.md @@ -0,0 +1,21 @@ +# ArrayDataset + +Helper class that holds data as PHP `array` type. Implements the `Dataset` interface which is used heavily in other classes. + +### Constructor Parameters + +* $samples - (array) of samples +* $labels - (array) of labels + +``` +$dataset = new ArrayDataset([[1, 1], [2, 1], [3, 2], [4, 1]], ['a', 'a', 'b', 'b']); +``` + +### Samples and labels + +To get samples or labels you can use getters: + +``` +$dataset->getSamples(); +$dataset->getLabels(); +``` diff --git a/docs/machine-learning/datasets/csv-dataset.md b/docs/machine-learning/datasets/csv-dataset.md new file mode 100644 index 0000000..553bc60 --- /dev/null +++ b/docs/machine-learning/datasets/csv-dataset.md @@ -0,0 +1,15 @@ +# CsvDataset + +Helper class that loads data from CSV file. It extends the `ArrayDataset`. 
+ +### Constructor Parameters + +* $filepath - (string) path to `.csv` file +* $features - (int) number of columns that are features (starts from first column), last column must be a label +* $headingRow - (bool) defines whether the file has a heading row (if `true` then first row will be ignored) + +``` +$dataset = new CsvDataset('dataset.csv', 2, true); +``` + +See Array Dataset for more information. diff --git a/docs/machine-learning/datasets/demo/iris.md b/docs/machine-learning/datasets/demo/iris.md new file mode 100644 index 0000000..9e00d5c --- /dev/null +++ b/docs/machine-learning/datasets/demo/iris.md @@ -0,0 +1,34 @@ +# Iris Dataset + +Most popular and widely available dataset of iris flower measurement and class names. + +### Specification + +| Classes | 3 | +| Samples per class | 50 | +| Samples total | 150 | +| Features per sample | 4 | + +### Load + +To load the Iris dataset, simply use: + +``` +$dataset = new Iris(); +``` + +### Several samples + +``` +sepal length,sepal width,petal length,petal width,class +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +``` diff --git a/docs/machine-learning/metric/accuracy.md b/docs/machine-learning/metric/accuracy.md new file mode 100644 index 0000000..b8ec70a --- /dev/null +++ b/docs/machine-learning/metric/accuracy.md @@ -0,0 +1,24 @@ +# Accuracy + +Class for calculating classifier accuracy. + +### Score + +To calculate classifier accuracy score use `score` static method. Parameters: + +* $actualLabels - (array) true sample labels +* $predictedLabels - (array) predicted labels (e.g. 
from test group) +* $normalize - (bool) normalize or not the result (default: true) + +### Example + +``` +$actualLabels = ['a', 'b', 'a', 'b']; +$predictedLabels = ['a', 'a', 'a', 'b']; + +Accuracy::score($actualLabels, $predictedLabels); +// return 0.75 + +Accuracy::score($actualLabels, $predictedLabels, false); +// return 3 +``` diff --git a/docs/machine-learning/metric/distance.md b/docs/machine-learning/metric/distance.md new file mode 100644 index 0000000..de8bcb1 --- /dev/null +++ b/docs/machine-learning/metric/distance.md @@ -0,0 +1,17 @@ +# Distance + +Special class for calculation of different types of distance. + +### Euclidean + +![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance") + +To calculate euclidean distance: + +``` +$a = [4, 6]; +$b = [2, 5]; + +Distance::euclidean($a, $b); +// return 2.2360679774998 +``` diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..f20036f --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,17 @@ +site_name: PHP Machine Learning (PHP-ML) +pages: + - Home: index.md + - Machine Learning: + - Classification: + - KNearestNeighbors: machine-learning/classification/knearestneighbors.md + - Cross Validation: + - RandomSplit: machine-learning/cross-validation/randomsplit.md + - Datasets: + - Array Dataset: machine-learning/datasets/array-dataset.md + - CSV Dataset: machine-learning/datasets/csv-dataset.md + - Demo: + - Iris: machine-learning/datasets/demo/iris.md + - Metric: + - Accuracy: machine-learning/metric/accuracy.md + - Distance: machine-learning/metric/distance.md +theme: readthedocs \ No newline at end of file diff --git a/tests/Phpml/CrossValidation/RandomSplitTest.php b/tests/Phpml/CrossValidation/RandomSplitTest.php index e6ae30e..d31c6a6 100644 --- a/tests/Phpml/CrossValidation/RandomSplitTest.php +++ b/tests/Phpml/CrossValidation/RandomSplitTest.php @@ -32,10 +32,10 @@ class RandomSplitTest extends \PHPUnit_Framework_TestCase $labels = ['a', 
'a', 'b', 'b'] ); - $randomSplit1 = new RandomSplit($dataset, 0.5); + $randomSplit = new RandomSplit($dataset, 0.5); - $this->assertEquals(2, count($randomSplit1->getTestSamples())); - $this->assertEquals(2, count($randomSplit1->getTrainSamples())); + $this->assertEquals(2, count($randomSplit->getTestSamples())); + $this->assertEquals(2, count($randomSplit->getTrainSamples())); $randomSplit2 = new RandomSplit($dataset, 0.25); From 04763b16f0065115fd05a799b111c26b96562c01 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Sat, 9 Apr 2016 00:50:08 +0200 Subject: [PATCH 27/27] add documentation link --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 568c3ab..feac3e5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ -# PHP Machine learning library +# PHP Machine Learning library [![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop) +[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=develop)](http://php-ml.readthedocs.org/en/develop/?badge=develop) [![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml) [![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml) [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop) @@ -9,7 +10,7 @@ Fresh approach to machine learning in PHP. Note that at the moment PHP is not th ## Documentation -The documentation will be available on `Read the Docs` soon ... +To find out how to use PHP-ML, follow the [Documentation](http://php-ml.readthedocs.org/). ## Installation