mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2025-01-25 08:08:31 +00:00
commit
662627c052
25
README.md
25
README.md
@ -1,18 +1,24 @@
|
||||
# PHP Machine learning library
|
||||
# PHP Machine Learning library
|
||||
|
||||
[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop)
|
||||
[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=develop)](http://php-ml.readthedocs.org/en/develop/?badge=develop)
|
||||
[![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml)
|
||||
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
|
||||
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop)
|
||||
|
||||
Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ...
|
||||
|
||||
## Available Algorithms
|
||||
## Documentation
|
||||
|
||||
### Classification
|
||||
|
||||
Identifying to which category an object belongs to.
|
||||
|
||||
* **Naive Bayes** - algorithm based on applying Bayes’ theorem with the “naive” assumption of independence between every pair of features
|
||||
To find out how to use PHP-ML, follow the [Documentation](http://php-ml.readthedocs.org/).
|
||||
|
||||
## Installation
|
||||
|
||||
This repo will be published do packagist.org soon...
|
||||
This library is still under development, but you can already install it with Composer:
|
||||
|
||||
```
|
||||
composer require php-ai/php-ml
|
||||
```
|
||||
|
||||
## To-Do
|
||||
|
||||
@ -21,7 +27,7 @@ This repo will be published do packagist.org soon...
|
||||
|
||||
## Testing
|
||||
|
||||
After installation, you can launch the test suite in project root directory (you will need to install dev requiremnts with composer)
|
||||
After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer)
|
||||
|
||||
```
|
||||
bin/phpunit
|
||||
@ -33,3 +39,4 @@ PHP-ML is released under the MIT Licence. See the bundled LICENSE file for detai
|
||||
|
||||
## Author
|
||||
|
||||
Arkadiusz Kondas (@ArkadiuszKondas)
|
@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "php-ai/php-ml",
|
||||
"type": "library",
|
||||
"description": "PHP Machine learning library",
|
||||
"description": "PHP Machine Learning library",
|
||||
"license": "MIT",
|
||||
"keywords": ["machine learning","pattern recognition","computational learning theory","artificial intelligence"],
|
||||
"homepage": "https://github.com/php-ai/php-ml",
|
||||
@ -11,11 +11,16 @@
|
||||
"email": "arkadiusz.kondas@gmail.com"
|
||||
}
|
||||
],
|
||||
"autoload": {
|
||||
"psr-0": {
|
||||
"Phpml": "src/"
|
||||
}
|
||||
},
|
||||
"config": {
|
||||
"bin-dir": "bin"
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.5.0"
|
||||
"php": ">=7.0.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^5.2"
|
||||
|
185
composer.lock
generated
185
composer.lock
generated
@ -4,8 +4,8 @@
|
||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"hash": "640f762012a359b150ce245491743448",
|
||||
"content-hash": "5efa8db5a672e2128d20c80c18746c72",
|
||||
"hash": "7c34eebd6b8749a1cd09df57e5d1f47a",
|
||||
"content-hash": "087091d0c339e9fa3a551a189ea658bf",
|
||||
"packages": [],
|
||||
"packages-dev": [
|
||||
{
|
||||
@ -155,22 +155,24 @@
|
||||
},
|
||||
{
|
||||
"name": "phpspec/prophecy",
|
||||
"version": "v1.5.0",
|
||||
"version": "v1.6.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/phpspec/prophecy.git",
|
||||
"reference": "4745ded9307786b730d7a60df5cb5a6c43cf95f7"
|
||||
"reference": "3c91bdf81797d725b14cb62906f9a4ce44235972"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/phpspec/prophecy/zipball/4745ded9307786b730d7a60df5cb5a6c43cf95f7",
|
||||
"reference": "4745ded9307786b730d7a60df5cb5a6c43cf95f7",
|
||||
"url": "https://api.github.com/repos/phpspec/prophecy/zipball/3c91bdf81797d725b14cb62906f9a4ce44235972",
|
||||
"reference": "3c91bdf81797d725b14cb62906f9a4ce44235972",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"doctrine/instantiator": "^1.0.2",
|
||||
"php": "^5.3|^7.0",
|
||||
"phpdocumentor/reflection-docblock": "~2.0",
|
||||
"sebastian/comparator": "~1.1"
|
||||
"sebastian/comparator": "~1.1",
|
||||
"sebastian/recursion-context": "~1.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpspec/phpspec": "~2.0"
|
||||
@ -178,7 +180,7 @@
|
||||
"type": "library",
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "1.4.x-dev"
|
||||
"dev-master": "1.5.x-dev"
|
||||
}
|
||||
},
|
||||
"autoload": {
|
||||
@ -211,27 +213,28 @@
|
||||
"spy",
|
||||
"stub"
|
||||
],
|
||||
"time": "2015-08-13 10:07:40"
|
||||
"time": "2016-02-15 07:46:21"
|
||||
},
|
||||
{
|
||||
"name": "phpunit/php-code-coverage",
|
||||
"version": "3.1.1",
|
||||
"version": "3.3.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/php-code-coverage.git",
|
||||
"reference": "92f5c61b5c64159faec5298325ffab0c7e59dcc8"
|
||||
"reference": "fe33716763b604ade4cb442c0794f5bd5ad73004"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/92f5c61b5c64159faec5298325ffab0c7e59dcc8",
|
||||
"reference": "92f5c61b5c64159faec5298325ffab0c7e59dcc8",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/fe33716763b604ade4cb442c0794f5bd5ad73004",
|
||||
"reference": "fe33716763b604ade4cb442c0794f5bd5ad73004",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.6",
|
||||
"php": "^5.6 || ^7.0",
|
||||
"phpunit/php-file-iterator": "~1.3",
|
||||
"phpunit/php-text-template": "~1.2",
|
||||
"phpunit/php-token-stream": "~1.3",
|
||||
"phpunit/php-token-stream": "^1.4.2",
|
||||
"sebastian/code-unit-reverse-lookup": "~1.0",
|
||||
"sebastian/environment": "^1.3.2",
|
||||
"sebastian/version": "~1.0|~2.0"
|
||||
},
|
||||
@ -247,7 +250,7 @@
|
||||
"type": "library",
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "3.1.x-dev"
|
||||
"dev-master": "3.3.x-dev"
|
||||
}
|
||||
},
|
||||
"autoload": {
|
||||
@ -273,7 +276,7 @@
|
||||
"testing",
|
||||
"xunit"
|
||||
],
|
||||
"time": "2016-02-04 13:05:19"
|
||||
"time": "2016-03-03 08:49:08"
|
||||
},
|
||||
{
|
||||
"name": "phpunit/php-file-iterator",
|
||||
@ -455,16 +458,16 @@
|
||||
},
|
||||
{
|
||||
"name": "phpunit/phpunit",
|
||||
"version": "5.2.3",
|
||||
"version": "5.3.1",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/phpunit.git",
|
||||
"reference": "6fdb1d3004ebc7071c4ac62f2881d67c5c11fb59"
|
||||
"reference": "34a3acb401ae79deb37bc6e5f5ec3d325b369b4c"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/6fdb1d3004ebc7071c4ac62f2881d67c5c11fb59",
|
||||
"reference": "6fdb1d3004ebc7071c4ac62f2881d67c5c11fb59",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/34a3acb401ae79deb37bc6e5f5ec3d325b369b4c",
|
||||
"reference": "34a3acb401ae79deb37bc6e5f5ec3d325b369b4c",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -474,18 +477,19 @@
|
||||
"ext-reflection": "*",
|
||||
"ext-spl": "*",
|
||||
"myclabs/deep-copy": "~1.3",
|
||||
"php": ">=5.6",
|
||||
"php": "^5.6 || ^7.0",
|
||||
"phpspec/prophecy": "^1.3.1",
|
||||
"phpunit/php-code-coverage": "~3.0",
|
||||
"phpunit/php-code-coverage": "^3.3.0",
|
||||
"phpunit/php-file-iterator": "~1.4",
|
||||
"phpunit/php-text-template": "~1.2",
|
||||
"phpunit/php-timer": ">=1.0.6",
|
||||
"phpunit/phpunit-mock-objects": ">=3.0.5",
|
||||
"phpunit/php-timer": "^1.0.6",
|
||||
"phpunit/phpunit-mock-objects": "^3.1",
|
||||
"sebastian/comparator": "~1.1",
|
||||
"sebastian/diff": "~1.2",
|
||||
"sebastian/environment": "~1.3",
|
||||
"sebastian/exporter": "~1.2",
|
||||
"sebastian/global-state": "~1.0",
|
||||
"sebastian/object-enumerator": "~1.0",
|
||||
"sebastian/resource-operations": "~1.0",
|
||||
"sebastian/version": "~1.0|~2.0",
|
||||
"symfony/yaml": "~2.1|~3.0"
|
||||
@ -499,7 +503,7 @@
|
||||
"type": "library",
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "5.2.x-dev"
|
||||
"dev-master": "5.3.x-dev"
|
||||
}
|
||||
},
|
||||
"autoload": {
|
||||
@ -525,20 +529,20 @@
|
||||
"testing",
|
||||
"xunit"
|
||||
],
|
||||
"time": "2016-02-08 12:15:53"
|
||||
"time": "2016-04-07 07:04:34"
|
||||
},
|
||||
{
|
||||
"name": "phpunit/phpunit-mock-objects",
|
||||
"version": "3.0.6",
|
||||
"version": "3.1.2",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/phpunit-mock-objects.git",
|
||||
"reference": "49bc700750196c04dd6bc2c4c99cb632b893836b"
|
||||
"reference": "7c34c9bdde4131b824086457a3145e27dba10ca1"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/49bc700750196c04dd6bc2c4c99cb632b893836b",
|
||||
"reference": "49bc700750196c04dd6bc2c4c99cb632b893836b",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/7c34c9bdde4131b824086457a3145e27dba10ca1",
|
||||
"reference": "7c34c9bdde4131b824086457a3145e27dba10ca1",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -556,7 +560,7 @@
|
||||
"type": "library",
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "3.0.x-dev"
|
||||
"dev-master": "3.1.x-dev"
|
||||
}
|
||||
},
|
||||
"autoload": {
|
||||
@ -581,7 +585,52 @@
|
||||
"mock",
|
||||
"xunit"
|
||||
],
|
||||
"time": "2015-12-08 08:47:06"
|
||||
"time": "2016-03-24 05:58:25"
|
||||
},
|
||||
{
|
||||
"name": "sebastian/code-unit-reverse-lookup",
|
||||
"version": "1.0.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/code-unit-reverse-lookup.git",
|
||||
"reference": "c36f5e7cfce482fde5bf8d10d41a53591e0198fe"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/code-unit-reverse-lookup/zipball/c36f5e7cfce482fde5bf8d10d41a53591e0198fe",
|
||||
"reference": "c36f5e7cfce482fde5bf8d10d41a53591e0198fe",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.6"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "~5"
|
||||
},
|
||||
"type": "library",
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "1.0.x-dev"
|
||||
}
|
||||
},
|
||||
"autoload": {
|
||||
"classmap": [
|
||||
"src/"
|
||||
]
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"BSD-3-Clause"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Sebastian Bergmann",
|
||||
"email": "sebastian@phpunit.de"
|
||||
}
|
||||
],
|
||||
"description": "Looks up which function or method a line of code belongs to",
|
||||
"homepage": "https://github.com/sebastianbergmann/code-unit-reverse-lookup/",
|
||||
"time": "2016-02-13 06:45:14"
|
||||
},
|
||||
{
|
||||
"name": "sebastian/comparator",
|
||||
@ -701,16 +750,16 @@
|
||||
},
|
||||
{
|
||||
"name": "sebastian/environment",
|
||||
"version": "1.3.3",
|
||||
"version": "1.3.5",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/environment.git",
|
||||
"reference": "6e7133793a8e5a5714a551a8324337374be209df"
|
||||
"reference": "dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/6e7133793a8e5a5714a551a8324337374be209df",
|
||||
"reference": "6e7133793a8e5a5714a551a8324337374be209df",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf",
|
||||
"reference": "dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -747,7 +796,7 @@
|
||||
"environment",
|
||||
"hhvm"
|
||||
],
|
||||
"time": "2015-12-02 08:37:27"
|
||||
"time": "2016-02-26 18:40:46"
|
||||
},
|
||||
{
|
||||
"name": "sebastian/exporter",
|
||||
@ -866,6 +915,52 @@
|
||||
],
|
||||
"time": "2015-10-12 03:26:01"
|
||||
},
|
||||
{
|
||||
"name": "sebastian/object-enumerator",
|
||||
"version": "1.0.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/object-enumerator.git",
|
||||
"reference": "d4ca2fb70344987502567bc50081c03e6192fb26"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/object-enumerator/zipball/d4ca2fb70344987502567bc50081c03e6192fb26",
|
||||
"reference": "d4ca2fb70344987502567bc50081c03e6192fb26",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.6",
|
||||
"sebastian/recursion-context": "~1.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "~5"
|
||||
},
|
||||
"type": "library",
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "1.0.x-dev"
|
||||
}
|
||||
},
|
||||
"autoload": {
|
||||
"classmap": [
|
||||
"src/"
|
||||
]
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"BSD-3-Clause"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Sebastian Bergmann",
|
||||
"email": "sebastian@phpunit.de"
|
||||
}
|
||||
],
|
||||
"description": "Traverses array structures and object graphs to enumerate all referenced objects",
|
||||
"homepage": "https://github.com/sebastianbergmann/object-enumerator/",
|
||||
"time": "2016-01-28 13:25:10"
|
||||
},
|
||||
{
|
||||
"name": "sebastian/recursion-context",
|
||||
"version": "1.0.2",
|
||||
@ -1006,16 +1101,16 @@
|
||||
},
|
||||
{
|
||||
"name": "symfony/yaml",
|
||||
"version": "v3.0.2",
|
||||
"version": "v3.0.4",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/symfony/yaml.git",
|
||||
"reference": "3cf0709d7fe936e97bee9e954382e449003f1d9a"
|
||||
"reference": "0047c8366744a16de7516622c5b7355336afae96"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/symfony/yaml/zipball/3cf0709d7fe936e97bee9e954382e449003f1d9a",
|
||||
"reference": "3cf0709d7fe936e97bee9e954382e449003f1d9a",
|
||||
"url": "https://api.github.com/repos/symfony/yaml/zipball/0047c8366744a16de7516622c5b7355336afae96",
|
||||
"reference": "0047c8366744a16de7516622c5b7355336afae96",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -1051,7 +1146,7 @@
|
||||
],
|
||||
"description": "Symfony Yaml Component",
|
||||
"homepage": "https://symfony.com",
|
||||
"time": "2016-02-02 13:44:19"
|
||||
"time": "2016-03-04 07:55:57"
|
||||
}
|
||||
],
|
||||
"aliases": [],
|
||||
@ -1059,6 +1154,8 @@
|
||||
"stability-flags": [],
|
||||
"prefer-stable": false,
|
||||
"prefer-lowest": false,
|
||||
"platform": [],
|
||||
"platform": {
|
||||
"php": ">=7.0.0"
|
||||
},
|
||||
"platform-dev": []
|
||||
}
|
||||
|
37
docs/index.md
Normal file
37
docs/index.md
Normal file
@ -0,0 +1,37 @@
|
||||
# PHP Machine Learning (PHP-ML)
|
||||
|
||||
[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop)
|
||||
[![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml)
|
||||
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
|
||||
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop)
|
||||
|
||||
Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ...
|
||||
|
||||
## Installation
|
||||
|
||||
This library is still under development, but you can already install it with Composer:
|
||||
|
||||
```
|
||||
composer require php-ai/php-ml
|
||||
```
|
||||
|
||||
## To-Do
|
||||
|
||||
* implement more algorithms
|
||||
* integration with Lavacharts for data visualization
|
||||
|
||||
## Testing
|
||||
|
||||
After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer)
|
||||
|
||||
```
|
||||
bin/phpunit
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
PHP-ML is released under the MIT Licence. See the bundled LICENSE file for details.
|
||||
|
||||
## Author
|
||||
|
||||
Arkadiusz Kondas (@ArkadiuszKondas)
|
35
docs/machine-learning/classification/knearestneighbors.md
Normal file
35
docs/machine-learning/classification/knearestneighbors.md
Normal file
@ -0,0 +1,35 @@
|
||||
# KNearestNeighbors Classifier
|
||||
|
||||
Classifier implementing the k-nearest neighbors algorithm.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
* $k - number of nearest neighbors to scan (default: 3)
|
||||
|
||||
```
|
||||
$classifier = new KNearestNeighbors($k=4);
|
||||
```
|
||||
|
||||
### Train
|
||||
|
||||
To train a classifier simply provide train samples and labels (as `array`):
|
||||
|
||||
```
|
||||
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||
$labels = ['a', 'a', 'a', 'b', 'b', 'b'];
|
||||
|
||||
$classifier = new KNearestNeighbors();
|
||||
$classifier->train($samples, $labels);
|
||||
```
|
||||
|
||||
### Predict
|
||||
|
||||
To predict sample class use `predict` method. You can provide one sample or array of samples:
|
||||
|
||||
```
|
||||
$classifier->predict([3, 2]);
|
||||
// return 'b'
|
||||
|
||||
$classifier->predict([[3, 2], [1, 5]]);
|
||||
// return ['b', 'a']
|
||||
```
|
29
docs/machine-learning/cross-validation/randomsplit.md
Normal file
29
docs/machine-learning/cross-validation/randomsplit.md
Normal file
@ -0,0 +1,29 @@
|
||||
# RandomSplit
|
||||
|
||||
One of the simplest cross-validation methods is implemented as the `RandomSplit` class. Samples are split into two groups: a train group and a test group. You can adjust the number of samples in each group.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
* $dataset - object that implements `Dataset` interface
|
||||
* $testSize - a fraction of test split (float, from 0 to 1, default: 0.3)
|
||||
* $seed - seed for random generator (for tests)
|
||||
|
||||
```
|
||||
$randomSplit = new RandomSplit($dataset, 0.2);
|
||||
```
|
||||
|
||||
### Samples and labels groups
|
||||
|
||||
To get samples or labels from test and train group you can use getters:
|
||||
|
||||
```
|
||||
$dataset = new RandomSplit($dataset, 0.3, 1234);
|
||||
|
||||
// train group
|
||||
$dataset->getTrainSamples();
|
||||
$dataset->getTrainLabels();
|
||||
|
||||
// test group
|
||||
$dataset->getTestSamples();
|
||||
$dataset->getTestLabels();
|
||||
```
|
21
docs/machine-learning/datasets/array-dataset.md
Normal file
21
docs/machine-learning/datasets/array-dataset.md
Normal file
@ -0,0 +1,21 @@
|
||||
# ArrayDataset
|
||||
|
||||
Helper class that holds data as PHP `array` type. Implements the `Dataset` interface which is used heavily in other classes.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
* $samples - (array) of samples
|
||||
* $labels - (array) of labels
|
||||
|
||||
```
|
||||
$dataset = new ArrayDataset([[1, 1], [2, 1], [3, 2], [4, 1]], ['a', 'a', 'b', 'b']);
|
||||
```
|
||||
|
||||
### Samples and labels
|
||||
|
||||
To get samples or labels you can use getters:
|
||||
|
||||
```
|
||||
$dataset->getSamples();
|
||||
$dataset->getLabels();
|
||||
```
|
15
docs/machine-learning/datasets/csv-dataset.md
Normal file
15
docs/machine-learning/datasets/csv-dataset.md
Normal file
@ -0,0 +1,15 @@
|
||||
# CsvDataset
|
||||
|
||||
Helper class that loads data from CSV file. It extends the `ArrayDataset`.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
* $filepath - (string) path to `.csv` file
|
||||
* $features - (int) number of columns that are features (starts from first column), last column must be a label
|
||||
* $headingRow - (bool) whether the file has a heading row (if `true`, the first row will be ignored)
|
||||
|
||||
```
|
||||
$dataset = new CsvDataset('dataset.csv', 2, true);
|
||||
```
|
||||
|
||||
See Array Dataset for more information.
|
34
docs/machine-learning/datasets/demo/iris.md
Normal file
34
docs/machine-learning/datasets/demo/iris.md
Normal file
@ -0,0 +1,34 @@
|
||||
# Iris Dataset
|
||||
|
||||
Most popular and widely available dataset of iris flower measurement and class names.
|
||||
|
||||
### Specification
|
||||
|
||||
| Classes | 3 |
|
||||
| Samples per class | 50 |
|
||||
| Samples total | 150 |
|
||||
| Features per sample | 4 |
|
||||
|
||||
### Load
|
||||
|
||||
To load the Iris dataset simply use:
|
||||
|
||||
```
|
||||
$dataset = new Iris();
|
||||
```
|
||||
|
||||
### Several samples
|
||||
|
||||
```
|
||||
sepal length,sepal width,petal length,petal width,class
|
||||
5.1,3.5,1.4,0.2,Iris-setosa
|
||||
4.9,3.0,1.4,0.2,Iris-setosa
|
||||
4.7,3.2,1.3,0.2,Iris-setosa
|
||||
7.0,3.2,4.7,1.4,Iris-versicolor
|
||||
6.4,3.2,4.5,1.5,Iris-versicolor
|
||||
6.9,3.1,4.9,1.5,Iris-versicolor
|
||||
6.3,3.3,6.0,2.5,Iris-virginica
|
||||
5.8,2.7,5.1,1.9,Iris-virginica
|
||||
7.1,3.0,5.9,2.1,Iris-virginica
|
||||
6.3,2.9,5.6,1.8,Iris-virginica
|
||||
```
|
24
docs/machine-learning/metric/accuracy.md
Normal file
24
docs/machine-learning/metric/accuracy.md
Normal file
@ -0,0 +1,24 @@
|
||||
# Accuracy
|
||||
|
||||
Class for calculating classifier accuracy.
|
||||
|
||||
### Score
|
||||
|
||||
To calculate the classifier accuracy score use the static `score` method. Parameters:
|
||||
|
||||
* $actualLabels - (array) true sample labels
|
||||
* $predictedLabels - (array) predicted labels (e.g. from test group)
|
||||
* $normalize - (bool) normalize or not the result (default: true)
|
||||
|
||||
### Example
|
||||
|
||||
```
|
||||
$actualLabels = ['a', 'b', 'a', 'b'];
|
||||
$predictedLabels = ['a', 'a', 'a', 'b'];
|
||||
|
||||
Accuracy::score($actualLabels, $predictedLabels);
|
||||
// return 0.75
|
||||
|
||||
Accuracy::score($actualLabels, $predictedLabels, false);
|
||||
// return 3
|
||||
```
|
17
docs/machine-learning/metric/distance.md
Normal file
17
docs/machine-learning/metric/distance.md
Normal file
@ -0,0 +1,17 @@
|
||||
# Distance
|
||||
|
||||
Special class for calculation of different types of distance.
|
||||
|
||||
### Euclidean
|
||||
|
||||
![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance")
|
||||
|
||||
To calculate euclidean distance:
|
||||
|
||||
```
|
||||
$a = [4, 6];
|
||||
$b = [2, 5];
|
||||
|
||||
Distance::euclidean($a, $b);
|
||||
// return 2.2360679774998
|
||||
```
|
17
mkdocs.yml
Normal file
17
mkdocs.yml
Normal file
@ -0,0 +1,17 @@
|
||||
site_name: PHP Machine Learning (PHP-ML)
|
||||
pages:
|
||||
- Home: index.md
|
||||
- Machine Learning:
|
||||
- Classification:
|
||||
- KNearestNeighbors: machine-learning/classification/knearestneighbors.md
|
||||
- Cross Validation:
|
||||
- RandomSplit: machine-learning/cross-validation/randomsplit.md
|
||||
- Datasets:
|
||||
- Array Dataset: machine-learning/datasets/array-dataset.md
|
||||
- CSV Dataset: machine-learning/datasets/csv-dataset.md
|
||||
- Demo:
|
||||
- Iris: machine-learning/datasets/demo/iris.md
|
||||
- Metric:
|
||||
- Accuracy: machine-learning/metric/accuracy.md
|
||||
- Distance: machine-learning/metric/distance.md
|
||||
theme: readthedocs
|
14
phpunit.xml
Normal file
14
phpunit.xml
Normal file
@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<phpunit
|
||||
colors="true"
|
||||
beStrictAboutTestsThatDoNotTestAnything="true"
|
||||
beStrictAboutOutputDuringTests="true"
|
||||
beStrictAboutTestSize="true"
|
||||
beStrictAboutChangesToGlobalState="true"
|
||||
>
|
||||
<testsuites>
|
||||
<testsuite name="PHP-ML Test Suite">
|
||||
<directory>tests/*</directory>
|
||||
</testsuite>
|
||||
</testsuites>
|
||||
</phpunit>
|
@ -1,20 +1,21 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Classifier;
|
||||
|
||||
interface Classifier
|
||||
{
|
||||
|
||||
/**
|
||||
* @param array $features
|
||||
* @param array $samples
|
||||
* @param array $labels
|
||||
*/
|
||||
public function train($features, $labels);
|
||||
public function train(array $samples, array $labels);
|
||||
|
||||
/**
|
||||
* @param mixed $feature
|
||||
* @param array $samples
|
||||
*
|
||||
* @return mixed
|
||||
*/
|
||||
public function predict($feature);
|
||||
|
||||
public function predict(array $samples);
|
||||
}
|
||||
|
105
src/Phpml/Classifier/KNearestNeighbors.php
Normal file
105
src/Phpml/Classifier/KNearestNeighbors.php
Normal file
@ -0,0 +1,105 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Classifier;
|
||||
|
||||
use Phpml\Metric\Distance;
|
||||
|
||||
/**
 * Classifier that labels a sample by majority vote among the k training
 * samples with the smallest distance, as computed by Distance::euclidean().
 */
class KNearestNeighbors implements Classifier
{
    /**
     * Number of nearest neighbors that take part in the vote.
     *
     * @var int
     */
    private $k;

    /**
     * Training samples, as passed to train().
     *
     * @var array
     */
    private $samples;

    /**
     * Training labels; the label at a given key belongs to the sample at the same key.
     *
     * @var array
     */
    private $labels;

    /**
     * @param int $k number of nearest neighbors to scan
     */
    public function __construct(int $k = 3)
    {
        $this->k = $k;
        $this->samples = [];
        $this->labels = [];
    }

    /**
     * Stores the training set; any previously stored set is replaced.
     *
     * @param array $samples
     * @param array $labels
     */
    public function train(array $samples, array $labels)
    {
        $this->samples = $samples;
        $this->labels = $labels;
    }

    /**
     * Predicts the label of one sample, or of every sample in a batch.
     *
     * @param array $samples a single sample (array of feature values) or an array of samples
     *
     * @return mixed a single label for one sample, or an array of labels
     *               (keyed like the input) for a batch
     */
    public function predict(array $samples)
    {
        // Look at the first element regardless of its key: a batch whose keys
        // do not start at 0 must still be recognised as a batch, and an empty
        // array must not trigger an undefined-offset notice.
        if (!is_array(reset($samples))) {
            return $this->predictSample($samples);
        }

        $predicted = [];
        foreach ($samples as $index => $sample) {
            $predicted[$index] = $this->predictSample($sample);
        }

        return $predicted;
    }

    /**
     * Counts the labels of the k nearest training samples and returns the
     * label with the most votes.
     *
     * @param array $sample
     *
     * @return mixed the winning label as an array key (null when nothing was trained)
     */
    private function predictSample(array $sample)
    {
        $neighbors = $this->kNeighborsDistances($sample);

        // One zero-initialised vote counter per distinct training label.
        $votes = array_fill_keys($this->labels, 0);

        foreach ($neighbors as $index => $distance) {
            ++$votes[$this->labels[$index]];
        }

        // Highest vote count first; the key of the first entry is the prediction.
        arsort($votes);
        reset($votes);

        return key($votes);
    }

    /**
     * Computes the distance from the sample to every training sample and
     * keeps the k smallest, preserving the training-sample keys.
     *
     * @param array $sample
     *
     * @return array distances keyed by training-sample key, ascending
     *
     * @throws \Phpml\Exception\InvalidArgumentException presumably raised by Distance::euclidean() — confirm there
     */
    private function kNeighborsDistances(array $sample): array
    {
        $distances = [];

        foreach ($this->samples as $index => $neighbor) {
            $distances[$index] = Distance::euclidean($sample, $neighbor);
        }

        asort($distances);

        return array_slice($distances, 0, $this->k, true);
    }
}
|
@ -1,26 +1,25 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Classifier;
|
||||
|
||||
abstract class NaiveBayes implements Classifier
|
||||
class NaiveBayes implements Classifier
|
||||
{
|
||||
|
||||
/**
|
||||
* @param array $features
|
||||
* @param array $samples
|
||||
* @param array $labels
|
||||
*/
|
||||
public function train($features, $labels)
|
||||
public function train(array $samples, array $labels)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @param mixed $feature
|
||||
* @param array $samples
|
||||
*
|
||||
* @return mixed
|
||||
*/
|
||||
public function predict($feature)
|
||||
public function predict(array $samples)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,8 +0,0 @@
|
||||
<?php
|
||||
|
||||
use Phpml\Classifier\NaiveBayes;
|
||||
|
||||
class GaussianNaiveBayes extends NaiveBayes
|
||||
{
|
||||
|
||||
}
|
105
src/Phpml/CrossValidation/RandomSplit.php
Normal file
105
src/Phpml/CrossValidation/RandomSplit.php
Normal file
@ -0,0 +1,105 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\CrossValidation;
|
||||
|
||||
use Phpml\Dataset\Dataset;
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
/**
 * Randomly partitions a dataset into a test group and a train group.
 *
 * Every sample/label pair of the dataset ends up in exactly one of the two
 * groups.
 */
class RandomSplit
{
    /**
     * @var array
     */
    private $trainSamples = [];

    /**
     * @var array
     */
    private $testSamples = [];

    /**
     * @var array
     */
    private $trainLabels = [];

    /**
     * @var array
     */
    private $testLabels = [];

    /**
     * @param Dataset  $dataset  source of samples and labels
     * @param float    $testSize fraction of the dataset assigned to the test group; must be strictly between 0 and 1
     * @param int|null $seed     seed for the mt_rand generator; null lets PHP pick one
     *
     * @throws InvalidArgumentException when $testSize is not strictly between 0 and 1
     */
    public function __construct(Dataset $dataset, float $testSize = 0.3, int $seed = null)
    {
        if (0 >= $testSize || 1 <= $testSize) {
            throw InvalidArgumentException::percentNotInRange('testSize');
        }
        $this->seedGenerator($seed);

        // Work on positionally indexed copies so a random offset always hits an element.
        $samples = array_values($dataset->getSamples());
        $labels = array_values($dataset->getLabels());
        $datasetSize = count($samples);

        for ($remaining = $datasetSize; $remaining > 0; --$remaining) {
            // Draw only among the pairs that have not been assigned yet.
            $key = mt_rand(0, $remaining - 1);

            // The test group is filled first; once it holds the requested
            // fraction, every further draw goes to the train group.
            $setName = count($this->testSamples) / $datasetSize >= $testSize ? 'train' : 'test';

            $this->{$setName.'Samples'}[] = $samples[$key];
            $this->{$setName.'Labels'}[] = $labels[$key];

            // Remove the drawn pair (array_splice also reindexes) so it cannot
            // be picked twice or appear in both groups.
            array_splice($samples, $key, 1);
            array_splice($labels, $key, 1);
        }
    }

    /**
     * @return array
     */
    public function getTrainSamples()
    {
        return $this->trainSamples;
    }

    /**
     * @return array
     */
    public function getTestSamples()
    {
        return $this->testSamples;
    }

    /**
     * @return array
     */
    public function getTrainLabels()
    {
        return $this->trainLabels;
    }

    /**
     * @return array
     */
    public function getTestLabels()
    {
        return $this->testLabels;
    }

    /**
     * Seeds the mt_rand generator, with the given seed or with one chosen by PHP.
     *
     * @param int|null $seed
     */
    private function seedGenerator(int $seed = null)
    {
        if (null === $seed) {
            mt_srand();
        } else {
            mt_srand($seed);
        }
    }
}
|
52
src/Phpml/Dataset/ArrayDataset.php
Normal file
52
src/Phpml/Dataset/ArrayDataset.php
Normal file
@ -0,0 +1,52 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Dataset;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
/**
 * Dataset backed by two plain PHP arrays: one of samples, one of labels.
 */
class ArrayDataset implements Dataset
{
    /**
     * @var array
     */
    protected $samples = [];

    /**
     * @var array
     */
    protected $labels = [];

    /**
     * @param array $samples
     * @param array $labels
     *
     * @throws InvalidArgumentException when the two arrays differ in size
     */
    public function __construct(array $samples, array $labels)
    {
        $sampleCount = count($samples);
        $labelCount = count($labels);

        // Every sample needs exactly one label.
        if ($sampleCount !== $labelCount) {
            throw InvalidArgumentException::sizeNotMatch();
        }

        $this->labels = $labels;
        $this->samples = $samples;
    }

    /**
     * @return array the stored samples
     */
    public function getSamples(): array
    {
        return $this->samples;
    }

    /**
     * @return array the stored labels
     */
    public function getLabels(): array
    {
        return $this->labels;
    }
}
|
44
src/Phpml/Dataset/CsvDataset.php
Normal file
44
src/Phpml/Dataset/CsvDataset.php
Normal file
@ -0,0 +1,44 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Dataset;
|
||||
|
||||
use Phpml\Exception\DatasetException;
|
||||
|
||||
class CsvDataset extends ArrayDataset
{
    /**
     * Path of the CSV file the dataset was loaded from.
     *
     * @var string
     */
    protected $filepath;

    /**
     * Loads samples and labels from a comma separated file.
     *
     * The first $features columns of each row form the sample and the column
     * right after them is stored as the label. Blank lines are skipped.
     * Rows with fewer than $features + 1 columns are not validated here.
     *
     * @param string $filepath   path to the CSV file
     * @param int    $features   number of leading columns that hold features
     * @param bool   $headingRow whether the first row is a header to skip
     *
     * @throws DatasetException when the file is missing or cannot be opened
     */
    public function __construct(string $filepath, int $features, bool $headingRow = true)
    {
        if (!file_exists($filepath)) {
            throw DatasetException::missingFile(basename($filepath));
        }

        $handle = fopen($filepath, 'r');
        if (false === $handle) {
            throw DatasetException::cantOpenFile(basename($filepath));
        }

        $this->filepath = $filepath;

        $row = 0;
        // Length 0 means no line-length limit, so long rows are read whole.
        while (($data = fgetcsv($handle, 0, ',')) !== false) {
            ++$row;
            if ($headingRow && 1 === $row) {
                continue;
            }

            // fgetcsv() reports a blank line as an array with a single null field.
            if ([null] === $data) {
                continue;
            }

            $this->samples[] = array_slice($data, 0, $features);
            $this->labels[] = $data[$features];
        }

        fclose($handle);
    }
}
|
@ -1,10 +1,18 @@
|
||||
<?php
|
||||
|
||||
abstract class Dataset
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Dataset;
|
||||
|
||||
interface Dataset
|
||||
{
|
||||
/**
|
||||
* @var string
|
||||
* @return array
|
||||
*/
|
||||
protected $filepath;
|
||||
public function getSamples(): array;
|
||||
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
public function getLabels(): array;
|
||||
}
|
||||
|
22
src/Phpml/Dataset/Demo/Iris.php
Normal file
22
src/Phpml/Dataset/Demo/Iris.php
Normal file
@ -0,0 +1,22 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Dataset\Demo;
|
||||
|
||||
use Phpml\Dataset\CsvDataset;
|
||||
|
||||
/**
|
||||
* Classes: 3
|
||||
* Samples per class: 50
|
||||
* Samples total: 150
|
||||
* Features per sample: 4.
|
||||
*/
|
||||
class Iris extends CsvDataset
{
    /**
     * Loads data/iris.csv, resolved relative to this file, with 4 feature
     * columns and the heading row skipped.
     */
    public function __construct()
    {
        parent::__construct(__DIR__.'/../../../../data/iris.csv', 4, true);
    }
}
|
@ -1,10 +0,0 @@
|
||||
<?php
|
||||
|
||||
class Iris extends Dataset
|
||||
{
|
||||
/**
|
||||
* @var string
|
||||
*/
|
||||
protected $filepath = 'iris.csv';
|
||||
|
||||
}
|
21
src/Phpml/Exception/DatasetException.php
Normal file
21
src/Phpml/Exception/DatasetException.php
Normal file
@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class DatasetException extends \Exception
{
    /**
     * Builds the exception reporting that a dataset file is missing.
     *
     * @param string $filepath file name embedded in the message
     *
     * @return DatasetException
     */
    public static function missingFile($filepath)
    {
        $message = sprintf('Dataset file %s missing.', $filepath);

        return new self($message);
    }

    /**
     * Builds the exception reporting that a dataset file could not be opened.
     *
     * @param string $filepath file name embedded in the message
     *
     * @return DatasetException
     */
    public static function cantOpenFile($filepath)
    {
        $message = sprintf('Dataset file %s can\'t be open.', $filepath);

        return new self($message);
    }
}
|
26
src/Phpml/Exception/InvalidArgumentException.php
Normal file
26
src/Phpml/Exception/InvalidArgumentException.php
Normal file
@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class InvalidArgumentException extends \Exception
{
    /**
     * Builds the exception reporting that two arguments differ in size.
     *
     * @return InvalidArgumentException
     */
    public static function sizeNotMatch()
    {
        $message = 'Size of given arguments not match';

        return new self($message);
    }

    /**
     * Builds the exception reporting a value outside the 0.0 - 1.0 range.
     *
     * @param string $name name of the offending argument, embedded in the message
     *
     * @return InvalidArgumentException
     */
    public static function percentNotInRange($name)
    {
        $message = sprintf('%s must be between 0.0 and 1.0', $name);

        return new self($message);
    }
}
|
39
src/Phpml/Metric/Accuracy.php
Normal file
39
src/Phpml/Metric/Accuracy.php
Normal file
@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Metric;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
class Accuracy
{
    /**
     * Counts how many predicted labels match the actual ones.
     *
     * Labels are paired by array key and compared strictly (===).
     *
     * @param array $actualLabels
     * @param array $predictedLabels
     * @param bool  $normalize       when true the fraction of matches is
     *                               returned instead of the raw count
     *
     * @return float|int raw match count, or the matching fraction when
     *                   normalised (0.0 for empty input)
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    public static function score(array $actualLabels, array $predictedLabels, bool $normalize = true)
    {
        $total = count($actualLabels);
        if ($total !== count($predictedLabels)) {
            throw InvalidArgumentException::sizeNotMatch();
        }

        $score = 0;
        foreach ($actualLabels as $index => $label) {
            if ($label === $predictedLabels[$index]) {
                ++$score;
            }
        }

        if (!$normalize) {
            return $score;
        }

        // Nothing to score: return 0.0 instead of dividing by zero.
        if (0 === $total) {
            return 0.0;
        }

        return $score / $total;
    }
}
|
34
src/Phpml/Metric/Distance.php
Normal file
34
src/Phpml/Metric/Distance.php
Normal file
@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Metric;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
class Distance
{
    /**
     * Computes the Euclidean distance between two vectors.
     *
     * Coordinates are paired by array key, so both vectors must use the same
     * keys; plain lists satisfy this automatically.
     *
     * @param array $a
     * @param array $b
     *
     * @return float
     *
     * @throws InvalidArgumentException when the vectors differ in length
     */
    public static function euclidean(array $a, array $b): float
    {
        if (count($a) !== count($b)) {
            throw InvalidArgumentException::sizeNotMatch();
        }

        $sumOfSquares = 0;
        // Walk the keys of $a rather than 0..n-1 so non-sequential keys work too.
        foreach ($a as $key => $value) {
            $sumOfSquares += ($value - $b[$key]) ** 2;
        }

        return sqrt($sumOfSquares);
    }
}
|
58
tests/Phpml/Classifier/KNearestNeighborsTest.php
Normal file
58
tests/Phpml/Classifier/KNearestNeighborsTest.php
Normal file
@ -0,0 +1,58 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Classifier;
|
||||
|
||||
use Phpml\Classifier\KNearestNeighbors;
|
||||
use Phpml\CrossValidation\RandomSplit;
|
||||
use Phpml\Dataset\Demo\Iris;
|
||||
use Phpml\Metric\Accuracy;
|
||||
|
||||
class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Predicts one sample at a time with a classifier built without arguments.
     */
    public function testPredictSingleSampleWithDefaultK()
    {
        $classifier = new KNearestNeighbors();
        $classifier->train(
            [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]],
            ['a', 'a', 'a', 'b', 'b', 'b']
        );

        foreach ([[3, 2], [5, 1], [4, 3], [4, -5]] as $sample) {
            $this->assertEquals('b', $classifier->predict($sample));
        }

        foreach ([[2, 3], [1, 2], [1, 5], [3, 10]] as $sample) {
            $this->assertEquals('a', $classifier->predict($sample));
        }
    }

    /**
     * Predicts a whole list of samples in a single call.
     */
    public function testPredictArrayOfSamples()
    {
        $classifier = new KNearestNeighbors();
        $classifier->train(
            [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]],
            ['a', 'a', 'a', 'b', 'b', 'b']
        );

        $expected = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a'];
        $actual = $classifier->predict(
            [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]
        );

        $this->assertEquals($expected, $actual);
    }

    /**
     * Checks the accuracy reached on a seeded 50/50 split of the Iris data.
     */
    public function testAccuracyOnIrisDataset()
    {
        // Arguments are: dataset, test size, seed.
        $split = new RandomSplit(new Iris(), 0.5, 123);

        // The constructor argument is k.
        $classifier = new KNearestNeighbors(4);
        $classifier->train($split->getTrainSamples(), $split->getTrainLabels());

        $predicted = $classifier->predict($split->getTestSamples());

        $this->assertEquals(0.96, Accuracy::score($split->getTestLabels(), $predicted));
    }
}
|
94
tests/Phpml/CrossValidation/RandomSplitTest.php
Normal file
94
tests/Phpml/CrossValidation/RandomSplitTest.php
Normal file
@ -0,0 +1,94 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Phpml\CrossValidation;
|
||||
|
||||
use Phpml\CrossValidation\RandomSplit;
|
||||
use Phpml\Dataset\ArrayDataset;
|
||||
|
||||
class RandomSplitTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Expects an exception for a test size of 0.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnToSmallTestSize()
    {
        new RandomSplit(new ArrayDataset([], []), 0);
    }

    /**
     * Expects an exception for a test size of 1.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnToBigTestSize()
    {
        new RandomSplit(new ArrayDataset([], []), 1);
    }

    /**
     * Checks the sizes of both parts for unseeded splits.
     */
    public function testDatasetRandomSplitWithoutSeed()
    {
        $dataset = new ArrayDataset(
            [[1], [2], [3], [4]],
            ['a', 'a', 'b', 'b']
        );

        $randomSplit = new RandomSplit($dataset, 0.5);

        $this->assertCount(2, $randomSplit->getTestSamples());
        $this->assertCount(2, $randomSplit->getTrainSamples());

        $randomSplit2 = new RandomSplit($dataset, 0.25);

        $this->assertCount(1, $randomSplit2->getTestSamples());
        $this->assertCount(3, $randomSplit2->getTrainSamples());
    }

    /**
     * Two splits built with the same seed must be equal.
     */
    public function testDatasetRandomSplitWithSameSeed()
    {
        $dataset = new ArrayDataset(
            [[1], [2], [3], [4], [5], [6], [7], [8]],
            ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b']
        );

        $seed = 123;

        $randomSplit1 = new RandomSplit($dataset, 0.5, $seed);
        $randomSplit2 = new RandomSplit($dataset, 0.5, $seed);

        $this->assertEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels());
        $this->assertEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples());
        $this->assertEquals($randomSplit1->getTrainLabels(), $randomSplit2->getTrainLabels());
        $this->assertEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples());
    }

    /**
     * Two splits built with different seeds must differ.
     */
    public function testDatasetRandomSplitWithDifferentSeed()
    {
        $dataset = new ArrayDataset(
            [[1], [2], [3], [4], [5], [6], [7], [8]],
            ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b']
        );

        $randomSplit1 = new RandomSplit($dataset, 0.5, 4321);
        $randomSplit2 = new RandomSplit($dataset, 0.5, 1234);

        $this->assertNotEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels());
        $this->assertNotEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples());
        $this->assertNotEquals($randomSplit1->getTrainLabels(), $randomSplit2->getTrainLabels());
        $this->assertNotEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples());
    }

    /**
     * Each sample must stay at the same position as its label after the split.
     */
    public function testRandomSplitCorrectSampleAndLabelPosition()
    {
        // Every sample's single feature equals its label, so positions can be compared.
        $dataset = new ArrayDataset(
            [[1], [2], [3], [4]],
            [1, 2, 3, 4]
        );

        $randomSplit = new RandomSplit($dataset, 0.5);

        $this->assertEquals($randomSplit->getTestSamples()[0][0], $randomSplit->getTestLabels()[0]);
        $this->assertEquals($randomSplit->getTestSamples()[1][0], $randomSplit->getTestLabels()[1]);
        $this->assertEquals($randomSplit->getTrainSamples()[0][0], $randomSplit->getTrainLabels()[0]);
        $this->assertEquals($randomSplit->getTrainSamples()[1][0], $randomSplit->getTrainLabels()[1]);
    }
}
|
29
tests/Phpml/Dataset/ArrayDatasetTest.php
Normal file
29
tests/Phpml/Dataset/ArrayDatasetTest.php
Normal file
@ -0,0 +1,29 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Phpml\Dataset;
|
||||
|
||||
use Phpml\Dataset\ArrayDataset;
|
||||
|
||||
class ArrayDatasetTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Expects an exception when samples and labels differ in length.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArgumentsSize()
    {
        new ArrayDataset([0, 1], [0]);
    }

    /**
     * The dataset must hand back exactly what it was constructed with.
     */
    public function testArrayDataset()
    {
        $samples = [[1], [2], [3], [4]];
        $labels = ['a', 'a', 'b', 'b'];

        $dataset = new ArrayDataset($samples, $labels);

        $this->assertEquals($samples, $dataset->getSamples());
        $this->assertEquals($labels, $dataset->getLabels());
    }
}
|
28
tests/Phpml/Dataset/CsvDatasetTest.php
Normal file
28
tests/Phpml/Dataset/CsvDatasetTest.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Phpml\Dataset;
|
||||
|
||||
use Phpml\Dataset\CsvDataset;
|
||||
|
||||
class CsvDatasetTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Expects an exception when the file does not exist.
     *
     * @expectedException \Phpml\Exception\DatasetException
     */
    public function testThrowExceptionOnMissingFile()
    {
        new CsvDataset('missingFile', 3);
    }

    /**
     * Loads the bundled fixture and checks the number of samples and labels.
     */
    public function testSampleCsvDataset()
    {
        $filePath = __DIR__.'/Resources/dataset.csv';

        $dataset = new CsvDataset($filePath, 2, true);

        $this->assertCount(10, $dataset->getSamples());
        $this->assertCount(10, $dataset->getLabels());
    }
}
|
22
tests/Phpml/Dataset/Demo/IrisTest.php
Normal file
22
tests/Phpml/Dataset/Demo/IrisTest.php
Normal file
@ -0,0 +1,22 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Phpml\Dataset\Demo;
|
||||
|
||||
use Phpml\Dataset\Demo\Iris;
|
||||
|
||||
class IrisTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Checks the size of the loaded Iris dataset and of a single sample.
     */
    public function testLoadingIrisDataset()
    {
        $iris = new Iris();

        // whole dataset
        $this->assertCount(150, $iris->getSamples());
        $this->assertCount(150, $iris->getLabels());

        // one sample features count
        $this->assertCount(4, $iris->getSamples()[0]);
    }
}
|
11
tests/Phpml/Dataset/Resources/dataset.csv
Normal file
11
tests/Phpml/Dataset/Resources/dataset.csv
Normal file
@ -0,0 +1,11 @@
|
||||
feature1,feature2,label
|
||||
1,1,a
|
||||
2,1,b
|
||||
3,1,c
|
||||
4,5,a
|
||||
2,4,a
|
||||
1,5,a
|
||||
2,6,b
|
||||
3,7,c
|
||||
4,4,a
|
||||
2,0,a
|
|
37
tests/Phpml/Metric/AccuracyTest.php
Normal file
37
tests/Phpml/Metric/AccuracyTest.php
Normal file
@ -0,0 +1,37 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Phpml\Metric;
|
||||
|
||||
use Phpml\Metric\Accuracy;
|
||||
|
||||
class AccuracyTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Expects an exception when the label arrays differ in length.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArguments()
    {
        Accuracy::score(['a', 'b', 'a', 'b'], ['a', 'a']);
    }

    /**
     * Two matches out of four give a normalised score of 0.5.
     */
    public function testCalculateNormalizedScore()
    {
        $score = Accuracy::score(
            ['a', 'b', 'a', 'b'],
            ['a', 'a', 'b', 'b']
        );

        $this->assertEquals(0.5, $score);
    }

    /**
     * Three matches give a raw score of 3 when normalisation is off.
     */
    public function testCalculateNotNormalizedScore()
    {
        $score = Accuracy::score(
            ['a', 'b', 'a', 'b'],
            ['a', 'b', 'b', 'b'],
            false
        );

        $this->assertEquals(3, $score);
    }
}
|
51
tests/Phpml/Metric/DistanceTest.php
Normal file
51
tests/Phpml/Metric/DistanceTest.php
Normal file
@ -0,0 +1,51 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Phpml\Metric;
|
||||
|
||||
use Phpml\Metric\Distance;
|
||||
|
||||
class DistanceTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Tolerance passed to assertEquals() when comparing floating point distances.
     */
    const DELTA = 1e-10;

    /**
     * Expects an exception when the vectors differ in length.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArgumentsInEuclidean()
    {
        $a = [0, 1, 2];
        $b = [0, 2];

        Distance::euclidean($a, $b);
    }

    /**
     * With one dimension the distance is the absolute difference.
     */
    public function testCalculateEuclideanDistanceForOneDimension()
    {
        $a = [4];
        $b = [2];

        $expectedDistance = 2;
        $actualDistance = Distance::euclidean($a, $b);

        $this->assertEquals($expectedDistance, $actualDistance, '', self::DELTA);
    }

    /**
     * Checks the distance for two- and three-dimensional vectors.
     */
    public function testCalculateEuclideanDistanceForTwoAndMoreDimension()
    {
        $a = [4, 6];
        $b = [2, 5];

        $expectedDistance = 2.2360679774998;
        $actualDistance = Distance::euclidean($a, $b);

        // The expected literal is rounded, so compare with an explicit tolerance.
        $this->assertEquals($expectedDistance, $actualDistance, '', self::DELTA);

        $a = [6, 10, 3];
        $b = [2, 5, 5];

        $expectedDistance = 6.7082039324993694;
        $actualDistance = Distance::euclidean($a, $b);

        $this->assertEquals($expectedDistance, $actualDistance, '', self::DELTA);
    }
}
|
6
tools/php-cs-fixer.sh
Executable file
6
tools/php-cs-fixer.sh
Executable file
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
# Runs php-cs-fixer with the symfony level on the source and test folders.
for folder in src tests; do
    echo "Fixing ${folder}/ folder"
    php-cs-fixer fix "${folder}/" --level=symfony
done
|
Loading…
x
Reference in New Issue
Block a user