Merge pull request #2 from php-ai/develop

Develop to master
This commit is contained in:
Arkadiusz Kondas 2016-04-09 00:51:31 +02:00
commit 662627c052
36 changed files with 1217 additions and 91 deletions

View File

@ -1,18 +1,24 @@
# PHP Machine learning library
# PHP Machine Learning library
[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop)
[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=develop)](http://php-ml.readthedocs.org/en/develop/?badge=develop)
[![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml)
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop)
Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ...
## Available Algorithms
## Documentation
### Classification
Identifying to which category an object belongs to.
* **Naive Bayes** - algorithm based on applying Bayes theorem with the “naive” assumption of independence between every pair of features
To find out how to use PHP-ML, follow the [documentation](http://php-ml.readthedocs.org/).
## Installation
This repo will be published do packagist.org soon...
This library is still under development, but you can install it with Composer:
```
composer require php-ai/php-ml
```
## To-Do
@ -21,7 +27,7 @@ This repo will be published do packagist.org soon...
## Testing
After installation, you can launch the test suite in project root directory (you will need to install dev requiremnts with composer)
After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer)
```
bin/phpunit
@ -33,3 +39,4 @@ PHP-ML is released under the MIT Licence. See the bundled LICENSE file for detai
## Author
Arkadiusz Kondas (@ArkadiuszKondas)

View File

@ -1,7 +1,7 @@
{
"name": "php-ai/php-ml",
"type": "library",
"description": "PHP Machine learning library",
"description": "PHP Machine Learning library",
"license": "MIT",
"keywords": ["machine learning","pattern recognition","computational learning theory","artificial intelligence"],
"homepage": "https://github.com/php-ai/php-ml",
@ -11,11 +11,16 @@
"email": "arkadiusz.kondas@gmail.com"
}
],
"autoload": {
"psr-0": {
"Phpml": "src/"
}
},
"config": {
"bin-dir": "bin"
},
"require": {
"php": ">=5.5.0"
"php": ">=7.0.0"
},
"require-dev": {
"phpunit/phpunit": "^5.2"

185
composer.lock generated
View File

@ -4,8 +4,8 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"hash": "640f762012a359b150ce245491743448",
"content-hash": "5efa8db5a672e2128d20c80c18746c72",
"hash": "7c34eebd6b8749a1cd09df57e5d1f47a",
"content-hash": "087091d0c339e9fa3a551a189ea658bf",
"packages": [],
"packages-dev": [
{
@ -155,22 +155,24 @@
},
{
"name": "phpspec/prophecy",
"version": "v1.5.0",
"version": "v1.6.0",
"source": {
"type": "git",
"url": "https://github.com/phpspec/prophecy.git",
"reference": "4745ded9307786b730d7a60df5cb5a6c43cf95f7"
"reference": "3c91bdf81797d725b14cb62906f9a4ce44235972"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/phpspec/prophecy/zipball/4745ded9307786b730d7a60df5cb5a6c43cf95f7",
"reference": "4745ded9307786b730d7a60df5cb5a6c43cf95f7",
"url": "https://api.github.com/repos/phpspec/prophecy/zipball/3c91bdf81797d725b14cb62906f9a4ce44235972",
"reference": "3c91bdf81797d725b14cb62906f9a4ce44235972",
"shasum": ""
},
"require": {
"doctrine/instantiator": "^1.0.2",
"php": "^5.3|^7.0",
"phpdocumentor/reflection-docblock": "~2.0",
"sebastian/comparator": "~1.1"
"sebastian/comparator": "~1.1",
"sebastian/recursion-context": "~1.0"
},
"require-dev": {
"phpspec/phpspec": "~2.0"
@ -178,7 +180,7 @@
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.4.x-dev"
"dev-master": "1.5.x-dev"
}
},
"autoload": {
@ -211,27 +213,28 @@
"spy",
"stub"
],
"time": "2015-08-13 10:07:40"
"time": "2016-02-15 07:46:21"
},
{
"name": "phpunit/php-code-coverage",
"version": "3.1.1",
"version": "3.3.0",
"source": {
"type": "git",
"url": "https://github.com/sebastianbergmann/php-code-coverage.git",
"reference": "92f5c61b5c64159faec5298325ffab0c7e59dcc8"
"reference": "fe33716763b604ade4cb442c0794f5bd5ad73004"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/92f5c61b5c64159faec5298325ffab0c7e59dcc8",
"reference": "92f5c61b5c64159faec5298325ffab0c7e59dcc8",
"url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/fe33716763b604ade4cb442c0794f5bd5ad73004",
"reference": "fe33716763b604ade4cb442c0794f5bd5ad73004",
"shasum": ""
},
"require": {
"php": ">=5.6",
"php": "^5.6 || ^7.0",
"phpunit/php-file-iterator": "~1.3",
"phpunit/php-text-template": "~1.2",
"phpunit/php-token-stream": "~1.3",
"phpunit/php-token-stream": "^1.4.2",
"sebastian/code-unit-reverse-lookup": "~1.0",
"sebastian/environment": "^1.3.2",
"sebastian/version": "~1.0|~2.0"
},
@ -247,7 +250,7 @@
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "3.1.x-dev"
"dev-master": "3.3.x-dev"
}
},
"autoload": {
@ -273,7 +276,7 @@
"testing",
"xunit"
],
"time": "2016-02-04 13:05:19"
"time": "2016-03-03 08:49:08"
},
{
"name": "phpunit/php-file-iterator",
@ -455,16 +458,16 @@
},
{
"name": "phpunit/phpunit",
"version": "5.2.3",
"version": "5.3.1",
"source": {
"type": "git",
"url": "https://github.com/sebastianbergmann/phpunit.git",
"reference": "6fdb1d3004ebc7071c4ac62f2881d67c5c11fb59"
"reference": "34a3acb401ae79deb37bc6e5f5ec3d325b369b4c"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/6fdb1d3004ebc7071c4ac62f2881d67c5c11fb59",
"reference": "6fdb1d3004ebc7071c4ac62f2881d67c5c11fb59",
"url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/34a3acb401ae79deb37bc6e5f5ec3d325b369b4c",
"reference": "34a3acb401ae79deb37bc6e5f5ec3d325b369b4c",
"shasum": ""
},
"require": {
@ -474,18 +477,19 @@
"ext-reflection": "*",
"ext-spl": "*",
"myclabs/deep-copy": "~1.3",
"php": ">=5.6",
"php": "^5.6 || ^7.0",
"phpspec/prophecy": "^1.3.1",
"phpunit/php-code-coverage": "~3.0",
"phpunit/php-code-coverage": "^3.3.0",
"phpunit/php-file-iterator": "~1.4",
"phpunit/php-text-template": "~1.2",
"phpunit/php-timer": ">=1.0.6",
"phpunit/phpunit-mock-objects": ">=3.0.5",
"phpunit/php-timer": "^1.0.6",
"phpunit/phpunit-mock-objects": "^3.1",
"sebastian/comparator": "~1.1",
"sebastian/diff": "~1.2",
"sebastian/environment": "~1.3",
"sebastian/exporter": "~1.2",
"sebastian/global-state": "~1.0",
"sebastian/object-enumerator": "~1.0",
"sebastian/resource-operations": "~1.0",
"sebastian/version": "~1.0|~2.0",
"symfony/yaml": "~2.1|~3.0"
@ -499,7 +503,7 @@
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "5.2.x-dev"
"dev-master": "5.3.x-dev"
}
},
"autoload": {
@ -525,20 +529,20 @@
"testing",
"xunit"
],
"time": "2016-02-08 12:15:53"
"time": "2016-04-07 07:04:34"
},
{
"name": "phpunit/phpunit-mock-objects",
"version": "3.0.6",
"version": "3.1.2",
"source": {
"type": "git",
"url": "https://github.com/sebastianbergmann/phpunit-mock-objects.git",
"reference": "49bc700750196c04dd6bc2c4c99cb632b893836b"
"reference": "7c34c9bdde4131b824086457a3145e27dba10ca1"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/49bc700750196c04dd6bc2c4c99cb632b893836b",
"reference": "49bc700750196c04dd6bc2c4c99cb632b893836b",
"url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/7c34c9bdde4131b824086457a3145e27dba10ca1",
"reference": "7c34c9bdde4131b824086457a3145e27dba10ca1",
"shasum": ""
},
"require": {
@ -556,7 +560,7 @@
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "3.0.x-dev"
"dev-master": "3.1.x-dev"
}
},
"autoload": {
@ -581,7 +585,52 @@
"mock",
"xunit"
],
"time": "2015-12-08 08:47:06"
"time": "2016-03-24 05:58:25"
},
{
"name": "sebastian/code-unit-reverse-lookup",
"version": "1.0.0",
"source": {
"type": "git",
"url": "https://github.com/sebastianbergmann/code-unit-reverse-lookup.git",
"reference": "c36f5e7cfce482fde5bf8d10d41a53591e0198fe"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/sebastianbergmann/code-unit-reverse-lookup/zipball/c36f5e7cfce482fde5bf8d10d41a53591e0198fe",
"reference": "c36f5e7cfce482fde5bf8d10d41a53591e0198fe",
"shasum": ""
},
"require": {
"php": ">=5.6"
},
"require-dev": {
"phpunit/phpunit": "~5"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"autoload": {
"classmap": [
"src/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"authors": [
{
"name": "Sebastian Bergmann",
"email": "sebastian@phpunit.de"
}
],
"description": "Looks up which function or method a line of code belongs to",
"homepage": "https://github.com/sebastianbergmann/code-unit-reverse-lookup/",
"time": "2016-02-13 06:45:14"
},
{
"name": "sebastian/comparator",
@ -701,16 +750,16 @@
},
{
"name": "sebastian/environment",
"version": "1.3.3",
"version": "1.3.5",
"source": {
"type": "git",
"url": "https://github.com/sebastianbergmann/environment.git",
"reference": "6e7133793a8e5a5714a551a8324337374be209df"
"reference": "dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/6e7133793a8e5a5714a551a8324337374be209df",
"reference": "6e7133793a8e5a5714a551a8324337374be209df",
"url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf",
"reference": "dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf",
"shasum": ""
},
"require": {
@ -747,7 +796,7 @@
"environment",
"hhvm"
],
"time": "2015-12-02 08:37:27"
"time": "2016-02-26 18:40:46"
},
{
"name": "sebastian/exporter",
@ -866,6 +915,52 @@
],
"time": "2015-10-12 03:26:01"
},
{
"name": "sebastian/object-enumerator",
"version": "1.0.0",
"source": {
"type": "git",
"url": "https://github.com/sebastianbergmann/object-enumerator.git",
"reference": "d4ca2fb70344987502567bc50081c03e6192fb26"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/sebastianbergmann/object-enumerator/zipball/d4ca2fb70344987502567bc50081c03e6192fb26",
"reference": "d4ca2fb70344987502567bc50081c03e6192fb26",
"shasum": ""
},
"require": {
"php": ">=5.6",
"sebastian/recursion-context": "~1.0"
},
"require-dev": {
"phpunit/phpunit": "~5"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0.x-dev"
}
},
"autoload": {
"classmap": [
"src/"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"authors": [
{
"name": "Sebastian Bergmann",
"email": "sebastian@phpunit.de"
}
],
"description": "Traverses array structures and object graphs to enumerate all referenced objects",
"homepage": "https://github.com/sebastianbergmann/object-enumerator/",
"time": "2016-01-28 13:25:10"
},
{
"name": "sebastian/recursion-context",
"version": "1.0.2",
@ -1006,16 +1101,16 @@
},
{
"name": "symfony/yaml",
"version": "v3.0.2",
"version": "v3.0.4",
"source": {
"type": "git",
"url": "https://github.com/symfony/yaml.git",
"reference": "3cf0709d7fe936e97bee9e954382e449003f1d9a"
"reference": "0047c8366744a16de7516622c5b7355336afae96"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/yaml/zipball/3cf0709d7fe936e97bee9e954382e449003f1d9a",
"reference": "3cf0709d7fe936e97bee9e954382e449003f1d9a",
"url": "https://api.github.com/repos/symfony/yaml/zipball/0047c8366744a16de7516622c5b7355336afae96",
"reference": "0047c8366744a16de7516622c5b7355336afae96",
"shasum": ""
},
"require": {
@ -1051,7 +1146,7 @@
],
"description": "Symfony Yaml Component",
"homepage": "https://symfony.com",
"time": "2016-02-02 13:44:19"
"time": "2016-03-04 07:55:57"
}
],
"aliases": [],
@ -1059,6 +1154,8 @@
"stability-flags": [],
"prefer-stable": false,
"prefer-lowest": false,
"platform": [],
"platform": {
"php": ">=7.0.0"
},
"platform-dev": []
}

37
docs/index.md Normal file
View File

@ -0,0 +1,37 @@
# PHP Machine Learning (PHP-ML)
[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop)
[![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml)
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop)
Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ...
## Installation
This library is still under development, but you can install it with Composer:
```
composer require php-ai/php-ml
```
## To-Do
* implement more algorithms
* integration with Lavacharts for data visualization
## Testing
After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer)
```
bin/phpunit
```
## License
PHP-ML is released under the MIT Licence. See the bundled LICENSE file for details.
## Author
Arkadiusz Kondas (@ArkadiuszKondas)

View File

@ -0,0 +1,35 @@
# KNearestNeighbors Classifier
Classifier implementing the k-nearest neighbors algorithm.
### Constructor Parameters
* $k - number of nearest neighbors to scan (default: 3)
```
$classifier = new KNearestNeighbors($k=4);
```
### Train
To train a classifier simply provide train samples and labels (as `array`):
```
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
$labels = ['a', 'a', 'a', 'b', 'b', 'b'];
$classifier = new KNearestNeighbors();
$classifier->train($samples, $labels);
```
### Predict
To predict the class of a sample use the `predict` method. You can provide a single sample or an array of samples:
```
$classifier->predict([3, 2]);
// return 'b'
$classifier->predict([[3, 2], [1, 5]]);
// return ['b', 'a']
```

View File

@ -0,0 +1,29 @@
# RandomSplit
One of the simplest cross-validation methods is implemented as the `RandomSplit` class. Samples are split into two groups: a train group and a test group. You can adjust the number of samples in each group.
### Constructor Parameters
* $dataset - object that implements `Dataset` interface
* $testSize - a fraction of test split (float, from 0 to 1, default: 0.3)
* $seed - seed for random generator (for tests)
```
$randomSplit = new RandomSplit($dataset, 0.2);
```
### Samples and labels groups
To get samples or labels from test and train group you can use getters:
```
$dataset = new RandomSplit($dataset, 0.3, 1234);
// train group
$dataset->getTrainSamples();
$dataset->getTrainLabels();
// test group
$dataset->getTestSamples();
$dataset->getTestLabels();
```

View File

@ -0,0 +1,21 @@
# ArrayDataset
Helper class that holds data as PHP `array` type. Implements the `Dataset` interface which is used heavily in other classes.
### Constructor Parameters
* $samples - (array) of samples
* $labels - (array) of labels
```
$dataset = new ArrayDataset([[1, 1], [2, 1], [3, 2], [4, 1]], ['a', 'a', 'b', 'b']);
```
### Samples and labels
To get samples or labels you can use getters:
```
$dataset->getSamples();
$dataset->getLabels();
```

View File

@ -0,0 +1,15 @@
# CsvDataset
Helper class that loads data from CSV file. It extends the `ArrayDataset`.
### Constructor Parameters
* $filepath - (string) path to `.csv` file
* $features - (int) number of columns that are features (starts from first column), last column must be a label
* $headingRow - (bool) defines whether the file has a heading row (if `true`, the first row will be ignored)
```
$dataset = new CsvDataset('dataset.csv', 2, true);
```
See Array Dataset for more information.

View File

@ -0,0 +1,34 @@
# Iris Dataset
A popular and widely available dataset of iris flower measurements and class names.
### Specification
| Property | Value |
| --- | --- |
| Classes | 3 |
| Samples per class | 50 |
| Samples total | 150 |
| Features per sample | 4 |
### Load
To load the Iris dataset simply use:
```
$dataset = new Iris();
```
### Several samples
```
sepal length,sepal width,petal length,petal width,class
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
```

View File

@ -0,0 +1,24 @@
# Accuracy
Class for calculating classifier accuracy.
### Score
To calculate the classifier accuracy score use the static `score` method. Parameters:
* $actualLabels - (array) true sample labels
* $predictedLabels - (array) predicted labels (e.g. from test group)
* $normalize - (bool) normalize or not the result (default: true)
### Example
```
$actualLabels = ['a', 'b', 'a', 'b'];
$predictedLabels = ['a', 'a', 'a', 'b'];
Accuracy::score($actualLabels, $predictedLabels);
// return 0.75
Accuracy::score($actualLabels, $predictedLabels, false);
// return 3
```

View File

@ -0,0 +1,17 @@
# Distance
Special class for calculation of different types of distance.
### Euclidean
![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance")
To calculate euclidean distance:
```
$a = [4, 6];
$b = [2, 5];
Distance::euclidean($a, $b);
// return 2.2360679774998
```

17
mkdocs.yml Normal file
View File

@ -0,0 +1,17 @@
# MkDocs site configuration: site title, page navigation and theme.
# NOTE(review): the nesting of the navigation entries below appears to have been
# flattened in this rendering; confirm the indentation against the real file.
site_name: PHP Machine Learning (PHP-ML)
# Navigation entries; each leaf maps a menu title to a Markdown file.
pages:
- Home: index.md
- Machine Learning:
- Classification:
- KNearestNeighbors: machine-learning/classification/knearestneighbors.md
- Cross Validation:
- RandomSplit: machine-learning/cross-validation/randomsplit.md
- Datasets:
- Array Dataset: machine-learning/datasets/array-dataset.md
- CSV Dataset: machine-learning/datasets/csv-dataset.md
- Demo:
- Iris: machine-learning/datasets/demo/iris.md
- Metric:
- Accuracy: machine-learning/metric/accuracy.md
- Distance: machine-learning/metric/distance.md
theme: readthedocs

14
phpunit.xml Normal file
View File

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
PHPUnit configuration: a single test suite that collects tests from tests/*,
with coloured output and the four beStrictAbout* checks switched on.
-->
<phpunit
colors="true"
beStrictAboutTestsThatDoNotTestAnything="true"
beStrictAboutOutputDuringTests="true"
beStrictAboutTestSize="true"
beStrictAboutChangesToGlobalState="true"
>
<testsuites>
<testsuite name="PHP-ML Test Suite">
<directory>tests/*</directory>
</testsuite>
</testsuites>
</phpunit>

View File

@ -1,20 +1,21 @@
<?php
declare (strict_types = 1);
namespace Phpml\Classifier;
// NOTE(review): this span is a rendered diff, so removed and added lines are interleaved.
// Each method therefore appears twice: the untyped signature first, the array-typed one second.
interface Classifier
{
/**
* @param array $features
* @param array $samples
* @param array $labels
*/
public function train($features, $labels);
public function train(array $samples, array $labels);
/**
* @param mixed $feature
* @param array $samples
*
* @return mixed
*/
public function predict($feature);
public function predict(array $samples);
}

View File

@ -0,0 +1,105 @@
<?php
declare (strict_types = 1);
namespace Phpml\Classifier;
use Phpml\Metric\Distance;
class KNearestNeighbors implements Classifier
{
    /**
     * Number of nearest neighbours that take part in the vote.
     *
     * @var int
     */
    private $k;

    /**
     * Training samples stored by train().
     *
     * @var array
     */
    private $samples;

    /**
     * Training labels stored by train(); looked up with the same keys as the samples.
     *
     * @var array
     */
    private $labels;

    /**
     * @param int $k number of nearest neighbours to take into account
     */
    public function __construct(int $k = 3)
    {
        $this->samples = [];
        $this->labels = [];
        $this->k = $k;
    }

    /**
     * Stores the training data; all computation happens at prediction time.
     *
     * @param array $samples
     * @param array $labels
     */
    public function train(array $samples, array $labels)
    {
        $this->labels = $labels;
        $this->samples = $samples;
    }

    /**
     * Predicts the label of one sample, or of every sample in an array of samples.
     *
     * @param array $samples a single sample, or an array of samples
     *
     * @return mixed a single label, or an array of labels keyed like the input
     */
    public function predict(array $samples)
    {
        // A non-array first element means the argument itself is one sample.
        if (!is_array($samples[0])) {
            return $this->predictSample($samples);
        }

        $labels = [];
        foreach ($samples as $key => $singleSample) {
            $labels[$key] = $this->predictSample($singleSample);
        }

        return $labels;
    }

    /**
     * Lets the k nearest training samples vote and returns the label with most votes.
     *
     * @param array $sample
     *
     * @return mixed
     */
    private function predictSample(array $sample)
    {
        $nearest = $this->kNeighborsDistances($sample);

        // Start every known label with zero votes.
        $labelKeys = array_values($this->labels);
        $zeroVotes = array_fill(0, count($this->labels), 0);
        $votes = array_combine($labelKeys, $zeroVotes);

        foreach (array_keys($nearest) as $neighborKey) {
            ++$votes[$this->labels[$neighborKey]];
        }

        // Highest vote count first; the winning label is the first key.
        arsort($votes);
        reset($votes);

        return key($votes);
    }

    /**
     * Computes the distance from the sample to every training sample and keeps the k smallest.
     *
     * @param array $sample
     *
     * @return array distances keyed by the training sample's key, ascending
     *
     * @throws \Phpml\Exception\InvalidArgumentException
     */
    private function kNeighborsDistances(array $sample): array
    {
        $byNeighbor = [];
        foreach ($this->samples as $neighborKey => $trainingSample) {
            $byNeighbor[$neighborKey] = Distance::euclidean($sample, $trainingSample);
        }

        asort($byNeighbor);

        return array_slice($byNeighbor, 0, $this->k, true);
    }
}

View File

@ -1,26 +1,25 @@
<?php
declare (strict_types = 1);
namespace Phpml\Classifier;
// NOTE(review): this span is a rendered diff, so removed and added lines are interleaved.
// The class header and both method signatures each appear twice (old line, then new line).
// Both method bodies are empty in the lines shown.
abstract class NaiveBayes implements Classifier
class NaiveBayes implements Classifier
{
/**
* @param array $features
* @param array $samples
* @param array $labels
*/
public function train($features, $labels)
public function train(array $samples, array $labels)
{
}
/**
* @param mixed $feature
* @param array $samples
*
* @return mixed
*/
public function predict($feature)
public function predict(array $samples)
{
}
}

View File

@ -1,8 +0,0 @@
<?php
use Phpml\Classifier\NaiveBayes;
/**
 * Empty subclass of NaiveBayes; it declares no members of its own.
 *
 * NOTE(review): the hunk header for this file reads "-1,8 +0,0", i.e. the commit removes it.
 */
class GaussianNaiveBayes extends NaiveBayes
{
}

View File

@ -0,0 +1,105 @@
<?php
declare (strict_types = 1);
namespace Phpml\CrossValidation;
use Phpml\Dataset\Dataset;
use Phpml\Exception\InvalidArgumentException;
class RandomSplit
{
    /**
     * Samples assigned to the train group.
     *
     * @var array
     */
    private $trainSamples = [];

    /**
     * Samples assigned to the test group.
     *
     * @var array
     */
    private $testSamples = [];

    /**
     * Labels of the train group, in the same order as the train samples.
     *
     * @var array
     */
    private $trainLabels = [];

    /**
     * Labels of the test group, in the same order as the test samples.
     *
     * @var array
     */
    private $testLabels = [];

    /**
     * Randomly splits the dataset into a test group and a train group.
     *
     * Every sample is drawn exactly once (without replacement). Drawn samples go to
     * the test group until its share of the dataset reaches $testSize; all later
     * draws go to the train group.
     *
     * @param Dataset $dataset
     * @param float   $testSize fraction of samples for the test group, strictly between 0 and 1
     * @param int     $seed     seed passed to mt_srand(); null lets PHP pick a seed
     *
     * @throws InvalidArgumentException when $testSize is not strictly between 0 and 1
     */
    public function __construct(Dataset $dataset, float $testSize = 0.3, int $seed = null)
    {
        if (0 >= $testSize || 1 <= $testSize) {
            throw InvalidArgumentException::percentNotInRange('testSize');
        }
        $this->seedGenerator($seed);

        $samples = $dataset->getSamples();
        $labels = $dataset->getLabels();
        $datasetSize = count($samples);

        // $i is the number of samples that have not been drawn yet.
        for ($i = $datasetSize; $i > 0; --$i) {
            // Draw only among the remaining samples (keys 0..$i-1).
            $key = mt_rand(0, $i - 1);
            $setName = count($this->testSamples) / $datasetSize >= $testSize ? 'train' : 'test';

            $this->{$setName.'Samples'}[] = $samples[$key];
            $this->{$setName.'Labels'}[] = $labels[$key];

            // Remove the drawn pair so it cannot be picked twice, then reindex so
            // the remaining keys are contiguous for the next draw.
            unset($samples[$key], $labels[$key]);
            $samples = array_values($samples);
            $labels = array_values($labels);
        }
    }

    /**
     * @return array
     */
    public function getTrainSamples()
    {
        return $this->trainSamples;
    }

    /**
     * @return array
     */
    public function getTestSamples()
    {
        return $this->testSamples;
    }

    /**
     * @return array
     */
    public function getTrainLabels()
    {
        return $this->trainLabels;
    }

    /**
     * @return array
     */
    public function getTestLabels()
    {
        return $this->testLabels;
    }

    /**
     * Seeds the Mersenne Twister generator used for drawing samples.
     *
     * @param int|null $seed explicit seed, or null to call mt_srand() without one
     */
    private function seedGenerator(int $seed = null)
    {
        if (null === $seed) {
            mt_srand();
        } else {
            mt_srand($seed);
        }
    }
}

View File

@ -0,0 +1,52 @@
<?php
declare (strict_types = 1);
namespace Phpml\Dataset;
use Phpml\Exception\InvalidArgumentException;
class ArrayDataset implements Dataset
{
    /**
     * Samples held by this dataset.
     *
     * @var array
     */
    protected $samples = [];

    /**
     * Labels held by this dataset.
     *
     * @var array
     */
    protected $labels = [];

    /**
     * Stores the given samples and labels after checking that there is one label per sample.
     *
     * @param array $samples
     * @param array $labels
     *
     * @throws InvalidArgumentException when the two arrays differ in size
     */
    public function __construct(array $samples, array $labels)
    {
        $sampleCount = count($samples);
        $labelCount = count($labels);

        if ($sampleCount !== $labelCount) {
            throw InvalidArgumentException::sizeNotMatch();
        }

        $this->labels = $labels;
        $this->samples = $samples;
    }

    /**
     * @return array the stored samples
     */
    public function getSamples(): array
    {
        return $this->samples;
    }

    /**
     * @return array the stored labels
     */
    public function getLabels(): array
    {
        return $this->labels;
    }
}

View File

@ -0,0 +1,44 @@
<?php
declare (strict_types = 1);
namespace Phpml\Dataset;
use Phpml\Exception\DatasetException;
class CsvDataset extends ArrayDataset
{
    /**
     * Declared for the CSV path; the constructor below does not assign it.
     *
     * @var string
     */
    protected $filepath;

    /**
     * Reads a comma-separated file row by row into samples and labels.
     *
     * The first $features columns of a row become the sample; the column right
     * after them becomes the label.
     *
     * @param string $filepath   path to the CSV file
     * @param int    $features   number of leading columns that form a sample
     * @param bool   $headingRow when true, the first row is skipped
     *
     * @throws DatasetException when the file is missing or cannot be opened
     */
    public function __construct(string $filepath, int $features, bool $headingRow = true)
    {
        if (!file_exists($filepath)) {
            throw DatasetException::missingFile(basename($filepath));
        }

        $handle = fopen($filepath, 'r');
        if (false === $handle) {
            throw DatasetException::cantOpenFile(basename($filepath));
        }

        $rowNumber = 0;
        while (false !== ($columns = fgetcsv($handle, 1000, ','))) {
            ++$rowNumber;

            $isHeading = $headingRow && 1 === $rowNumber;
            if (!$isHeading) {
                $this->samples[] = array_slice($columns, 0, $features);
                $this->labels[] = $columns[$features];
            }
        }

        fclose($handle);
    }
}

View File

@ -1,10 +1,18 @@
<?php
// NOTE(review): this span is a rendered diff, so removed and added lines are interleaved.
// It shows both an "abstract class Dataset" line with a $filepath property and an
// "interface Dataset" line with getSamples()/getLabels().
abstract class Dataset
declare (strict_types = 1);
namespace Phpml\Dataset;
interface Dataset
{
/**
* @var string
* @return array
*/
protected $filepath;
public function getSamples(): array;
/**
* @return array
*/
public function getLabels(): array;
}

View File

@ -0,0 +1,22 @@
<?php
declare (strict_types = 1);
namespace Phpml\Dataset\Demo;
use Phpml\Dataset\CsvDataset;
/**
 * Demo dataset loaded from the bundled data/iris.csv file.
 *
 * Classes: 3
 * Samples per class: 50
 * Samples total: 150
 * Features per sample: 4.
 */
class Iris extends CsvDataset
{
    /**
     * Loads the CSV with 4 feature columns and skips its heading row.
     */
    public function __construct()
    {
        parent::__construct(__DIR__.'/../../../../data/iris.csv', 4, true);
    }
}

View File

@ -1,10 +0,0 @@
<?php
/**
 * Dataset subclass that only overrides the $filepath default.
 *
 * NOTE(review): the hunk header for this file reads "-1,10 +0,0", i.e. the commit removes it.
 */
class Iris extends Dataset
{
/**
* File name of the dataset.
*
* @var string
*/
protected $filepath = 'iris.csv';
}

View File

@ -0,0 +1,21 @@
<?php
declare (strict_types = 1);
namespace Phpml\Exception;
class DatasetException extends \Exception
{
    /**
     * Builds the exception for a dataset file that does not exist.
     *
     * @param string $filepath name of the file, inserted into the message
     *
     * @return DatasetException
     */
    public static function missingFile($filepath)
    {
        $message = sprintf('Dataset file %s missing.', $filepath);

        return new self($message);
    }

    /**
     * Builds the exception for a dataset file that could not be opened.
     *
     * @param string $filepath name of the file, inserted into the message
     *
     * @return DatasetException
     */
    public static function cantOpenFile($filepath)
    {
        $message = sprintf('Dataset file %s can\'t be open.', $filepath);

        return new self($message);
    }
}

View File

@ -0,0 +1,26 @@
<?php
declare (strict_types = 1);
namespace Phpml\Exception;
class InvalidArgumentException extends \Exception
{
    /**
     * Builds the exception for arguments whose sizes differ.
     *
     * @return InvalidArgumentException
     */
    public static function sizeNotMatch()
    {
        $message = 'Size of given arguments not match';

        return new self($message);
    }

    /**
     * Builds the exception for a fraction argument outside the 0.0 to 1.0 range.
     *
     * @param string $name name of the offending argument, inserted into the message
     *
     * @return InvalidArgumentException
     */
    public static function percentNotInRange($name)
    {
        $message = sprintf('%s must be between 0.0 and 1.0', $name);

        return new self($message);
    }
}

View File

@ -0,0 +1,39 @@
<?php
declare (strict_types = 1);
namespace Phpml\Metric;
use Phpml\Exception\InvalidArgumentException;
class Accuracy
{
    /**
     * Counts how many predicted labels are identical (===) to the actual label at the same key.
     *
     * @param array $actualLabels    true labels
     * @param array $predictedLabels predicted labels, keyed like $actualLabels
     * @param bool  $normalize       when true, the count is divided by the number of actual labels
     *
     * @return float|int the fraction of matches when normalized, otherwise the number of matches
     *
     * @throws InvalidArgumentException when the two arrays differ in size
     */
    public static function score(array $actualLabels, array $predictedLabels, bool $normalize = true)
    {
        $total = count($actualLabels);
        if ($total != count($predictedLabels)) {
            throw InvalidArgumentException::sizeNotMatch();
        }

        $matches = 0;
        foreach ($actualLabels as $key => $actual) {
            if ($actual === $predictedLabels[$key]) {
                ++$matches;
            }
        }

        if (!$normalize) {
            return $matches;
        }

        return $matches / $total;
    }
}

View File

@ -0,0 +1,34 @@
<?php
declare (strict_types = 1);
namespace Phpml\Metric;
use Phpml\Exception\InvalidArgumentException;
class Distance
{
    /**
     * Euclidean distance between two points given as arrays indexed 0..n-1.
     *
     * @param array $a
     * @param array $b
     *
     * @return float square root of the summed squared coordinate differences
     *
     * @throws InvalidArgumentException when the arrays differ in size
     */
    public static function euclidean(array $a, array $b): float
    {
        $dimensions = count($a);
        if ($dimensions != count($b)) {
            throw InvalidArgumentException::sizeNotMatch();
        }

        $sumOfSquares = 0;
        $axis = 0;
        while ($axis < $dimensions) {
            $sumOfSquares += ($a[$axis] - $b[$axis]) ** 2;
            ++$axis;
        }

        return sqrt($sumOfSquares);
    }
}

View File

@ -0,0 +1,58 @@
<?php
declare (strict_types = 1);
namespace tests\Classifier;
use Phpml\Classifier\KNearestNeighbors;
use Phpml\CrossValidation\RandomSplit;
use Phpml\Dataset\Demo\Iris;
use Phpml\Metric\Accuracy;
class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Trains on two small clusters and classifies samples one call at a time.
     */
    public function testPredictSingleSampleWithDefaultK()
    {
        $classifier = new KNearestNeighbors();
        $classifier->train(
            [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]],
            ['a', 'a', 'a', 'b', 'b', 'b']
        );

        // Pairs of expected label and sample to classify.
        $cases = [
            ['b', [3, 2]],
            ['b', [5, 1]],
            ['b', [4, 3]],
            ['b', [4, -5]],
            ['a', [2, 3]],
            ['a', [1, 2]],
            ['a', [1, 5]],
            ['a', [3, 10]],
        ];

        foreach ($cases as list($expectedLabel, $sample)) {
            $this->assertEquals($expectedLabel, $classifier->predict($sample));
        }
    }

    /**
     * Classifies the same samples in a single predict() call.
     */
    public function testPredictArrayOfSamples()
    {
        $classifier = new KNearestNeighbors();
        $classifier->train(
            [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]],
            ['a', 'a', 'a', 'b', 'b', 'b']
        );

        $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]];
        $expectedLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a'];

        $this->assertEquals($expectedLabels, $classifier->predict($testSamples));
    }

    /**
     * Splits the Iris dataset in half with a fixed seed and checks the accuracy with k = 4.
     */
    public function testAccuracyOnIrisDataset()
    {
        $split = new RandomSplit(new Iris(), 0.5, 123);

        $classifier = new KNearestNeighbors(4);
        $classifier->train($split->getTrainSamples(), $split->getTrainLabels());

        $predictedLabels = $classifier->predict($split->getTestSamples());
        $accuracy = Accuracy::score($split->getTestLabels(), $predictedLabels);

        $this->assertEquals(0.96, $accuracy);
    }
}

View File

@ -0,0 +1,94 @@
<?php
declare (strict_types = 1);
namespace tests\Phpml\CrossValidation;
use Phpml\CrossValidation\RandomSplit;
use Phpml\Dataset\ArrayDataset;
class RandomSplitTest extends \PHPUnit_Framework_TestCase
{
/**
* @expectedException \Phpml\Exception\InvalidArgumentException
*/
public function testThrowExceptionOnToSmallTestSize()
{
new RandomSplit(new ArrayDataset([], []), 0);
}
/**
* @expectedException \Phpml\Exception\InvalidArgumentException
*/
public function testThrowExceptionOnToBigTestSize()
{
new RandomSplit(new ArrayDataset([], []), 1);
}
public function testDatasetRandomSplitWithoutSeed()
{
$dataset = new ArrayDataset(
$samples = [[1], [2], [3], [4]],
$labels = ['a', 'a', 'b', 'b']
);
$randomSplit = new RandomSplit($dataset, 0.5);
$this->assertEquals(2, count($randomSplit->getTestSamples()));
$this->assertEquals(2, count($randomSplit->getTrainSamples()));
$randomSplit2 = new RandomSplit($dataset, 0.25);
$this->assertEquals(1, count($randomSplit2->getTestSamples()));
$this->assertEquals(3, count($randomSplit2->getTrainSamples()));
}
public function testDatasetRandomSplitWithSameSeed()
{
$dataset = new ArrayDataset(
$samples = [[1], [2], [3], [4], [5], [6], [7], [8]],
$labels = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b']
);
$seed = 123;
$randomSplit1 = new RandomSplit($dataset, 0.5, $seed);
$randomSplit2 = new RandomSplit($dataset, 0.5, $seed);
$this->assertEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels());
$this->assertEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples());
$this->assertEquals($randomSplit1->getTrainLabels(), $randomSplit2->getTrainLabels());
$this->assertEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples());
}
public function testDatasetRandomSplitWithDifferentSeed()
{
$dataset = new ArrayDataset(
$samples = [[1], [2], [3], [4], [5], [6], [7], [8]],
$labels = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b']
);
$randomSplit1 = new RandomSplit($dataset, 0.5, 4321);
$randomSplit2 = new RandomSplit($dataset, 0.5, 1234);
$this->assertNotEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels());
$this->assertNotEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples());
$this->assertNotEquals($randomSplit1->getTrainLabels(), $randomSplit2->getTrainLabels());
$this->assertNotEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples());
}
/**
 * After splitting, every sample must still sit at the same index as its label.
 *
 * Each sample's single feature equals its label, so the pairing can be
 * verified by comparing the feature with the label found at the same index.
 */
public function testRandomSplitCorrectSampleAndLabelPosition()
{
    $samples = [[1], [2], [3], [4]];
    $labels = [1, 2, 3, 4];
    $split = new RandomSplit(new ArrayDataset($samples, $labels), 0.5);

    // Test partition (two entries at ratio 0.5).
    $this->assertEquals($split->getTestSamples()[0][0], $split->getTestLabels()[0]);
    $this->assertEquals($split->getTestSamples()[1][0], $split->getTestLabels()[1]);

    // Training partition (the remaining two entries).
    $this->assertEquals($split->getTrainSamples()[0][0], $split->getTrainLabels()[0]);
    $this->assertEquals($split->getTrainSamples()[1][0], $split->getTrainLabels()[1]);
}
}

View File

@ -0,0 +1,29 @@
<?php
declare (strict_types = 1);
namespace tests\Phpml\Dataset;
use Phpml\Dataset\ArrayDataset;
/**
 * Tests for building an ArrayDataset and reading its contents back.
 */
class ArrayDatasetTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Sample and label arrays of different lengths must be rejected.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArgumentsSize()
    {
        $twoSamples = [0, 1];
        $oneLabel = [0];

        new ArrayDataset($twoSamples, $oneLabel);
    }

    /**
     * The dataset must hand back exactly the samples and labels it was given.
     */
    public function testArrayDataset()
    {
        $samples = [[1], [2], [3], [4]];
        $labels = ['a', 'a', 'b', 'b'];

        $dataset = new ArrayDataset($samples, $labels);

        $this->assertEquals($samples, $dataset->getSamples());
        $this->assertEquals($labels, $dataset->getLabels());
    }
}

View File

@ -0,0 +1,28 @@
<?php
declare (strict_types = 1);
namespace tests\Phpml\Dataset;
use Phpml\Dataset\CsvDataset;
/**
 * Tests for loading a dataset from a CSV file.
 */
class CsvDatasetTest extends \PHPUnit_Framework_TestCase
{
    /**
     * A path that does not point to an existing file must be rejected.
     *
     * @expectedException \Phpml\Exception\DatasetException
     */
    public function testThrowExceptionOnMissingFile()
    {
        new CsvDataset('missingFile', 3);
    }

    /**
     * Loading the bundled fixture must yield ten samples and ten labels.
     */
    public function testSampleCsvDataset()
    {
        // __DIR__ is the built-in equivalent of dirname(__FILE__).
        $filePath = __DIR__.'/Resources/dataset.csv';

        // NOTE(review): the arguments 2 and true presumably are the number of
        // feature columns and a "skip header row" flag - confirm against
        // CsvDataset's constructor.
        $dataset = new CsvDataset($filePath, 2, true);

        $this->assertCount(10, $dataset->getSamples());
        $this->assertCount(10, $dataset->getLabels());
    }
}

View File

@ -0,0 +1,22 @@
<?php
declare (strict_types = 1);
namespace tests\Phpml\Dataset\Demo;
use Phpml\Dataset\Demo\Iris;
/**
 * Tests for the bundled Iris demo dataset.
 */
class IrisTest extends \PHPUnit_Framework_TestCase
{
    /**
     * The dataset must expose 150 samples with 4 features each and 150 labels.
     */
    public function testLoadingIrisDataset()
    {
        $iris = new Iris();

        // whole dataset
        $this->assertCount(150, $iris->getSamples());
        $this->assertCount(150, $iris->getLabels());

        // one sample features count
        $this->assertCount(4, $iris->getSamples()[0]);
    }
}

View File

@ -0,0 +1,11 @@
feature1,feature2,label
1,1,a
2,1,b
3,1,c
4,5,a
2,4,a
1,5,a
2,6,b
3,7,c
4,4,a
2,0,a
1 feature1 feature2 label
2 1 1 a
3 2 1 b
4 3 1 c
5 4 5 a
6 2 4 a
7 1 5 a
8 2 6 b
9 3 7 c
10 4 4 a
11 2 0 a

View File

@ -0,0 +1,37 @@
<?php
declare (strict_types = 1);
namespace tests\Phpml\Metric;
use Phpml\Metric\Accuracy;
/**
 * Tests for the Accuracy metric.
 */
class AccuracyTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Label lists of different lengths must be rejected.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArguments()
    {
        $actual = ['a', 'b', 'a', 'b'];
        $predicted = ['a', 'a'];

        Accuracy::score($actual, $predicted);
    }

    /**
     * With the default arguments, two matches out of four score 0.5.
     */
    public function testCalculateNormalizedScore()
    {
        $actual = ['a', 'b', 'a', 'b'];
        $predicted = ['a', 'a', 'b', 'b'];

        $score = Accuracy::score($actual, $predicted);

        $this->assertEquals(0.5, $score);
    }

    /**
     * With false as third argument, three matches out of four score 3.
     */
    public function testCalculateNotNormalizedScore()
    {
        $actual = ['a', 'b', 'a', 'b'];
        $predicted = ['a', 'b', 'b', 'b'];

        $score = Accuracy::score($actual, $predicted, false);

        $this->assertEquals(3, $score);
    }
}

View File

@ -0,0 +1,51 @@
<?php
declare (strict_types = 1);
namespace tests\Phpml\Metric;
use Phpml\Metric\Distance;
/**
 * Tests for the Euclidean distance metric.
 */
class DistanceTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Points with a different number of dimensions must be rejected.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArgumentsInEuclidean()
    {
        $a = [0, 1, 2];
        $b = [0, 2];

        Distance::euclidean($a, $b);
    }

    /**
     * In one dimension the distance is the absolute difference: |4 - 2| = 2.
     */
    public function testCalculateEuclideanDistanceForOneDimension()
    {
        $a = [4];
        $b = [2];

        $expectedDistance = 2;
        $actualDistance = Distance::euclidean($a, $b);

        $this->assertEquals($expectedDistance, $actualDistance);
    }

    /**
     * Distances in two and three dimensions, compared with a float tolerance.
     */
    public function testCalculateEuclideanDistanceForTwoAndMoreDimension()
    {
        // The expected values are truncated decimal literals of irrational
        // square roots, so they are compared with an explicit tolerance
        // instead of relying on the comparator's implicit float epsilon.
        $delta = 0.0000000001;

        // sqrt((4 - 2)^2 + (6 - 5)^2) = sqrt(5)
        $a = [4, 6];
        $b = [2, 5];

        $expectedDistance = 2.2360679774998;
        $actualDistance = Distance::euclidean($a, $b);

        $this->assertEquals($expectedDistance, $actualDistance, '', $delta);

        // sqrt((6 - 2)^2 + (10 - 5)^2 + (3 - 5)^2) = sqrt(45)
        $a = [6, 10, 3];
        $b = [2, 5, 5];

        $expectedDistance = 6.7082039324993694;
        $actualDistance = Distance::euclidean($a, $b);

        $this->assertEquals($expectedDistance, $actualDistance, '', $delta);
    }
}

6
tools/php-cs-fixer.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/bash
# Runs php-cs-fixer with the symfony level on the source and test folders, in
# that order. Paths are relative, so the folders are resolved against the
# current working directory.
for folder in src tests; do
    echo "Fixing ${folder}/ folder"
    php-cs-fixer fix "${folder}/" --level=symfony
done