mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-26 06:46:45 +00:00
commit
2393b9f137
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1,3 @@
|
||||
/vendor/
|
||||
humbuglog.*
|
||||
/bin/phpunit
|
||||
|
12
README.md
12
README.md
@ -1,4 +1,4 @@
|
||||
# PHP Machine Learning library
|
||||
# PHP-ML - Machine Learning library for PHP
|
||||
|
||||
[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop)
|
||||
[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=develop)](http://php-ml.readthedocs.org/en/develop/?badge=develop)
|
||||
@ -37,22 +37,32 @@ composer require php-ai/php-ml
|
||||
## Features
|
||||
|
||||
* Classification
|
||||
* [SVC](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/svc/)
|
||||
* [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/)
|
||||
* [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/)
|
||||
* Regression
|
||||
* [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/)
|
||||
* [SVR](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/svr/)
|
||||
* Clustering
|
||||
* [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means)
|
||||
* [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan)
|
||||
* Cross Validation
|
||||
* [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split)
|
||||
* Preprocessing
|
||||
* [Normalization](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/normalization)
|
||||
* [Imputation missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values)
|
||||
* Feature Extraction
|
||||
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer)
|
||||
* Datasets
|
||||
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset)
|
||||
* Ready to use:
|
||||
* [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/)
|
||||
* [Wine](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/wine/)
|
||||
* [Glass](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/glass/)
|
||||
* Math
|
||||
* [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/)
|
||||
* [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/)
|
||||
* [Statistic](http://php-ml.readthedocs.io/en/latest/math/statistic/)
|
||||
|
||||
|
||||
## Contribute
|
||||
|
BIN
bin/libsvm/svm-predict
Executable file
BIN
bin/libsvm/svm-predict
Executable file
Binary file not shown.
BIN
bin/libsvm/svm-predict.exe
Normal file
BIN
bin/libsvm/svm-predict.exe
Normal file
Binary file not shown.
BIN
bin/libsvm/svm-scale
Executable file
BIN
bin/libsvm/svm-scale
Executable file
Binary file not shown.
BIN
bin/libsvm/svm-scale.exe
Normal file
BIN
bin/libsvm/svm-scale.exe
Normal file
Binary file not shown.
BIN
bin/libsvm/svm-train
Executable file
BIN
bin/libsvm/svm-train
Executable file
Binary file not shown.
BIN
bin/libsvm/svm-train.exe
Normal file
BIN
bin/libsvm/svm-train.exe
Normal file
Binary file not shown.
@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "php-ai/php-ml",
|
||||
"type": "library",
|
||||
"description": "PHP Machine Learning library",
|
||||
"description": "PHP-ML - Machine Learning library for PHP",
|
||||
"license": "MIT",
|
||||
"keywords": ["machine learning","pattern recognition","computational learning theory","artificial intelligence"],
|
||||
"homepage": "https://github.com/php-ai/php-ml",
|
||||
@ -16,13 +16,13 @@
|
||||
"Phpml": "src/"
|
||||
}
|
||||
},
|
||||
"config": {
|
||||
"bin-dir": "bin"
|
||||
},
|
||||
"require": {
|
||||
"php": ">=7.0.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "^5.2"
|
||||
},
|
||||
"config": {
|
||||
"bin-dir": "bin"
|
||||
}
|
||||
}
|
||||
|
56
composer.lock
generated
56
composer.lock
generated
@ -4,7 +4,7 @@
|
||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
|
||||
"This file is @generated automatically"
|
||||
],
|
||||
"hash": "7c34eebd6b8749a1cd09df57e5d1f47a",
|
||||
"hash": "f3e2d9975d300b3ea4c3568de44d8499",
|
||||
"content-hash": "087091d0c339e9fa3a551a189ea658bf",
|
||||
"packages": [],
|
||||
"packages-dev": [
|
||||
@ -64,16 +64,16 @@
|
||||
},
|
||||
{
|
||||
"name": "myclabs/deep-copy",
|
||||
"version": "1.5.0",
|
||||
"version": "1.5.1",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/myclabs/DeepCopy.git",
|
||||
"reference": "e3abefcd7f106677fd352cd7c187d6c969aa9ddc"
|
||||
"reference": "a8773992b362b58498eed24bf85005f363c34771"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/e3abefcd7f106677fd352cd7c187d6c969aa9ddc",
|
||||
"reference": "e3abefcd7f106677fd352cd7c187d6c969aa9ddc",
|
||||
"url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/a8773992b362b58498eed24bf85005f363c34771",
|
||||
"reference": "a8773992b362b58498eed24bf85005f363c34771",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -102,7 +102,7 @@
|
||||
"object",
|
||||
"object graph"
|
||||
],
|
||||
"time": "2015-11-07 22:20:37"
|
||||
"time": "2015-11-20 12:04:31"
|
||||
},
|
||||
{
|
||||
"name": "phpdocumentor/reflection-docblock",
|
||||
@ -217,16 +217,16 @@
|
||||
},
|
||||
{
|
||||
"name": "phpunit/php-code-coverage",
|
||||
"version": "3.3.0",
|
||||
"version": "3.3.1",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/php-code-coverage.git",
|
||||
"reference": "fe33716763b604ade4cb442c0794f5bd5ad73004"
|
||||
"reference": "2431befdd451fac43fbcde94d1a92fb3b8b68f86"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/fe33716763b604ade4cb442c0794f5bd5ad73004",
|
||||
"reference": "fe33716763b604ade4cb442c0794f5bd5ad73004",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/2431befdd451fac43fbcde94d1a92fb3b8b68f86",
|
||||
"reference": "2431befdd451fac43fbcde94d1a92fb3b8b68f86",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -244,7 +244,7 @@
|
||||
},
|
||||
"suggest": {
|
||||
"ext-dom": "*",
|
||||
"ext-xdebug": ">=2.2.1",
|
||||
"ext-xdebug": ">=2.4.0",
|
||||
"ext-xmlwriter": "*"
|
||||
},
|
||||
"type": "library",
|
||||
@ -276,7 +276,7 @@
|
||||
"testing",
|
||||
"xunit"
|
||||
],
|
||||
"time": "2016-03-03 08:49:08"
|
||||
"time": "2016-04-08 08:14:53"
|
||||
},
|
||||
{
|
||||
"name": "phpunit/php-file-iterator",
|
||||
@ -458,16 +458,16 @@
|
||||
},
|
||||
{
|
||||
"name": "phpunit/phpunit",
|
||||
"version": "5.3.1",
|
||||
"version": "5.3.2",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/phpunit.git",
|
||||
"reference": "34a3acb401ae79deb37bc6e5f5ec3d325b369b4c"
|
||||
"reference": "2c6da3536035617bae3fe3db37283c9e0eb63ab3"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/34a3acb401ae79deb37bc6e5f5ec3d325b369b4c",
|
||||
"reference": "34a3acb401ae79deb37bc6e5f5ec3d325b369b4c",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/2c6da3536035617bae3fe3db37283c9e0eb63ab3",
|
||||
"reference": "2c6da3536035617bae3fe3db37283c9e0eb63ab3",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -529,20 +529,20 @@
|
||||
"testing",
|
||||
"xunit"
|
||||
],
|
||||
"time": "2016-04-07 07:04:34"
|
||||
"time": "2016-04-12 16:20:08"
|
||||
},
|
||||
{
|
||||
"name": "phpunit/phpunit-mock-objects",
|
||||
"version": "3.1.2",
|
||||
"version": "3.1.3",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/phpunit-mock-objects.git",
|
||||
"reference": "7c34c9bdde4131b824086457a3145e27dba10ca1"
|
||||
"reference": "151c96874bff6fe61a25039df60e776613a61489"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/7c34c9bdde4131b824086457a3145e27dba10ca1",
|
||||
"reference": "7c34c9bdde4131b824086457a3145e27dba10ca1",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/151c96874bff6fe61a25039df60e776613a61489",
|
||||
"reference": "151c96874bff6fe61a25039df60e776613a61489",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -585,7 +585,7 @@
|
||||
"mock",
|
||||
"xunit"
|
||||
],
|
||||
"time": "2016-03-24 05:58:25"
|
||||
"time": "2016-04-20 14:39:26"
|
||||
},
|
||||
{
|
||||
"name": "sebastian/code-unit-reverse-lookup",
|
||||
@ -750,16 +750,16 @@
|
||||
},
|
||||
{
|
||||
"name": "sebastian/environment",
|
||||
"version": "1.3.5",
|
||||
"version": "1.3.6",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/sebastianbergmann/environment.git",
|
||||
"reference": "dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf"
|
||||
"reference": "2292b116f43c272ff4328083096114f84ea46a56"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf",
|
||||
"reference": "dc7a29032cf72b54f36dac15a1ca5b3a1b6029bf",
|
||||
"url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/2292b116f43c272ff4328083096114f84ea46a56",
|
||||
"reference": "2292b116f43c272ff4328083096114f84ea46a56",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -796,7 +796,7 @@
|
||||
"environment",
|
||||
"hhvm"
|
||||
],
|
||||
"time": "2016-02-26 18:40:46"
|
||||
"time": "2016-05-04 07:59:13"
|
||||
},
|
||||
{
|
||||
"name": "sebastian/exporter",
|
||||
@ -1101,7 +1101,7 @@
|
||||
},
|
||||
{
|
||||
"name": "symfony/yaml",
|
||||
"version": "v3.0.4",
|
||||
"version": "v3.0.5",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/symfony/yaml.git",
|
||||
|
@ -1,4 +1,4 @@
|
||||
# PHP Machine Learning library
|
||||
# PHP-ML - Machine Learning library for PHP
|
||||
|
||||
[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop)
|
||||
[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=develop)](http://php-ml.readthedocs.org/en/develop/?badge=develop)
|
||||
@ -37,22 +37,32 @@ composer require php-ai/php-ml
|
||||
## Features
|
||||
|
||||
* Classification
|
||||
* [SVC](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/svc/)
|
||||
* [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/)
|
||||
* [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/)
|
||||
* Regression
|
||||
* [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/)
|
||||
* [SVR](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/svr/)
|
||||
* Clustering
|
||||
* [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means)
|
||||
* [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan)
|
||||
* Cross Validation
|
||||
* [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split)
|
||||
* Preprocessing
|
||||
* [Normalization](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/normalization)
|
||||
* [Imputation missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values)
|
||||
* Feature Extraction
|
||||
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer)
|
||||
* Datasets
|
||||
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset)
|
||||
* Ready to use:
|
||||
* [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/)
|
||||
* [Wine](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/wine/)
|
||||
* [Glass](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/glass/)
|
||||
* Math
|
||||
* [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/)
|
||||
* [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/)
|
||||
* [Statistic](http://php-ml.readthedocs.io/en/latest/math/statistic/)
|
||||
|
||||
|
||||
## Contribute
|
||||
|
47
docs/machine-learning/classification/svc.md
Normal file
47
docs/machine-learning/classification/svc.md
Normal file
@ -0,0 +1,47 @@
|
||||
# Support Vector Classification
|
||||
|
||||
Classifier implementing Support Vector Machine based on libsvm.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
* $kernel (int) - kernel type to be used in the algorithm (default Kernel::LINEAR)
|
||||
* $cost (float) - parameter C of C-SVC (default 1.0)
|
||||
* $degree (int) - degree of the Kernel::POLYNOMIAL function (default 3)
|
||||
* $gamma (float) - kernel coefficient for ‘Kernel::RBF’, ‘Kernel::POLYNOMIAL’ and ‘Kernel::SIGMOID’. If gamma is ‘null’ then 1/features will be used instead.
|
||||
* $coef0 (float) - independent term in kernel function. It is only significant in ‘Kernel::POLYNOMIAL’ and ‘Kernel::SIGMOID’ (default 0.0)
|
||||
* $tolerance (float) - tolerance of termination criterion (default 0.001)
|
||||
* $cacheSize (int) - cache memory size in MB (default 100)
|
||||
* $shrinking (bool) - whether to use the shrinking heuristics (default true)
|
||||
* $probabilityEstimates (bool) - whether to enable probability estimates (default false)
|
||||
|
||||
```
|
||||
$classifier = new SVC(Kernel::LINEAR, $cost = 1000);
|
||||
$classifier = new SVC(Kernel::RBF, $cost = 1000, $degree = 3, $gamma = 6);
|
||||
```
|
||||
|
||||
### Train
|
||||
|
||||
To train a classifier simply provide train samples and labels (as `array`). Example:
|
||||
|
||||
```
|
||||
use Phpml\Classification\SVC;
|
||||
use Phpml\SupportVectorMachine\Kernel;
|
||||
|
||||
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||
$labels = ['a', 'a', 'a', 'b', 'b', 'b'];
|
||||
|
||||
$classifier = new SVC(Kernel::LINEAR, $cost = 1000);
|
||||
$classifier->train($samples, $labels);
|
||||
```
|
||||
|
||||
### Predict
|
||||
|
||||
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
||||
|
||||
```
|
||||
$classifier->predict([3, 2]);
|
||||
// return 'b'
|
||||
|
||||
$classifier->predict([[3, 2], [1, 5]]);
|
||||
// return ['b', 'a']
|
||||
```
|
42
docs/machine-learning/datasets/demo/glass.md
Normal file
42
docs/machine-learning/datasets/demo/glass.md
Normal file
@ -0,0 +1,42 @@
|
||||
# Glass Dataset
|
||||
|
||||
From USA Forensic Science Service; 6 types of glass; defined in terms of their oxide content (e.g. Na, Fe, K)
|
||||
|
||||
### Specification
|
||||
|
||||
| Classes | 6 |
|
||||
| Samples total | 214 |
|
||||
| Features per sample | 9 |
|
||||
|
||||
Samples per class:
|
||||
* 70 float processed building windows
|
||||
* 17 float processed vehicle windows
|
||||
* 76 non-float processed building windows
|
||||
* 13 containers
|
||||
* 9 tableware
|
||||
* 29 headlamps
|
||||
|
||||
### Load
|
||||
|
||||
To load the Glass dataset, simply use:
|
||||
|
||||
```
|
||||
use Phpml\Dataset\Demo\Glass;
|
||||
|
||||
$dataset = new Glass();
|
||||
```
|
||||
|
||||
### Several samples example
|
||||
|
||||
```
|
||||
RI: refractive index,Na: Sodium,Mg: Magnesium,Al: Aluminum,Si: Silicon,K: Potassium,Ca: Calcium,Ba: Barium,Fe: Iron,type of glass
|
||||
1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,building_windows_float_processed
|
||||
1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,building_windows_float_processed
|
||||
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,building_windows_float_processed
|
||||
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,building_windows_float_processed
|
||||
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,building_windows_float_processed
|
||||
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.00,0.26,building_windows_float_processed
|
||||
1.51743,13.30,3.60,1.14,73.09,0.58,8.17,0.00,0.00,building_windows_float_processed
|
||||
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.00,0.00,building_windows_float_processed
|
||||
1.51918,14.04,3.58,1.37,72.08,0.56,8.30,0.00,0.00,building_windows_float_processed
|
||||
```
|
@ -14,6 +14,8 @@ Most popular and widely available dataset of iris flower measurement and class n
|
||||
To load the Iris dataset, simply use:
|
||||
|
||||
```
|
||||
use Phpml\Dataset\Demo\Iris;
|
||||
|
||||
$dataset = new Iris();
|
||||
```
|
||||
|
||||
|
35
docs/machine-learning/datasets/demo/wine.md
Normal file
35
docs/machine-learning/datasets/demo/wine.md
Normal file
@ -0,0 +1,35 @@
|
||||
# Wine Dataset
|
||||
|
||||
These data are the results of a chemical analysis of wines grown in the same region in Italy but derived from three different cultivars. The analysis determined the quantities of 13 constituents found in each of the three types of wines.
|
||||
|
||||
### Specification
|
||||
|
||||
| Classes | 3 |
|
||||
| Samples per class | class 1 59; class 2 71; class 3 48 |
|
||||
| Samples total | 178 |
|
||||
| Features per sample | 13 |
|
||||
|
||||
### Load
|
||||
|
||||
To load the Wine dataset, simply use:
|
||||
|
||||
```
|
||||
use Phpml\Dataset\Demo\Wine;
|
||||
|
||||
$dataset = new Wine();
|
||||
```
|
||||
|
||||
### Several samples example
|
||||
|
||||
```
|
||||
alcohol,malic acid,ash,alcalinity of ash,magnesium,total phenols,flavanoids,nonflavanoid phenols,proanthocyanins,color intensity,hue,OD280/OD315 of diluted wines,proline,class
|
||||
14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065,1
|
||||
13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050,1
|
||||
13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185,1
|
||||
14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480,1
|
||||
13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735,1
|
||||
14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450,1
|
||||
14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290,1
|
||||
14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295,1
|
||||
14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045,1
|
||||
```
|
@ -0,0 +1,50 @@
|
||||
# Token Count Vectorizer
|
||||
|
||||
Transform a collection of text samples to a vector of token counts.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
* $tokenizer (Tokenizer) - tokenizer object (see below)
|
||||
* $minDF (float) - ignore tokens whose sample frequency is strictly lower than the given threshold. This value is also called cut-off in the literature. (default 0)
|
||||
|
||||
```
|
||||
use Phpml\FeatureExtraction\TokenCountVectorizer;
|
||||
use Phpml\Tokenization\WhitespaceTokenizer;
|
||||
|
||||
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
|
||||
```
|
||||
|
||||
### Transformation
|
||||
|
||||
To transform a collection of text samples use `transform` method. Example:
|
||||
|
||||
```
|
||||
$samples = [
|
||||
'Lorem ipsum dolor sit amet dolor',
|
||||
'Mauris placerat ipsum dolor',
|
||||
'Mauris diam eros fringilla diam',
|
||||
];
|
||||
|
||||
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
|
||||
$vectorizer->transform($samples);
|
||||
// return $vector = [
|
||||
// [0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 1],
|
||||
// [5 => 1, 6 => 1, 1 => 1, 2 => 1],
|
||||
// [5 => 1, 7 => 2, 8 => 1, 9 => 1],
|
||||
//];
|
||||
|
||||
```
|
||||
|
||||
### Vocabulary
|
||||
|
||||
You can extract vocabulary using `getVocabulary()` method. Example:
|
||||
|
||||
```
|
||||
$vectorizer->getVocabulary();
|
||||
// return $vocabulary = ['Lorem', 'ipsum', 'dolor', 'sit', 'amet', 'Mauris', 'placerat', 'diam', 'eros', 'fringilla'];
|
||||
```
|
||||
|
||||
### Tokenizers
|
||||
|
||||
* WhitespaceTokenizer - select tokens by whitespace.
|
||||
* WordTokenizer - select tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator).
|
@ -0,0 +1,45 @@
|
||||
# Imputation missing values
|
||||
|
||||
For various reasons, many real world datasets contain missing values, often encoded as blanks, NaNs or other placeholders.
|
||||
To solve this problem you can use the `Imputer` class.
|
||||
|
||||
## Constructor Parameters
|
||||
|
||||
* $missingValue (mixed) - this value will be replaced (default null)
|
||||
* $strategy (Strategy) - imputation strategy (ready to use: MeanStrategy, MedianStrategy, MostFrequentStrategy)
|
||||
* $axis (int) - axis for strategy, Imputer::AXIS_COLUMN or Imputer::AXIS_ROW
|
||||
|
||||
```
|
||||
$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);
|
||||
$imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_ROW);
|
||||
```
|
||||
|
||||
## Strategy
|
||||
|
||||
* MeanStrategy - replace missing values using the mean along the axis
|
||||
* MedianStrategy - replace missing values using the median along the axis
|
||||
* MostFrequentStrategy - replace missing values using the most frequent value along the axis
|
||||
|
||||
## Example of use
|
||||
|
||||
```
|
||||
$data = [
|
||||
[1, null, 3, 4],
|
||||
[4, 3, 2, 1],
|
||||
[null, 6, 7, 8],
|
||||
[8, 7, null, 5],
|
||||
];
|
||||
|
||||
$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);
|
||||
$imputer->preprocess($data);
|
||||
|
||||
/*
|
||||
$data = [
|
||||
[1, 5.33, 3, 4],
|
||||
[4, 3, 2, 1],
|
||||
[4.33, 6, 7, 8],
|
||||
[8, 7, 4, 5],
|
||||
];
|
||||
*/
|
||||
|
||||
```
|
59
docs/machine-learning/preprocessing/normalization.md
Normal file
59
docs/machine-learning/preprocessing/normalization.md
Normal file
@ -0,0 +1,59 @@
|
||||
# Normalization
|
||||
|
||||
Normalization is the process of scaling individual samples to have unit norm.
|
||||
|
||||
## L2 norm
|
||||
|
||||
[http://mathworld.wolfram.com/L2-Norm.html](http://mathworld.wolfram.com/L2-Norm.html)
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
use Phpml\Preprocessing\Normalizer;
|
||||
|
||||
$samples = [
|
||||
[1, -1, 2],
|
||||
[2, 0, 0],
|
||||
[0, 1, -1],
|
||||
];
|
||||
|
||||
$normalizer = new Normalizer();
|
||||
$normalizer->preprocess($samples);
|
||||
|
||||
/*
|
||||
$samples = [
|
||||
[0.4, -0.4, 0.81],
|
||||
[1.0, 0.0, 0.0],
|
||||
[0.0, 0.7, -0.7],
|
||||
];
|
||||
*/
|
||||
|
||||
```
|
||||
|
||||
## L1 norm
|
||||
|
||||
[http://mathworld.wolfram.com/L1-Norm.html](http://mathworld.wolfram.com/L1-Norm.html)
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
use Phpml\Preprocessing\Normalizer;
|
||||
|
||||
$samples = [
|
||||
[1, -1, 2],
|
||||
[2, 0, 0],
|
||||
[0, 1, -1],
|
||||
];
|
||||
|
||||
$normalizer = new Normalizer(Normalizer::NORM_L1);
|
||||
$normalizer->preprocess($samples);
|
||||
|
||||
/*
|
||||
$samples = [
|
||||
[0.25, -0.25, 0.5],
|
||||
[1.0, 0.0, 0.0],
|
||||
[0.0, 0.5, -0.5],
|
||||
];
|
||||
*/
|
||||
|
||||
```
|
44
docs/machine-learning/regression/svr.md
Normal file
44
docs/machine-learning/regression/svr.md
Normal file
@ -0,0 +1,44 @@
|
||||
# Support Vector Regression
|
||||
|
||||
Class implementing Epsilon-Support Vector Regression based on libsvm.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
* $kernel (int) - kernel type to be used in the algorithm (default Kernel::LINEAR)
|
||||
* $degree (int) - degree of the Kernel::POLYNOMIAL function (default 3)
|
||||
* $epsilon (float) - epsilon in loss function of epsilon-SVR (default 0.1)
|
||||
* $cost (float) - parameter C of epsilon-SVR (default 1.0)
|
||||
* $gamma (float) - kernel coefficient for ‘Kernel::RBF’, ‘Kernel::POLYNOMIAL’ and ‘Kernel::SIGMOID’. If gamma is ‘null’ then 1/features will be used instead.
|
||||
* $coef0 (float) - independent term in kernel function. It is only significant in ‘Kernel::POLYNOMIAL’ and ‘Kernel::SIGMOID’ (default 0.0)
|
||||
* $tolerance (float) - tolerance of termination criterion (default 0.001)
|
||||
* $cacheSize (int) - cache memory size in MB (default 100)
|
||||
* $shrinking (bool) - whether to use the shrinking heuristics (default true)
|
||||
|
||||
```
|
||||
$regression = new SVR(Kernel::LINEAR);
|
||||
$regression = new SVR(Kernel::LINEAR, $degree = 3, $epsilon=10.0);
|
||||
```
|
||||
|
||||
### Train
|
||||
|
||||
To train a model simply provide training samples and target values (as `array`). Example:
|
||||
|
||||
```
|
||||
use Phpml\Regression\SVR;
|
||||
use Phpml\SupportVectorMachine\Kernel;
|
||||
|
||||
$samples = [[60], [61], [62], [63], [65]];
|
||||
$targets = [3.1, 3.6, 3.8, 4, 4.1];
|
||||
|
||||
$regression = new SVR(Kernel::LINEAR);
|
||||
$regression->train($samples, $targets);
|
||||
```
|
||||
|
||||
### Predict
|
||||
|
||||
To predict sample target value use `predict` method. You can provide one sample or array of samples:
|
||||
|
||||
```
|
||||
$regression->predict([64]);
|
||||
// return 4.03
|
||||
```
|
80
docs/math/statistic.md
Normal file
80
docs/math/statistic.md
Normal file
@ -0,0 +1,80 @@
|
||||
# Statistic
|
||||
|
||||
Selected statistical methods.
|
||||
|
||||
## Correlation
|
||||
|
||||
Correlation coefficients are used in statistics to measure how strong a relationship is between two variables. There are several types of correlation coefficient.
|
||||
|
||||
### Pearson correlation
|
||||
|
||||
Pearson’s correlation or Pearson correlation is a correlation coefficient commonly used in linear regression.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
use Phpml\Math\Statistic\Correlation;
|
||||
|
||||
$x = [43, 21, 25, 42, 57, 59];
|
||||
$y = [99, 65, 79, 75, 87, 82];
|
||||
|
||||
Correlation::pearson($x, $y);
|
||||
// return 0.549
|
||||
```
|
||||
|
||||
## Mean
|
||||
|
||||
### Arithmetic
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
Mean::arithmetic([2, 5]);
|
||||
// return 3.5
|
||||
|
||||
Mean::arithmetic([0.5, 0.5, 1.5, 2.5, 3.5]);
|
||||
// return 1.7
|
||||
```
|
||||
|
||||
## Median
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
Mean::median([5, 2, 6, 1, 3, 4]);
|
||||
// return 3.5
|
||||
|
||||
Mean::median([5, 2, 6, 1, 3]);
|
||||
// return 3
|
||||
```
|
||||
|
||||
## Mode
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
Mean::mode([5, 2, 6, 1, 3, 4, 6, 6, 5]);
|
||||
// return 6
|
||||
```
|
||||
|
||||
## Standard Deviation
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
use Phpml\Math\Statistic\StandardDeviation;
|
||||
|
||||
$population = [5, 6, 8, 9];
|
||||
StandardDeviation::population($population);
|
||||
// return 1.825
|
||||
|
||||
$population = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025];
|
||||
StandardDeviation::population($population);
|
||||
// return 4079
|
||||
```
|
11
humbug.json.dist
Normal file
11
humbug.json.dist
Normal file
@ -0,0 +1,11 @@
|
||||
{
|
||||
"source": {
|
||||
"directories": [
|
||||
"src"
|
||||
]
|
||||
},
|
||||
"timeout": 10,
|
||||
"logs": {
|
||||
"text": "humbuglog.txt"
|
||||
}
|
||||
}
|
@ -1,17 +1,21 @@
|
||||
site_name: PHP Machine Learning (PHP-ML)
|
||||
site_name: PHP-ML - Machine Learning library for PHP
|
||||
pages:
|
||||
- Home: index.md
|
||||
- Machine Learning:
|
||||
- Classification:
|
||||
- SVC: machine-learning/classification/svc.md
|
||||
- KNearestNeighbors: machine-learning/classification/k-nearest-neighbors.md
|
||||
- NaiveBayes: machine-learning/classification/naive-bayes.md
|
||||
- Regression:
|
||||
- LeastSquares: machine-learning/regression/least-squares.md
|
||||
- SVR: machine-learning/regression/svr.md
|
||||
- Clustering:
|
||||
- KMeans: machine-learning/clustering/k-means.md
|
||||
- DBSCAN: machine-learning/clustering/dbscan.md
|
||||
- Cross Validation:
|
||||
- RandomSplit: machine-learning/cross-validation/random-split.md
|
||||
- Feature Extraction:
|
||||
- Token Count Vectorizer: machine-learning/feature-extraction/token-count-vectorizer.md
|
||||
- Datasets:
|
||||
- Array Dataset: machine-learning/datasets/array-dataset.md
|
||||
- CSV Dataset: machine-learning/datasets/csv-dataset.md
|
||||
|
@ -11,4 +11,11 @@
|
||||
<directory>tests/*</directory>
|
||||
</testsuite>
|
||||
</testsuites>
|
||||
|
||||
<filter>
|
||||
<whitelist processUncoveredFilesFromWhitelist="true">
|
||||
<directory suffix=".php">src</directory>
|
||||
</whitelist>
|
||||
</filter>
|
||||
|
||||
</phpunit>
|
||||
|
@ -4,8 +4,8 @@ declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Classification;
|
||||
|
||||
use Phpml\Classification\Traits\Predictable;
|
||||
use Phpml\Classification\Traits\Trainable;
|
||||
use Phpml\Helper\Predictable;
|
||||
use Phpml\Helper\Trainable;
|
||||
use Phpml\Math\Distance;
|
||||
use Phpml\Math\Distance\Euclidean;
|
||||
|
||||
|
@ -4,8 +4,8 @@ declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Classification;
|
||||
|
||||
use Phpml\Classification\Traits\Predictable;
|
||||
use Phpml\Classification\Traits\Trainable;
|
||||
use Phpml\Helper\Predictable;
|
||||
use Phpml\Helper\Trainable;
|
||||
|
||||
class NaiveBayes implements Classifier
|
||||
{
|
||||
|
31
src/Phpml/Classification/SVC.php
Normal file
31
src/Phpml/Classification/SVC.php
Normal file
@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Classification;
|
||||
|
||||
use Phpml\SupportVectorMachine\Kernel;
|
||||
use Phpml\SupportVectorMachine\SupportVectorMachine;
|
||||
use Phpml\SupportVectorMachine\Type;
|
||||
|
||||
class SVC extends SupportVectorMachine implements Classifier
|
||||
{
|
||||
/**
|
||||
* @param int $kernel
|
||||
* @param float $cost
|
||||
* @param int $degree
|
||||
* @param float|null $gamma
|
||||
* @param float $coef0
|
||||
* @param float $tolerance
|
||||
* @param int $cacheSize
|
||||
* @param bool $shrinking
|
||||
* @param bool $probabilityEstimates
|
||||
*/
|
||||
public function __construct(
|
||||
int $kernel = Kernel::LINEAR, float $cost = 1.0, int $degree = 3, float $gamma = null, float $coef0 = 0.0,
|
||||
float $tolerance = 0.001, int $cacheSize = 100, bool $shrinking = true,
|
||||
bool $probabilityEstimates = false
|
||||
) {
|
||||
parent::__construct(Type::C_SVC, $kernel, $cost, 0.5, $degree, $gamma, $coef0, 0.1, $tolerance, $cacheSize, $shrinking, $probabilityEstimates);
|
||||
}
|
||||
}
|
@ -1,61 +0,0 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Classification;
|
||||
|
||||
use Phpml\Classification\Traits\Predictable;
|
||||
use Phpml\Classification\Traits\Trainable;
|
||||
use Phpml\Math\Kernel;
|
||||
|
||||
class SupportVectorMachine implements Classifier
{
    use Trainable, Predictable;

    /**
     * Kernel object given to the constructor (an RBF kernel when none is given).
     *
     * @var Kernel
     */
    private $kernel;

    /**
     * Value of the constructor's $C argument.
     *
     * @var float
     */
    private $C;

    /**
     * Value of the constructor's $tolerance argument.
     *
     * @var float
     */
    private $tolerance;

    /**
     * Value of the constructor's $upperBound argument.
     *
     * @var int
     */
    private $upperBound;

    /**
     * Stores the configuration; nothing is computed at construction time.
     *
     * @param Kernel $kernel     when null, a Kernel\RBF built with .001 is used
     * @param float  $C
     * @param float  $tolerance
     * @param int    $upperBound
     */
    public function __construct(Kernel $kernel = null, float $C = 1.0, float $tolerance = .001, int $upperBound = 100)
    {
        $this->kernel = null !== $kernel ? $kernel : new Kernel\RBF(.001);
        $this->C = $C;
        $this->tolerance = $tolerance;
        $this->upperBound = $upperBound;
    }

    /**
     * Placeholder: the body is empty, so calling it yields null.
     *
     * @param array $sample
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
    }
}
|
@ -49,7 +49,7 @@ class Point implements ArrayAccess
|
||||
$distance += $difference * $difference;
|
||||
}
|
||||
|
||||
return $precise ? sqrt($distance) : $distance;
|
||||
return $precise ? sqrt((float) $distance) : $distance;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -150,37 +150,11 @@ class Space extends SplObjectStorage
|
||||
{
|
||||
switch ($initMethod) {
|
||||
case KMeans::INIT_RANDOM:
|
||||
list($min, $max) = $this->getBoundaries();
|
||||
for ($n = 0; $n < $clustersNumber; ++$n) {
|
||||
$clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
|
||||
}
|
||||
$clusters = $this->initializeRandomClusters($clustersNumber);
|
||||
break;
|
||||
|
||||
case KMeans::INIT_KMEANS_PLUS_PLUS:
|
||||
$position = rand(1, count($this));
|
||||
for ($i = 1, $this->rewind(); $i < $position && $this->valid(); $i++, $this->next());
|
||||
$clusters[] = new Cluster($this, $this->current()->getCoordinates());
|
||||
|
||||
$distances = new SplObjectStorage();
|
||||
|
||||
for ($i = 1; $i < $clustersNumber; ++$i) {
|
||||
$sum = 0;
|
||||
foreach ($this as $point) {
|
||||
$distance = $point->getDistanceWith($point->getClosest($clusters));
|
||||
$sum += $distances[$point] = $distance;
|
||||
}
|
||||
|
||||
$sum = rand(0, (int) $sum);
|
||||
foreach ($this as $point) {
|
||||
if (($sum -= $distances[$point]) > 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$clusters[] = new Cluster($this, $point->getCoordinates());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
$clusters = $this->initializeKMPPClusters($clustersNumber);
|
||||
break;
|
||||
}
|
||||
$clusters[0]->attachAll($this);
|
||||
@ -230,4 +204,56 @@ class Space extends SplObjectStorage
|
||||
|
||||
return $convergence;
|
||||
}
|
||||
|
||||
/**
 * Creates $clustersNumber clusters, each placed at the coordinates of a point
 * obtained from getRandomPoint() using the limits returned by getBoundaries().
 *
 * @param int $clustersNumber
 *
 * @return array list of Cluster objects (empty when $clustersNumber is not positive)
 */
private function initializeRandomClusters(int $clustersNumber)
{
    list($min, $max) = $this->getBoundaries();

    $clusters = [];
    $remaining = $clustersNumber;
    while ($remaining-- > 0) {
        $coordinates = $this->getRandomPoint($min, $max)->getCoordinates();
        $clusters[] = new Cluster($this, $coordinates);
    }

    return $clusters;
}
|
||||
|
||||
/**
 * Seeds clusters from the stored points: the first centre is a point picked at a
 * random position in the storage, every further centre is picked by walking the
 * points and subtracting each point's distance to its closest existing cluster
 * from a random threshold until the threshold is used up.
 *
 * @param int $clustersNumber
 *
 * @return array list of Cluster objects
 */
protected function initializeKMPPClusters(int $clustersNumber)
{
    // Move the storage cursor to a random 1-based position; that point seeds the first cluster.
    $position = rand(1, count($this));
    $this->rewind();
    for ($step = 1; $step < $position && $this->valid(); ++$step) {
        $this->next();
    }

    $clusters = [new Cluster($this, $this->current()->getCoordinates())];
    $distances = new SplObjectStorage();

    for ($created = 1; $created < $clustersNumber; ++$created) {
        // Distance of every point to its closest already-created cluster, plus the running total.
        $sum = 0;
        foreach ($this as $point) {
            $distances[$point] = $point->getDistanceWith($point->getClosest($clusters));
            $sum += $distances[$point];
        }

        // The total is truncated to an integer before the random draw.
        $threshold = rand(0, (int) $sum);
        foreach ($this as $point) {
            $threshold -= $distances[$point];
            if ($threshold > 0) {
                continue;
            }

            $clusters[] = new Cluster($this, $point->getCoordinates());
            break;
        }
    }

    return $clusters;
}
|
||||
}
|
||||
|
@ -6,7 +6,6 @@ namespace Phpml\Dataset;
|
||||
|
||||
interface Dataset
|
||||
{
|
||||
const SOME = 'z';
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
|
16
src/Phpml/Exception/NormalizerException.php
Normal file
16
src/Phpml/Exception/NormalizerException.php
Normal file
@ -0,0 +1,16 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class NormalizerException extends \Exception
{
    /**
     * Named constructor for the error raised when a norm identifier is not recognised.
     *
     * @return NormalizerException
     */
    public static function unknownNorm()
    {
        $message = 'Unknown norm supplied.';

        return new self($message);
    }
}
|
163
src/Phpml/FeatureExtraction/TokenCountVectorizer.php
Normal file
163
src/Phpml/FeatureExtraction/TokenCountVectorizer.php
Normal file
@ -0,0 +1,163 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\FeatureExtraction;
|
||||
|
||||
use Phpml\Tokenization\Tokenizer;
|
||||
|
||||
class TokenCountVectorizer implements Vectorizer
{
    /**
     * Splits each sample into tokens.
     *
     * @var Tokenizer
     */
    private $tokenizer;

    /**
     * Minimum document frequency (samples containing the token / samples in the batch)
     * a token needs in order to stay in the output. 0 disables the filter.
     *
     * @var float
     */
    private $minDF;

    /**
     * Map token => column index, in order of first appearance.
     *
     * @var array
     */
    private $vocabulary;

    /**
     * Map token => number of samples in which the token appeared.
     * It is never reset, so it keeps growing across transform() calls.
     *
     * @var array
     */
    private $frequencies;

    /**
     * @param Tokenizer $tokenizer
     * @param float     $minDF
     */
    public function __construct(Tokenizer $tokenizer, float $minDF = 0)
    {
        $this->tokenizer = $tokenizer;
        $this->minDF = $minDF;
        $this->vocabulary = [];
        $this->frequencies = [];
    }

    /**
     * Replaces every text sample by a sparse map "column index => occurrences"
     * and then drops the columns whose document frequency is below $minDF.
     *
     * @param array $samples
     *
     * @return array
     */
    public function transform(array $samples): array
    {
        foreach ($samples as $index => $sample) {
            $samples[$index] = $this->transformSample($sample);
        }

        return $this->checkDocumentFrequency($samples);
    }

    /**
     * @return array map column index => token
     */
    public function getVocabulary()
    {
        return array_flip($this->vocabulary);
    }

    /**
     * Counts token occurrences of one sample and records, once per sample,
     * that each distinct token was seen in a document.
     *
     * @param string $sample
     *
     * @return array map column index => occurrences in this sample
     */
    private function transformSample(string $sample)
    {
        $counts = [];

        foreach ($this->tokenizer->tokenize($sample) as $token) {
            $index = $this->getTokenIndex($token);

            if (!isset($counts[$index])) {
                $counts[$index] = 0;
                // First occurrence in this sample: document frequency counts samples,
                // not occurrences, so it is bumped only here.
                $this->updateFrequency($token);
            }

            ++$counts[$index];
        }

        return $counts;
    }

    /**
     * Returns the column index of a token, registering the token when it is new.
     *
     * @param string $token
     *
     * @return mixed
     */
    private function getTokenIndex(string $token)
    {
        if (!isset($this->vocabulary[$token])) {
            $this->vocabulary[$token] = count($this->vocabulary);
        }

        return $this->vocabulary[$token];
    }

    /**
     * Increments the document counter of a token.
     *
     * @param string $token
     */
    private function updateFrequency(string $token)
    {
        if (!isset($this->frequencies[$token])) {
            $this->frequencies[$token] = 0;
        }

        ++$this->frequencies[$token];
    }

    /**
     * Applies the $minDF filter to already vectorized samples (no-op when $minDF is 0).
     *
     * @param array $samples
     *
     * @return array
     */
    private function checkDocumentFrequency(array $samples)
    {
        if ($this->minDF > 0) {
            $beyondMinimum = $this->getBeyondMinimumIndexes(count($samples));
            foreach ($samples as $index => $sample) {
                $samples[$index] = $this->unsetBeyondMinimum($sample, $beyondMinimum);
            }
        }

        return $samples;
    }

    /**
     * Removes the given column indexes from one vectorized sample.
     *
     * @param array $sample
     * @param array $beyondMinimum
     *
     * @return array
     */
    private function unsetBeyondMinimum(array $sample, array $beyondMinimum)
    {
        foreach ($beyondMinimum as $index) {
            unset($sample[$index]);
        }

        return $sample;
    }

    /**
     * Lists the column indexes of tokens whose document frequency is below $minDF.
     *
     * @param int $samplesCount
     *
     * @return array
     */
    private function getBeyondMinimumIndexes(int $samplesCount)
    {
        $indexes = [];
        foreach ($this->frequencies as $token => $frequency) {
            if (($frequency / $samplesCount) < $this->minDF) {
                // Numeric tokens ("2016") come back from the array as int keys;
                // cast so the strictly typed string parameter accepts them.
                $indexes[] = $this->getTokenIndex((string) $token);
            }
        }

        return $indexes;
    }
}
|
15
src/Phpml/FeatureExtraction/Vectorizer.php
Normal file
15
src/Phpml/FeatureExtraction/Vectorizer.php
Normal file
@ -0,0 +1,15 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\FeatureExtraction;
|
||||
|
||||
interface Vectorizer
{
    /**
     * Converts a list of raw samples into their vector representation.
     *
     * @param array $samples samples to convert
     *
     * @return array converted samples
     */
    public function transform(array $samples): array;
}
|
@ -2,7 +2,7 @@
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Classification\Traits;
|
||||
namespace Phpml\Helper;
|
||||
|
||||
trait Predictable
|
||||
{
|
@ -2,7 +2,7 @@
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Classification\Traits;
|
||||
namespace Phpml\Helper;
|
||||
|
||||
trait Trainable
|
||||
{
|
@ -30,6 +30,6 @@ class Euclidean implements Distance
|
||||
$distance += pow($a[$i] - $b[$i], 2);
|
||||
}
|
||||
|
||||
return sqrt($distance);
|
||||
return sqrt((float) $distance);
|
||||
}
|
||||
}
|
||||
|
@ -147,7 +147,7 @@ class Matrix
|
||||
for ($j = 0; $j < $this->columns; ++$j) {
|
||||
$subMatrix = $this->crossOut(0, $j);
|
||||
$minor = $this->matrix[0][$j] * $subMatrix->getDeterminant();
|
||||
$determinant += fmod($j, 2) == 0 ? $minor : -$minor;
|
||||
$determinant += fmod((float) $j, 2.0) == 0 ? $minor : -$minor;
|
||||
}
|
||||
}
|
||||
|
||||
@ -236,7 +236,7 @@ class Matrix
|
||||
for ($i = 0; $i < $this->rows; ++$i) {
|
||||
for ($j = 0; $j < $this->columns; ++$j) {
|
||||
$minor = $this->crossOut($i, $j)->getDeterminant();
|
||||
$newMatrix[$i][$j] = fmod($i + $j, 2) == 0 ? $minor : -$minor;
|
||||
$newMatrix[$i][$j] = fmod((float) ($i + $j), 2.0) == 0 ? $minor : -$minor;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -38,7 +38,7 @@ class Correlation
|
||||
$b2 = $b2 + pow($b, 2);
|
||||
}
|
||||
|
||||
$corr = $axb / sqrt($a2 * $b2);
|
||||
$corr = $axb / sqrt((float) ($a2 * $b2));
|
||||
|
||||
return $corr;
|
||||
}
|
||||
|
@ -4,15 +4,72 @@ declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Math\Statistic;
|
||||
|
||||
use Phpml\Exception\InvalidArgumentException;
|
||||
|
||||
class Mean
|
||||
{
|
||||
/**
|
||||
* @param array $a
|
||||
* @param array $numbers
|
||||
*
|
||||
* @return float
|
||||
*
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public static function arithmetic(array $a)
|
||||
public static function arithmetic(array $numbers)
|
||||
{
|
||||
return array_sum($a) / count($a);
|
||||
self::checkArrayLength($numbers);
|
||||
|
||||
return array_sum($numbers) / count($numbers);
|
||||
}
|
||||
|
||||
/**
 * Returns the middle value of the numerically sorted input; for an even number
 * of elements the two central values are averaged.
 *
 * @param array $numbers
 *
 * @return float|mixed
 *
 * @throws InvalidArgumentException when $numbers is empty
 */
public static function median(array $numbers)
{
    self::checkArrayLength($numbers);

    sort($numbers, SORT_NUMERIC);

    $count = count($numbers);
    $upperMiddle = (int) ($count / 2);

    if (1 === $count % 2) {
        return $numbers[$upperMiddle];
    }

    return ($numbers[$upperMiddle] + $numbers[$upperMiddle - 1]) / 2;
}
|
||||
|
||||
/**
 * Returns the most frequent value; on a tie the value seen first wins.
 *
 * Integers and strings are counted by array key, as array_count_values() does.
 * Floats, which array_count_values() rejects with a warning, are counted under
 * their exported textual form and returned as the original float. Values of any
 * other type are ignored.
 *
 * @param array $numbers
 *
 * @return mixed
 *
 * @throws InvalidArgumentException when $numbers is empty or holds no countable value
 */
public static function mode(array $numbers)
{
    self::checkArrayLength($numbers);

    $counts = [];
    $floats = [];
    foreach ($numbers as $number) {
        if (is_float($number)) {
            // Floats cannot be used as array keys without truncation.
            $key = var_export($number, true);
            $floats[$key] = $number;
        } elseif (is_int($number) || is_string($number)) {
            $key = $number;
        } else {
            continue;
        }

        $counts[$key] = isset($counts[$key]) ? $counts[$key] + 1 : 1;
    }

    // Guards max() below against an empty count table.
    self::checkArrayLength($counts);

    $winner = array_search(max($counts), $counts, true);

    return isset($floats[$winner]) ? $floats[$winner] : $winner;
}
|
||||
|
||||
/**
 * Rejects empty input.
 *
 * @param array $array
 *
 * @throws InvalidArgumentException when $array has no elements
 */
private static function checkArrayLength(array $array)
{
    if ([] === $array) {
        throw InvalidArgumentException::arrayCantBeEmpty();
    }
}
|
||||
}
|
||||
|
@ -39,6 +39,6 @@ class StandardDeviation
|
||||
--$n;
|
||||
}
|
||||
|
||||
return sqrt($carry / $n);
|
||||
return sqrt((float) ($carry / $n));
|
||||
}
|
||||
}
|
||||
|
86
src/Phpml/Preprocessing/Imputer.php
Normal file
86
src/Phpml/Preprocessing/Imputer.php
Normal file
@ -0,0 +1,86 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
use Phpml\Preprocessing\Imputer\Strategy;
|
||||
|
||||
class Imputer implements Preprocessor
{
    const AXIS_COLUMN = 0;
    const AXIS_ROW = 1;

    /**
     * Marker that identifies a missing feature; compared with strict identity.
     *
     * @var mixed
     */
    private $missingValue;

    /**
     * Computes the replacement from the known values of the chosen axis.
     *
     * @var Strategy
     */
    private $strategy;

    /**
     * self::AXIS_COLUMN or self::AXIS_ROW.
     *
     * @var int
     */
    private $axis;

    /**
     * $missingValue has always had to be passed because a required parameter follows
     * it, so it is declared without a default (a default there is deprecated since PHP 8.0).
     *
     * @param mixed    $missingValue
     * @param Strategy $strategy
     * @param int      $axis
     */
    public function __construct($missingValue, Strategy $strategy, int $axis = self::AXIS_COLUMN)
    {
        $this->missingValue = $missingValue;
        $this->strategy = $strategy;
        $this->axis = $axis;
    }

    /**
     * Replaces, in place, every feature identical to the missing-value marker.
     *
     * @param array $samples
     */
    public function preprocess(array &$samples)
    {
        foreach ($samples as &$sample) {
            $this->preprocessSample($sample, $samples);
        }
        // Drop the reference so later writes to $sample cannot alter the last row.
        unset($sample);
    }

    /**
     * Fills the missing features of one sample.
     *
     * @param array $sample
     * @param array $samples
     */
    private function preprocessSample(array &$sample, array $samples)
    {
        foreach ($sample as $column => &$value) {
            if ($value === $this->missingValue) {
                $value = $this->strategy->replaceValue($this->getAxis($column, $sample, $samples));
            }
        }
        unset($value);
    }

    /**
     * Collects the known (non-missing) values the strategy works on: the current
     * row for AXIS_ROW, otherwise the given column across all samples.
     *
     * @param int   $column
     * @param array $currentSample
     * @param array $samples
     *
     * @return array
     */
    private function getAxis(int $column, array $currentSample, array $samples): array
    {
        $missing = $this->missingValue;

        if (self::AXIS_ROW === $this->axis) {
            // Strict identity, like every other missing-value check in this class;
            // keys are preserved.
            return array_filter($currentSample, function ($value) use ($missing) {
                return $value !== $missing;
            });
        }

        $axis = [];
        foreach ($samples as $sample) {
            if ($sample[$column] !== $missing) {
                $axis[] = $sample[$column];
            }
        }

        return $axis;
    }
}
|
15
src/Phpml/Preprocessing/Imputer/Strategy.php
Normal file
15
src/Phpml/Preprocessing/Imputer/Strategy.php
Normal file
@ -0,0 +1,15 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing\Imputer;
|
||||
|
||||
interface Strategy
{
    /**
     * Computes the value that replaces a missing feature.
     *
     * @param array $currentAxis values the replacement is derived from
     *
     * @return mixed
     */
    public function replaceValue(array $currentAxis);
}
|
21
src/Phpml/Preprocessing/Imputer/Strategy/MeanStrategy.php
Normal file
21
src/Phpml/Preprocessing/Imputer/Strategy/MeanStrategy.php
Normal file
@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing\Imputer\Strategy;
|
||||
|
||||
use Phpml\Preprocessing\Imputer\Strategy;
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
class MeanStrategy implements Strategy
{
    /**
     * Replaces a missing value with Mean::arithmetic() of the axis values.
     *
     * @param array $currentAxis
     *
     * @return float
     */
    public function replaceValue(array $currentAxis)
    {
        $replacement = Mean::arithmetic($currentAxis);

        return $replacement;
    }
}
|
21
src/Phpml/Preprocessing/Imputer/Strategy/MedianStrategy.php
Normal file
21
src/Phpml/Preprocessing/Imputer/Strategy/MedianStrategy.php
Normal file
@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing\Imputer\Strategy;
|
||||
|
||||
use Phpml\Preprocessing\Imputer\Strategy;
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
class MedianStrategy implements Strategy
{
    /**
     * Replaces a missing value with Mean::median() of the axis values.
     *
     * @param array $currentAxis
     *
     * @return float
     */
    public function replaceValue(array $currentAxis)
    {
        $replacement = Mean::median($currentAxis);

        return $replacement;
    }
}
|
@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing\Imputer\Strategy;
|
||||
|
||||
use Phpml\Preprocessing\Imputer\Strategy;
|
||||
use Phpml\Math\Statistic\Mean;
|
||||
|
||||
class MostFrequentStrategy implements Strategy
{
    /**
     * Replaces a missing value with Mean::mode() of the axis values.
     *
     * @param array $currentAxis
     *
     * @return float|mixed
     */
    public function replaceValue(array $currentAxis)
    {
        $replacement = Mean::mode($currentAxis);

        return $replacement;
    }
}
|
83
src/Phpml/Preprocessing/Normalizer.php
Normal file
83
src/Phpml/Preprocessing/Normalizer.php
Normal file
@ -0,0 +1,83 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
use Phpml\Exception\NormalizerException;
|
||||
|
||||
class Normalizer implements Preprocessor
{
    const NORM_L1 = 1;
    const NORM_L2 = 2;

    /**
     * self::NORM_L1 or self::NORM_L2.
     *
     * @var int
     */
    private $norm;

    /**
     * @param int $norm
     *
     * @throws NormalizerException when $norm is neither NORM_L1 nor NORM_L2
     */
    public function __construct(int $norm = self::NORM_L2)
    {
        if (!in_array($norm, [self::NORM_L1, self::NORM_L2], true)) {
            throw NormalizerException::unknownNorm();
        }

        $this->norm = $norm;
    }

    /**
     * Scales every sample, in place, with the configured norm.
     *
     * @param array $samples
     */
    public function preprocess(array &$samples)
    {
        foreach ($samples as &$sample) {
            // The constructor guarantees the norm is one of the two constants.
            if (self::NORM_L1 === $this->norm) {
                $this->normalizeL1($sample);
            } else {
                $this->normalizeL2($sample);
            }
        }
        // Break the reference so it cannot leak into later code.
        unset($sample);
    }

    /**
     * Divides each feature by the sum of absolute values. A sample whose sum is
     * zero is replaced by a list of count($sample) entries of 1 / count($sample).
     *
     * @param array $sample
     */
    private function normalizeL1(array &$sample)
    {
        $count = count($sample);
        if (0 === $count) {
            // Nothing to scale; also avoids dividing by zero below.
            return;
        }

        $norm1 = 0;
        foreach ($sample as $feature) {
            $norm1 += abs($feature);
        }

        if (0 == $norm1) {
            $sample = array_fill(0, $count, 1.0 / $count);

            return;
        }

        foreach ($sample as &$feature) {
            $feature = $feature / $norm1;
        }
        unset($feature);
    }

    /**
     * Divides each feature by the Euclidean length of the sample. A sample of
     * length zero is replaced by a list of ones.
     *
     * @param array $sample
     */
    private function normalizeL2(array &$sample)
    {
        $norm2 = 0;
        foreach ($sample as $feature) {
            $norm2 += $feature * $feature;
        }
        $norm2 = sqrt((float) $norm2);

        if (0 == $norm2) {
            $sample = array_fill(0, count($sample), 1);

            return;
        }

        foreach ($sample as &$feature) {
            $feature = $feature / $norm2;
        }
        unset($feature);
    }
}
|
13
src/Phpml/Preprocessing/Preprocessor.php
Normal file
13
src/Phpml/Preprocessing/Preprocessor.php
Normal file
@ -0,0 +1,13 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Preprocessing;
|
||||
|
||||
interface Preprocessor
{
    /**
     * Processes the samples; the array is taken by reference.
     *
     * @param array $samples
     */
    public function preprocess(array &$samples);
}
|
@ -4,10 +4,12 @@ declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Regression;
|
||||
|
||||
use Phpml\Helper\Predictable;
|
||||
use Phpml\Math\Matrix;
|
||||
|
||||
class LeastSquares implements Regression
|
||||
{
|
||||
use Predictable;
|
||||
/**
|
||||
* @var array
|
||||
*/
|
||||
@ -45,7 +47,7 @@ class LeastSquares implements Regression
|
||||
*
|
||||
* @return mixed
|
||||
*/
|
||||
public function predict($sample)
|
||||
public function predictSample(array $sample)
|
||||
{
|
||||
$result = $this->intercept;
|
||||
foreach ($this->coefficients as $index => $coefficient) {
|
||||
|
@ -13,9 +13,9 @@ interface Regression
|
||||
public function train(array $samples, array $targets);
|
||||
|
||||
/**
|
||||
* @param float $sample
|
||||
* @param array $samples
|
||||
*
|
||||
* @return mixed
|
||||
*/
|
||||
public function predict($sample);
|
||||
public function predict(array $samples);
|
||||
}
|
||||
|
31
src/Phpml/Regression/SVR.php
Normal file
31
src/Phpml/Regression/SVR.php
Normal file
@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Regression;
|
||||
|
||||
use Phpml\SupportVectorMachine\Kernel;
|
||||
use Phpml\SupportVectorMachine\SupportVectorMachine;
|
||||
use Phpml\SupportVectorMachine\Type;
|
||||
|
||||
class SVR extends SupportVectorMachine implements Regression
{
    /**
     * Configures the parent machine as Type::EPSILON_SVR. The parent's $nu is
     * fixed to 0.5 and probability estimates are switched off.
     *
     * @param int        $kernel
     * @param int        $degree
     * @param float      $epsilon
     * @param float      $cost
     * @param float|null $gamma
     * @param float      $coef0
     * @param float      $tolerance
     * @param int        $cacheSize
     * @param bool       $shrinking
     */
    public function __construct(
        int $kernel = Kernel::RBF,
        int $degree = 3,
        float $epsilon = 0.1,
        float $cost = 1.0,
        float $gamma = null,
        float $coef0 = 0.0,
        float $tolerance = 0.001,
        int $cacheSize = 100,
        bool $shrinking = true
    ) {
        parent::__construct(
            Type::EPSILON_SVR,
            $kernel,
            $cost,
            0.5,
            $degree,
            $gamma,
            $coef0,
            $epsilon,
            $tolerance,
            $cacheSize,
            $shrinking,
            false
        );
    }
}
|
101
src/Phpml/SupportVectorMachine/DataTransformer.php
Normal file
101
src/Phpml/SupportVectorMachine/DataTransformer.php
Normal file
@ -0,0 +1,101 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\SupportVectorMachine;
|
||||
|
||||
class DataTransformer
{
    /**
     * Renders samples and labels as text, one "<label> <index>:<value> ..." line per
     * label. Labels are written as-is when $targets is true, otherwise they are
     * replaced by the integers produced by numericLabels().
     *
     * @param array $samples
     * @param array $labels
     * @param bool  $targets
     *
     * @return string
     */
    public static function trainingSet(array $samples, array $labels, bool $targets = false): string
    {
        $numericLabels = $targets ? [] : self::numericLabels($labels);

        $lines = [];
        foreach ($labels as $index => $label) {
            $prefix = $targets ? $label : $numericLabels[$label];
            $lines[] = sprintf('%s %s %s', $prefix, self::sampleRow($samples[$index]), PHP_EOL);
        }

        return implode('', $lines);
    }

    /**
     * Renders samples for prediction; every line starts with the label 0.
     * A single flat sample is accepted and wrapped into a one-element list.
     *
     * @param array $samples
     *
     * @return string
     */
    public static function testSet(array $samples): string
    {
        $rows = is_array($samples[0]) ? $samples : [$samples];

        $set = '';
        foreach ($rows as $row) {
            $set .= '0 '.self::sampleRow($row).' '.PHP_EOL;
        }

        return $set;
    }

    /**
     * Maps each non-empty line of raw output back to the original label.
     *
     * @param string $rawPredictions
     * @param array  $labels
     *
     * @return array
     */
    public static function predictions(string $rawPredictions, array $labels): array
    {
        $numericLabels = self::numericLabels($labels);

        $results = [];
        foreach (explode(PHP_EOL, $rawPredictions) as $line) {
            if ('' === $line) {
                continue;
            }

            $results[] = array_search($line, $numericLabels);
        }

        return $results;
    }

    /**
     * Assigns consecutive integers (from 0) to distinct labels in order of first appearance.
     *
     * @param array $labels
     *
     * @return array map label => integer
     */
    public static function numericLabels(array $labels): array
    {
        $map = [];
        foreach ($labels as $label) {
            if (!isset($map[$label])) {
                $map[$label] = count($map);
            }
        }

        return $map;
    }

    /**
     * Renders one sample as space separated "<index + 1>:<feature>" pairs.
     *
     * @param array $sample
     *
     * @return string
     */
    private static function sampleRow(array $sample): string
    {
        $pairs = [];
        foreach ($sample as $index => $feature) {
            $pairs[] = sprintf('%s:%s', $index + 1, $feature);
        }

        return implode(' ', $pairs);
    }
}
|
28
src/Phpml/SupportVectorMachine/Kernel.php
Normal file
28
src/Phpml/SupportVectorMachine/Kernel.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\SupportVectorMachine;
|
||||
|
||||
abstract class Kernel
{
    /**
     * Linear kernel: u' * v.
     */
    const LINEAR = 0;

    /**
     * Polynomial kernel: (gamma * u' * v + coef0) ^ degree.
     */
    const POLYNOMIAL = 1;

    /**
     * Radial basis function kernel: exp(-gamma * |u - v|^2).
     */
    const RBF = 2;

    /**
     * Sigmoid kernel: tanh(gamma * u' * v + coef0).
     */
    const SIGMOID = 3;
}
|
230
src/Phpml/SupportVectorMachine/SupportVectorMachine.php
Normal file
230
src/Phpml/SupportVectorMachine/SupportVectorMachine.php
Normal file
@ -0,0 +1,230 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\SupportVectorMachine;
|
||||
|
||||
class SupportVectorMachine
{
    /**
     * One of the Type constants; passed to svm-train as -s.
     *
     * @var int
     */
    private $type;

    /**
     * One of the Kernel constants; passed to svm-train as -t.
     *
     * @var int
     */
    private $kernel;

    /**
     * Passed to svm-train as -c.
     *
     * @var float
     */
    private $cost;

    /**
     * Passed to svm-train as -n.
     *
     * @var float
     */
    private $nu;

    /**
     * Passed to svm-train as -d.
     *
     * @var int
     */
    private $degree;

    /**
     * Passed to svm-train as -g; the option is omitted when null.
     *
     * @var float|null
     */
    private $gamma;

    /**
     * Passed to svm-train as -r.
     *
     * @var float
     */
    private $coef0;

    /**
     * Passed to svm-train as -p.
     *
     * @var float
     */
    private $epsilon;

    /**
     * Passed to svm-train as -e.
     *
     * @var float
     */
    private $tolerance;

    /**
     * Passed to svm-train as -m.
     *
     * @var int
     */
    private $cacheSize;

    /**
     * Passed to svm-train as -h.
     *
     * @var bool
     */
    private $shrinking;

    /**
     * Passed to svm-train as -b.
     *
     * @var bool
     */
    private $probabilityEstimates;

    /**
     * Directory holding the svm-train / svm-predict binaries (with trailing separator).
     *
     * @var string
     */
    private $binPath;

    /**
     * Directory used for temporary files (with trailing separator).
     *
     * @var string
     */
    private $varPath;

    /**
     * Content of the model file written by svm-train.
     *
     * @var string
     */
    private $model;

    /**
     * Labels given to train(); used to translate class predictions back.
     *
     * @var array
     */
    private $labels;

    /**
     * @param int        $type
     * @param int        $kernel
     * @param float      $cost
     * @param float      $nu
     * @param int        $degree
     * @param float|null $gamma
     * @param float      $coef0
     * @param float      $epsilon
     * @param float      $tolerance
     * @param int        $cacheSize
     * @param bool       $shrinking
     * @param bool       $probabilityEstimates
     */
    public function __construct(
        int $type, int $kernel, float $cost = 1.0, float $nu = 0.5, int $degree = 3,
        float $gamma = null, float $coef0 = 0.0, float $epsilon = 0.1, float $tolerance = 0.001,
        int $cacheSize = 100, bool $shrinking = true, bool $probabilityEstimates = false
    ) {
        $this->type = $type;
        $this->kernel = $kernel;
        $this->cost = $cost;
        $this->nu = $nu;
        $this->degree = $degree;
        $this->gamma = $gamma;
        $this->coef0 = $coef0;
        $this->epsilon = $epsilon;
        $this->tolerance = $tolerance;
        $this->cacheSize = $cacheSize;
        $this->shrinking = $shrinking;
        $this->probabilityEstimates = $probabilityEstimates;

        // Three levels above this file, then bin/libsvm/ and var/.
        $rootPath = realpath(implode(DIRECTORY_SEPARATOR, [dirname(__FILE__), '..', '..', '..'])).DIRECTORY_SEPARATOR;

        $this->binPath = $rootPath.'bin'.DIRECTORY_SEPARATOR.'libsvm'.DIRECTORY_SEPARATOR;
        $this->varPath = $rootPath.'var'.DIRECTORY_SEPARATOR;
    }

    /**
     * Writes the training set to a temporary file, runs svm-train on it and keeps
     * the content of the produced model file. Temporary files are removed even
     * when a step fails.
     *
     * @param array $samples
     * @param array $labels
     *
     * @throws \RuntimeException when svm-train fails or produces no model file
     */
    public function train(array $samples, array $labels)
    {
        $this->labels = $labels;

        // Regression types keep the targets as-is instead of mapping labels to integers.
        $isRegression = in_array($this->type, [Type::EPSILON_SVR, Type::NU_SVR], true);
        $trainingSet = DataTransformer::trainingSet($samples, $labels, $isRegression);

        $trainingSetFileName = $this->varPath.uniqid();
        $modelFileName = $trainingSetFileName.'-model';

        try {
            file_put_contents($trainingSetFileName, $trainingSet);
            $this->execute($this->buildTrainCommand($trainingSetFileName, $modelFileName));
            $this->model = $this->readResult($modelFileName);
        } finally {
            $this->removeFiles([$trainingSetFileName, $modelFileName]);
        }
    }

    /**
     * @return string
     */
    public function getModel()
    {
        return $this->model;
    }

    /**
     * Runs svm-predict for the given samples against the stored model.
     *
     * For the classification types the numeric output is mapped back to the
     * training labels; otherwise the raw output lines are returned. When a single
     * flat sample is passed, only its prediction is returned instead of a list.
     *
     * @param array $samples
     *
     * @return array
     *
     * @throws \RuntimeException when svm-predict fails or produces no output file
     */
    public function predict(array $samples)
    {
        $testSet = DataTransformer::testSet($samples);

        $testSetFileName = $this->varPath.uniqid();
        $modelFileName = $testSetFileName.'-model';
        $outputFileName = $testSetFileName.'-output';

        try {
            file_put_contents($testSetFileName, $testSet);
            file_put_contents($modelFileName, $this->model);

            $command = sprintf('%ssvm-predict%s %s %s %s', $this->binPath, $this->getOSExtension(), $testSetFileName, $modelFileName, $outputFileName);
            $this->execute($command);

            $predictions = $this->readResult($outputFileName);
        } finally {
            $this->removeFiles([$testSetFileName, $modelFileName, $outputFileName]);
        }

        if (in_array($this->type, [Type::C_SVC, Type::NU_SVC], true)) {
            $predictions = DataTransformer::predictions($predictions, $this->labels);
        } else {
            $predictions = explode(PHP_EOL, trim($predictions));
        }

        if (!is_array($samples[0])) {
            return $predictions[0];
        }

        return $predictions;
    }

    /**
     * @return string '.exe' when PHP_OS starts with "WIN", otherwise an empty string
     */
    private function getOSExtension()
    {
        if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
            return '.exe';
        }

        return '';
    }

    /**
     * Assembles the svm-train command line from the configured parameters.
     *
     * @param string $trainingSetFileName
     * @param string $modelFileName
     *
     * @return string
     */
    private function buildTrainCommand(string $trainingSetFileName, string $modelFileName): string
    {
        return sprintf('%ssvm-train%s -s %s -t %s -c %s -n %s -d %s%s -r %s -p %s -m %s -e %s -h %d -b %d \'%s\' \'%s\'',
            $this->binPath,
            $this->getOSExtension(),
            $this->type,
            $this->kernel,
            $this->cost,
            $this->nu,
            $this->degree,
            $this->gamma !== null ? ' -g '.$this->gamma : '',
            $this->coef0,
            $this->epsilon,
            $this->cacheSize,
            $this->tolerance,
            $this->shrinking,
            $this->probabilityEstimates,
            $trainingSetFileName,
            $modelFileName
        );
    }

    /**
     * Runs a libsvm command and fails loudly on a non-zero exit status.
     *
     * @param string $command
     *
     * @throws \RuntimeException
     */
    private function execute(string $command)
    {
        $output = [];
        $exitCode = 0;
        exec(escapeshellcmd($command), $output, $exitCode);

        if (0 !== $exitCode) {
            throw new \RuntimeException(sprintf('libsvm command failed with exit code %d: %s', $exitCode, implode(PHP_EOL, $output)));
        }
    }

    /**
     * Reads a file the external command was expected to create.
     *
     * @param string $fileName
     *
     * @return string
     *
     * @throws \RuntimeException when the file is missing or unreadable
     */
    private function readResult(string $fileName): string
    {
        $content = is_file($fileName) ? file_get_contents($fileName) : false;
        if (false === $content) {
            throw new \RuntimeException(sprintf('libsvm did not produce the expected file "%s".', $fileName));
        }

        return $content;
    }

    /**
     * Deletes the temporary files that exist; missing ones are skipped.
     *
     * @param array $fileNames
     */
    private function removeFiles(array $fileNames)
    {
        foreach ($fileNames as $fileName) {
            if (is_file($fileName)) {
                unlink($fileName);
            }
        }
    }
}
|
33
src/Phpml/SupportVectorMachine/Type.php
Normal file
33
src/Phpml/SupportVectorMachine/Type.php
Normal file
@ -0,0 +1,33 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\SupportVectorMachine;
|
||||
|
||||
abstract class Type
{
    /**
     * C-SVC: classification.
     */
    const C_SVC = 0;

    /**
     * nu-SVC: classification.
     */
    const NU_SVC = 1;

    /**
     * One-class SVM: distribution estimation.
     */
    const ONE_CLASS_SVM = 2;

    /**
     * epsilon-SVR: regression.
     */
    const EPSILON_SVR = 3;

    /**
     * nu-SVR: regression.
     */
    const NU_SVR = 4;
}
|
15
src/Phpml/Tokenization/Tokenizer.php
Normal file
15
src/Phpml/Tokenization/Tokenizer.php
Normal file
@ -0,0 +1,15 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Tokenization;
|
||||
|
||||
interface Tokenizer
{
    /**
     * Splits a text into tokens.
     *
     * @param string $text
     *
     * @return array list of tokens
     */
    public function tokenize(string $text): array;
}
|
18
src/Phpml/Tokenization/WhitespaceTokenizer.php
Normal file
18
src/Phpml/Tokenization/WhitespaceTokenizer.php
Normal file
@ -0,0 +1,18 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Tokenization;
|
||||
|
||||
class WhitespaceTokenizer implements Tokenizer
{
    /**
     * Splits the text on runs of Unicode separator (\pZ) and control/other (\pC)
     * characters; empty pieces are dropped.
     *
     * @param string $text
     *
     * @return array
     */
    public function tokenize(string $text): array
    {
        $separatorPattern = '/[\pZ\pC]+/u';
        $noLimit = -1;

        return preg_split($separatorPattern, $text, $noLimit, PREG_SPLIT_NO_EMPTY);
    }
}
|
21
src/Phpml/Tokenization/WordTokenizer.php
Normal file
21
src/Phpml/Tokenization/WordTokenizer.php
Normal file
@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace Phpml\Tokenization;
|
||||
|
||||
class WordTokenizer implements Tokenizer
{
    /**
     * Returns every run of two or more word characters (\w, matched in UTF-8 mode).
     *
     * @param string $text
     *
     * @return array
     */
    public function tokenize(string $text): array
    {
        $matches = [];
        preg_match_all('/\w\w+/u', $text, $matches);

        // Index 0 holds the full-pattern matches.
        $words = $matches[0];

        return $words;
    }
}
|
45
tests/Phpml/Classification/SVCTest.php
Normal file
45
tests/Phpml/Classification/SVCTest.php
Normal file
@ -0,0 +1,45 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Classification;
|
||||
|
||||
use Phpml\Classification\SVC;
|
||||
use Phpml\SupportVectorMachine\Kernel;
|
||||
|
||||
/**
 * Tests for the SVC classifier trained with a linear kernel on two groups of points.
 */
class SVCTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Trains once, then predicts samples one at a time.
     */
    public function testPredictSingleSampleWithLinearKernel()
    {
        $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
        $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b'];

        $cost = 1000;
        $svc = new SVC(Kernel::LINEAR, $cost);
        $svc->train($trainSamples, $trainLabels);

        // [expected label, sample] pairs, checked in this order.
        $cases = [
            ['b', [3, 2]],
            ['b', [5, 1]],
            ['b', [4, 3]],
            ['b', [4, -5]],
            ['a', [2, 3]],
            ['a', [1, 2]],
            ['a', [1, 5]],
            ['a', [3, 10]],
        ];

        foreach ($cases as list($expectedLabel, $sample)) {
            $this->assertEquals($expectedLabel, $svc->predict($sample));
        }
    }

    /**
     * Trains once, then predicts a whole list of samples in a single call.
     */
    public function testPredictArrayOfSamplesWithLinearKernel()
    {
        $fitSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
        $fitLabels = ['a', 'a', 'a', 'b', 'b', 'b'];

        $unseenSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]];
        $expectedLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a'];

        $cost = 1000;
        $svc = new SVC(Kernel::LINEAR, $cost);
        $svc->train($fitSamples, $fitLabels);

        $this->assertEquals($expectedLabels, $svc->predict($unseenSamples));
    }
}
|
73
tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php
Normal file
73
tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php
Normal file
@ -0,0 +1,73 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Phpml\FeatureExtraction;
|
||||
|
||||
use Phpml\FeatureExtraction\TokenCountVectorizer;
|
||||
use Phpml\Tokenization\WhitespaceTokenizer;
|
||||
|
||||
/**
 * Tests for TokenCountVectorizer combined with the WhitespaceTokenizer.
 */
class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Each sample becomes a map of vocabulary index => token count.
     */
    public function testTokenCountVectorizerWithWhitespaceTokenizer()
    {
        $documents = [
            'Lorem ipsum dolor sit amet dolor',
            'Mauris placerat ipsum dolor',
            'Mauris diam eros fringilla diam',
        ];

        $expectedVocabulary = ['Lorem', 'ipsum', 'dolor', 'sit', 'amet', 'Mauris', 'placerat', 'diam', 'eros', 'fringilla'];
        $expectedCounts = [
            [0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 1],
            [5 => 1, 6 => 1, 1 => 1, 2 => 1],
            [5 => 1, 7 => 2, 8 => 1, 9 => 1],
        ];

        $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
        $counts = $vectorizer->transform($documents);

        $this->assertEquals($expectedCounts, $counts);
        $this->assertEquals($expectedVocabulary, $vectorizer->getVocabulary());
    }

    /**
     * Exercises the second constructor argument (minimum document frequency).
     */
    public function testMinimumDocumentTokenCountFrequency()
    {
        // Threshold 0.5: a word has to occur in at least half of the samples.
        $documents = [
            'Lorem ipsum dolor sit amet',
            'Lorem ipsum sit amet',
            'ipsum sit amet',
            'ipsum sit amet',
        ];

        $expectedVocabulary = ['Lorem', 'ipsum', 'dolor', 'sit', 'amet'];
        $expectedCounts = [
            [0 => 1, 1 => 1, 3 => 1, 4 => 1],
            [0 => 1, 1 => 1, 3 => 1, 4 => 1],
            [1 => 1, 3 => 1, 4 => 1],
            [1 => 1, 3 => 1, 4 => 1],
        ];

        $halfVectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), 0.5);
        $counts = $halfVectorizer->transform($documents);

        $this->assertEquals($expectedCounts, $counts);
        $this->assertEquals($expectedVocabulary, $halfVectorizer->getVocabulary());

        // Threshold 1: a word has to occur in every sample.
        $documents = [
            'Lorem ipsum dolor sit amet',
            'Morbi quis lacinia arcu. Sed eu sagittis Lorem',
            'Suspendisse gravida consequat eros Lorem',
        ];

        $expectedCounts = [
            [0 => 1],
            [0 => 1],
            [0 => 1],
        ];

        $allVectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), 1);

        $this->assertEquals($expectedCounts, $allVectorizer->transform($documents));
    }
}
|
@ -8,6 +8,14 @@ use Phpml\Math\Statistic\Mean;
|
||||
|
||||
class MeanTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
/**
 * Mean::arithmetic() is expected to reject an empty array; the annotation
 * below is what tells PHPUnit to require the exception.
 *
 * @expectedException \Phpml\Exception\InvalidArgumentException
 */
public function testArithmeticThrowExceptionOnEmptyArray()
{
    Mean::arithmetic([]);
}
|
||||
|
||||
public function testArithmeticMean()
|
||||
{
|
||||
$delta = 0.01;
|
||||
@ -15,4 +23,41 @@ class MeanTest extends \PHPUnit_Framework_TestCase
|
||||
$this->assertEquals(41.16, Mean::arithmetic([43, 21, 25, 42, 57, 59]), '', $delta);
|
||||
$this->assertEquals(1.7, Mean::arithmetic([0.5, 0.5, 1.5, 2.5, 3.5]), '', $delta);
|
||||
}
|
||||
|
||||
/**
 * Mean::median() is expected to reject an empty array; the annotation
 * below is what tells PHPUnit to require the exception.
 *
 * @expectedException \Phpml\Exception\InvalidArgumentException
 */
public function testMedianThrowExceptionOnEmptyArray()
{
    Mean::median([]);
}
|
||||
|
||||
/**
 * Five unsorted values: the expected median is 3.
 */
public function testMedianOnOddLengthArray()
{
    $values = [5, 2, 6, 1, 3];
    $median = Mean::median($values);

    $this->assertEquals(3, $median);
}
|
||||
|
||||
/**
 * Six unsorted values: the expected median is 3.5.
 */
public function testMedianOnEvenLengthArray()
{
    $values = [5, 2, 6, 1, 3, 4];
    $median = Mean::median($values);

    $this->assertEquals(3.5, $median);
}
|
||||
|
||||
/**
 * Mean::mode() is expected to reject an empty array; the annotation
 * below is what tells PHPUnit to require the exception.
 *
 * @expectedException \Phpml\Exception\InvalidArgumentException
 */
public function testModeThrowExceptionOnEmptyArray()
{
    Mean::mode([]);
}
|
||||
|
||||
/**
 * The value 6 occurs most often (three times), so it is the expected mode.
 */
public function testModeOnArray()
{
    $values = [5, 2, 6, 1, 3, 4, 6, 6, 5];
    $mode = Mean::mode($values);

    $this->assertEquals(6, $mode);
}
|
||||
}
|
||||
|
149
tests/Phpml/Preprocessing/ImputerTest.php
Normal file
149
tests/Phpml/Preprocessing/ImputerTest.php
Normal file
@ -0,0 +1,149 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Preprocessing;
|
||||
|
||||
use Phpml\Preprocessing\Imputer;
|
||||
use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy;
|
||||
use Phpml\Preprocessing\Imputer\Strategy\MedianStrategy;
|
||||
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
|
||||
|
||||
/**
 * Tests for Imputer: null entries in a matrix are replaced in place using the
 * mean, median or most frequent value computed along the column or row axis.
 *
 * NOTE(review): the first constructor argument (null) is presumably the marker
 * that identifies a missing value - confirm against Imputer.
 */
class ImputerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Missing values are filled with the mean of the remaining values in the same column.
     */
    public function testComplementsMissingValuesWithMeanStrategyOnColumnAxis()
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $imputeData = [
            [1, 5.33, 3, 4],
            [4, 3, 2, 1],
            [4.33, 6, 7, 8],
            [8, 7, 4, 5],
        ];

        $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);
        // preprocess() is expected to modify $data in place; the assertion below checks $data itself.
        $imputer->preprocess($data);

        $this->assertEquals($imputeData, $data, '', $delta = 0.01);
    }

    /**
     * Missing values are filled with the mean of the remaining values in the same row.
     */
    public function testComplementsMissingValuesWithMeanStrategyOnRowAxis()
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $imputeData = [
            [1, 2.66, 3, 4],
            [4, 3, 2, 1],
            [7, 6, 7, 8],
            [8, 7, 6.66, 5],
        ];

        $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_ROW);
        $imputer->preprocess($data);

        $this->assertEquals($imputeData, $data, '', $delta = 0.01);
    }

    /**
     * Missing values are filled with the median of the remaining values in the same column.
     */
    public function testComplementsMissingValuesWithMedianStrategyOnColumnAxis()
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $imputeData = [
            [1, 6, 3, 4],
            [4, 3, 2, 1],
            [4, 6, 7, 8],
            [8, 7, 3, 5],
        ];

        $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_COLUMN);
        $imputer->preprocess($data);

        $this->assertEquals($imputeData, $data, '', $delta = 0.01);
    }

    /**
     * Missing values are filled with the median of the remaining values in the same row.
     */
    public function testComplementsMissingValuesWithMedianStrategyOnRowAxis()
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
        ];

        $imputeData = [
            [1, 3, 3, 4],
            [4, 3, 2, 1],
            [7, 6, 7, 8],
            [8, 7, 7, 5],
        ];

        $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_ROW);
        $imputer->preprocess($data);

        $this->assertEquals($imputeData, $data, '', $delta = 0.01);
    }

    /**
     * Missing values are filled with the most frequent value of the same column.
     */
    public function testComplementsMissingValuesWithMostFrequentStrategyOnColumnAxis()
    {
        $data = [
            [1, null, 3, 4],
            [4, 3, 2, 1],
            [null, 6, 7, 8],
            [8, 7, null, 5],
            [8, 3, 2, 5],
        ];

        $imputeData = [
            [1, 3, 3, 4],
            [4, 3, 2, 1],
            [8, 6, 7, 8],
            [8, 7, 2, 5],
            [8, 3, 2, 5],
        ];

        $imputer = new Imputer(null, new MostFrequentStrategy(), Imputer::AXIS_COLUMN);
        $imputer->preprocess($data);

        $this->assertEquals($imputeData, $data);
    }

    /**
     * Missing values are filled with the most frequent value of the same row.
     */
    public function testComplementsMissingValuesWithMostFrequentStrategyOnRowAxis()
    {
        $data = [
            [1, null, 3, 4, 3],
            [4, 3, 2, 1, 7],
            [null, 6, 7, 8, 6],
            [8, 7, null, 5, 5],
            [8, 3, 2, 5, 4],
        ];

        $imputeData = [
            [1, 3, 3, 4, 3],
            [4, 3, 2, 1, 7],
            [6, 6, 7, 8, 6],
            [8, 7, 5, 5, 5],
            [8, 3, 2, 5, 4],
        ];

        $imputer = new Imputer(null, new MostFrequentStrategy(), Imputer::AXIS_ROW);
        $imputer->preprocess($data);

        $this->assertEquals($imputeData, $data);
    }
}
|
58
tests/Phpml/Preprocessing/NormalizerTest.php
Normal file
58
tests/Phpml/Preprocessing/NormalizerTest.php
Normal file
@ -0,0 +1,58 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Preprocessing;
|
||||
|
||||
use Phpml\Preprocessing\Normalizer;
|
||||
|
||||
/**
 * Tests for Normalizer: construction with an unsupported norm and in-place
 * scaling of samples with the default and the L1 norm.
 */
class NormalizerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * 99 is not an accepted norm identifier, so construction must fail.
     *
     * @expectedException \Phpml\Exception\NormalizerException
     */
    public function testThrowExceptionOnInvalidNorm()
    {
        new Normalizer(99);
    }

    /**
     * A Normalizer built without arguments is expected to apply the L2 norm.
     */
    public function testNormalizeSamplesWithL2Norm()
    {
        $matrix = [
            [1, -1, 2],
            [2, 0, 0],
            [0, 1, -1],
        ];

        $expected = [
            [0.4, -0.4, 0.81],
            [1.0, 0.0, 0.0],
            [0.0, 0.7, -0.7],
        ];

        $l2Normalizer = new Normalizer();
        // The matrix is checked after the call, i.e. preprocess() works in place.
        $l2Normalizer->preprocess($matrix);

        $tolerance = 0.01;
        $this->assertEquals($expected, $matrix, '', $tolerance);
    }

    /**
     * Same samples, scaled with the explicitly requested L1 norm.
     */
    public function testNormalizeSamplesWithL1Norm()
    {
        $matrix = [
            [1, -1, 2],
            [2, 0, 0],
            [0, 1, -1],
        ];

        $expected = [
            [0.25, -0.25, 0.5],
            [1.0, 0.0, 0.0],
            [0.0, 0.5, -0.5],
        ];

        $l1Normalizer = new Normalizer(Normalizer::NORM_L1);
        $l1Normalizer->preprocess($matrix);

        $tolerance = 0.01;
        $this->assertEquals($expected, $matrix, '', $tolerance);
    }
}
|
37
tests/Phpml/Regression/SVRTest.php
Normal file
37
tests/Phpml/Regression/SVRTest.php
Normal file
@ -0,0 +1,37 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Regression;
|
||||
|
||||
use Phpml\Regression\SVR;
|
||||
use Phpml\SupportVectorMachine\Kernel;
|
||||
|
||||
/**
 * Tests for the SVR regressor using a linear kernel.
 */
class SVRTest extends \PHPUnit_Framework_TestCase
{
    /**
     * One feature per sample, a single prediction.
     */
    public function testPredictSingleFeatureSamples()
    {
        $features = [[60], [61], [62], [63], [65]];
        $values = [3.1, 3.6, 3.8, 4, 4.1];

        $svr = new SVR(Kernel::LINEAR);
        $svr->train($features, $values);

        $predicted = $svr->predict([64]);

        $this->assertEquals(4.03, $predicted, '', 0.01);
    }

    /**
     * Two features per sample, two predictions requested in one call.
     */
    public function testPredictMultiFeaturesSamples()
    {
        $features = [
            [73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995],
            [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996],
            [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000],
        ];
        $values = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400];

        $svr = new SVR(Kernel::LINEAR);
        $svr->train($features, $values);

        $predicted = $svr->predict([[60000, 1996], [60000, 2000]]);

        $this->assertEquals([4109.82, 4112.28], $predicted, '', 0.01);
    }
}
|
39
tests/Phpml/SupportVectorMachine/DataTransformerTest.php
Normal file
39
tests/Phpml/SupportVectorMachine/DataTransformerTest.php
Normal file
@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\SupportVectorMachine;
|
||||
|
||||
use Phpml\SupportVectorMachine\DataTransformer;
|
||||
|
||||
/**
 * Tests for DataTransformer: samples are serialised to one text line each, in
 * the form "<label index> <feature no>:<value> ... " followed by PHP_EOL.
 */
class DataTransformerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * In a training set the labels 'a' and 'b' are expected to show up as 0 and 1.
     */
    public function testTransformDatasetToTrainingSet()
    {
        $samples = [[1, 1], [2, 1], [3, 2], [4, 5]];
        $labels = ['a', 'a', 'b', 'b'];

        // Every row keeps its trailing space and is terminated by PHP_EOL.
        $rows = [
            '0 1:1 2:1 ',
            '0 1:2 2:1 ',
            '1 1:3 2:2 ',
            '1 1:4 2:5 ',
        ];
        $expected = implode(PHP_EOL, $rows).PHP_EOL;

        $this->assertEquals($expected, DataTransformer::trainingSet($samples, $labels));
    }

    /**
     * In a test set there are no labels; every row is expected to start with 0.
     */
    public function testTransformSamplesToTestSet()
    {
        $samples = [[1, 1], [2, 1], [3, 2], [4, 5]];

        $rows = [
            '0 1:1 2:1 ',
            '0 1:2 2:1 ',
            '0 1:3 2:2 ',
            '0 1:4 2:5 ',
        ];
        $expected = implode(PHP_EOL, $rows).PHP_EOL;

        $this->assertEquals($expected, DataTransformer::testSet($samples));
    }
}
|
@ -0,0 +1,82 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\SupportVectorMachine;
|
||||
|
||||
use Phpml\SupportVectorMachine\Kernel;
|
||||
use Phpml\SupportVectorMachine\SupportVectorMachine;
|
||||
use Phpml\SupportVectorMachine\Type;
|
||||
|
||||
class SupportVectorMachineTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
public function testTrainCSVCModelWithLinearKernel()
|
||||
{
|
||||
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||
$labels = ['a', 'a', 'a', 'b', 'b', 'b'];
|
||||
|
||||
$model =
|
||||
'svm_type c_svc
|
||||
kernel_type linear
|
||||
nr_class 2
|
||||
total_sv 2
|
||||
rho 0
|
||||
label 0 1
|
||||
nr_sv 1 1
|
||||
SV
|
||||
0.25 1:2 2:4
|
||||
-0.25 1:4 2:2
|
||||
';
|
||||
|
||||
$svm = new SupportVectorMachine(Type::C_SVC, Kernel::LINEAR, 100.0);
|
||||
$svm->train($samples, $labels);
|
||||
|
||||
$this->assertEquals($model, $svm->getModel());
|
||||
}
|
||||
|
||||
/**
 * Two classes, linear kernel: three unseen samples are predicted in one call.
 */
public function testPredictSampleWithLinearKernel()
{
    $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
    $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b'];

    $machine = new SupportVectorMachine(Type::C_SVC, Kernel::LINEAR, 100.0);
    $machine->train($trainSamples, $trainLabels);

    $unseen = [
        [3, 2],
        [2, 3],
        [4, -5],
    ];
    $predictions = $machine->predict($unseen);

    // Expected label per position in $unseen.
    $expectedByIndex = [0 => 'b', 1 => 'a', 2 => 'b'];
    foreach ($expectedByIndex as $index => $label) {
        $this->assertEquals($label, $predictions[$index]);
    }
}
|
||||
|
||||
/**
 * Three classes, RBF kernel: one unseen sample per class is predicted in one call.
 */
public function testPredictSampleFromMultipleClassWithRbfKernel()
{
    $trainSamples = [
        [1, 3], [1, 4], [1, 4],
        [3, 1], [4, 1], [4, 2],
        [-3, -1], [-4, -1], [-4, -2],
    ];
    $trainLabels = [
        'a', 'a', 'a',
        'b', 'b', 'b',
        'c', 'c', 'c',
    ];

    $machine = new SupportVectorMachine(Type::C_SVC, Kernel::RBF, 100.0);
    $machine->train($trainSamples, $trainLabels);

    $unseen = [
        [1, 5],
        [4, 3],
        [-4, -3],
    ];
    $predictions = $machine->predict($unseen);

    // Expected label per position in $unseen.
    $expectedByIndex = [0 => 'a', 1 => 'b', 2 => 'c'];
    foreach ($expectedByIndex as $index => $label) {
        $this->assertEquals($label, $predictions[$index]);
    }
}
|
||||
}
|
40
tests/Phpml/Tokenization/WhitespaceTokenizerTest.php
Normal file
40
tests/Phpml/Tokenization/WhitespaceTokenizerTest.php
Normal file
@ -0,0 +1,40 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Tokenization;
|
||||
|
||||
use Phpml\Tokenization\WhitespaceTokenizer;
|
||||
|
||||
/**
 * Tests for WhitespaceTokenizer: text is cut at whitespace (including line
 * breaks) only, so punctuation stays attached to the neighbouring token.
 */
class WhitespaceTokenizerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Multi-line ASCII input.
     */
    public function testTokenizationOnAscii()
    {
        $input = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Cras consectetur, dui et lobortis auctor.
Nulla vitae congue lorem.';

        $expected = [
            'Lorem', 'ipsum', 'dolor', 'sit', 'amet,', 'consectetur', 'adipiscing', 'elit.',
            'Cras', 'consectetur,', 'dui', 'et', 'lobortis', 'auctor.',
            'Nulla', 'vitae', 'congue', 'lorem.',
        ];

        $whitespaceTokenizer = new WhitespaceTokenizer();

        $this->assertEquals($expected, $whitespaceTokenizer->tokenize($input));
    }

    /**
     * Multi-line input made of multi-byte UTF-8 characters.
     */
    public function testTokenizationOnUtf8()
    {
        $input = '鋍鞎 鳼 鞮鞢騉 袟袘觕, 炟砏 蒮 謺貙蹖 偢偣唲 蒛 箷箯緷 鑴鱱爧 覮轀,
剆坲 煘煓瑐 鬐鶤鶐 飹勫嫢 銪 餀 枲柊氠 鍎鞚韕 焲犈,
殍涾烰 齞齝囃 蹅輶 鄜, 孻憵 擙樲橚 藒襓謥 岯岪弨 蒮 廞徲 孻憵懥 趡趛踠 槏';

        $expected = [
            '鋍鞎', '鳼', '鞮鞢騉', '袟袘觕,', '炟砏', '蒮', '謺貙蹖', '偢偣唲', '蒛', '箷箯緷', '鑴鱱爧', '覮轀,',
            '剆坲', '煘煓瑐', '鬐鶤鶐', '飹勫嫢', '銪', '餀', '枲柊氠', '鍎鞚韕', '焲犈,',
            '殍涾烰', '齞齝囃', '蹅輶', '鄜,', '孻憵', '擙樲橚', '藒襓謥', '岯岪弨', '蒮', '廞徲', '孻憵懥', '趡趛踠', '槏',
        ];

        $whitespaceTokenizer = new WhitespaceTokenizer();

        $this->assertEquals($expected, $whitespaceTokenizer->tokenize($input));
    }
}
|
40
tests/Phpml/Tokenization/WordTokenizerTest.php
Normal file
40
tests/Phpml/Tokenization/WordTokenizerTest.php
Normal file
@ -0,0 +1,40 @@
|
||||
<?php
|
||||
|
||||
declare (strict_types = 1);
|
||||
|
||||
namespace tests\Tokenization;
|
||||
|
||||
use Phpml\Tokenization\WordTokenizer;
|
||||
|
||||
/**
 * Tests for WordTokenizer: punctuation is discarded and only runs of two or
 * more word characters are returned.
 */
class WordTokenizerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * ASCII input with assorted punctuation between and around the words.
     */
    public function testTokenizationOnAscii()
    {
        $input = 'Lorem ipsum-dolor sit amet, consectetur/adipiscing elit.
Cras consectetur, dui et lobortis;auctor.
Nulla vitae ,.,/ congue lorem.';

        $expected = [
            'Lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit',
            'Cras', 'consectetur', 'dui', 'et', 'lobortis', 'auctor',
            'Nulla', 'vitae', 'congue', 'lorem',
        ];

        $wordTokenizer = new WordTokenizer();

        $this->assertEquals($expected, $wordTokenizer->tokenize($input));
    }

    /**
     * Multi-byte UTF-8 input; single-character words are expected to be dropped.
     */
    public function testTokenizationOnUtf8()
    {
        $input = '鋍鞎 鳼 鞮鞢騉 袟袘觕, 炟砏 蒮 謺貙蹖 偢偣唲 蒛 箷箯緷 鑴鱱爧 覮轀,
剆坲 煘煓瑐 鬐鶤鶐 飹勫嫢 銪 餀 枲柊氠 鍎鞚韕 焲犈,
殍涾烰 齞齝囃 蹅輶 鄜, 孻憵 擙樲橚 藒襓謥 岯岪弨 蒮 廞徲 孻憵懥 趡趛踠 槏';

        $expected = [
            '鋍鞎', '鞮鞢騉', '袟袘觕', '炟砏', '謺貙蹖', '偢偣唲', '箷箯緷', '鑴鱱爧', '覮轀',
            '剆坲', '煘煓瑐', '鬐鶤鶐', '飹勫嫢', '枲柊氠', '鍎鞚韕', '焲犈',
            '殍涾烰', '齞齝囃', '蹅輶', '孻憵', '擙樲橚', '藒襓謥', '岯岪弨', '廞徲', '孻憵懥', '趡趛踠',
        ];

        $wordTokenizer = new WordTokenizer();

        $this->assertEquals($expected, $wordTokenizer->tokenize($input));
    }
}
|
0
var/.gitkeep
Normal file
0
var/.gitkeep
Normal file
Loading…
Reference in New Issue
Block a user