mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-11 08:10:56 +00:00
Merge pull request #4 from php-ai/develop
New algorithms, math functions and documentation
This commit is contained in:
commit
096db0e9dd
5
.travis.yml
Normal file
5
.travis.yml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
language: php
|
||||||
|
php:
|
||||||
|
- '7.0'
|
||||||
|
before_script: composer install
|
||||||
|
script: bin/phpunit
|
47
README.md
47
README.md
@ -6,11 +6,25 @@
|
|||||||
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
|
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
|
||||||
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop)
|
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop)
|
||||||
|
|
||||||
Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ...
|
Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning, but maybe this will change ...
|
||||||
|
|
||||||
|
Simple example of classification:
|
||||||
|
```php
|
||||||
|
use Phpml\Classification\KNearestNeighbors;
|
||||||
|
|
||||||
|
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||||
|
$labels = ['a', 'a', 'a', 'b', 'b', 'b'];
|
||||||
|
|
||||||
|
$classifier = new KNearestNeighbors();
|
||||||
|
$classifier->train($samples, $labels);
|
||||||
|
|
||||||
|
$classifier->predict([3, 2]);
|
||||||
|
// return 'b'
|
||||||
|
```
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
|
|
||||||
To find out how to use PHP-ML follow [Documentation](php-ml.readthedocs.org).
|
To find out how to use PHP-ML, follow the [Documentation](http://php-ml.readthedocs.org/).
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
@ -20,14 +34,33 @@ Currently this library is in the process of developing, but You can install it w
|
|||||||
composer require php-ai/php-ml
|
composer require php-ai/php-ml
|
||||||
```
|
```
|
||||||
|
|
||||||
## To-Do
|
## Features
|
||||||
|
|
||||||
* implements more algorithms
|
* Classification
|
||||||
* integration with Lavacharts for data visualization
|
* [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/)
|
||||||
|
* [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/)
|
||||||
|
* Regression
|
||||||
|
* [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/)
|
||||||
|
* Clustering
|
||||||
|
* [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means)
|
||||||
|
* [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan)
|
||||||
|
* Cross Validation
|
||||||
|
* [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split)
|
||||||
|
* Datasets
|
||||||
|
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset)
|
||||||
|
* Ready to use:
|
||||||
|
* [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/)
|
||||||
|
* Math
|
||||||
|
* [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/)
|
||||||
|
* [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/)
|
||||||
|
|
||||||
## Testing
|
|
||||||
|
|
||||||
After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer)
|
## Contribute
|
||||||
|
|
||||||
|
- Issue Tracker: https://github.com/php-ai/php-ml/issues
|
||||||
|
- Source Code: https://github.com/php-ai/php-ml
|
||||||
|
|
||||||
|
After installation, you can launch the test suite in the project root directory (you will need to install dev requirements with Composer):
|
||||||
|
|
||||||
```
|
```
|
||||||
bin/phpunit
|
bin/phpunit
|
||||||
|
215
data/glass.csv
Normal file
215
data/glass.csv
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
RI: refractive index,Na: Sodium,Mg: Magnesium,Al: Aluminum,Si: Silicon,K: Potassium,Ca: Calcium,Ba: Barium,Fe: Iron,type of glass
|
||||||
|
1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.00,0.26,building_windows_float_processed
|
||||||
|
1.51743,13.30,3.60,1.14,73.09,0.58,8.17,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51918,14.04,3.58,1.37,72.08,0.56,8.30,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51755,13.00,3.60,1.36,72.99,0.57,8.40,0.00,0.11,building_windows_float_processed
|
||||||
|
1.51571,12.72,3.46,1.56,73.20,0.67,8.09,0.00,0.24,building_windows_float_processed
|
||||||
|
1.51763,12.80,3.66,1.27,73.01,0.60,8.56,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51589,12.88,3.43,1.40,73.28,0.69,8.05,0.00,0.24,building_windows_float_processed
|
||||||
|
1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.00,0.17,building_windows_float_processed
|
||||||
|
1.51763,12.61,3.59,1.31,73.29,0.58,8.50,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51784,12.68,3.67,1.16,73.11,0.61,8.70,0.00,0.00,building_windows_float_processed
|
||||||
|
1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51911,13.90,3.73,1.18,72.12,0.06,8.89,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.00,0.07,building_windows_float_processed
|
||||||
|
1.51750,12.82,3.55,1.49,72.75,0.54,8.52,0.00,0.19,building_windows_float_processed
|
||||||
|
1.51966,14.77,3.75,0.29,72.02,0.03,9.00,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51736,12.78,3.62,1.29,72.79,0.59,8.70,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51720,13.38,3.50,1.15,72.85,0.50,8.43,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51764,12.98,3.54,1.21,73.00,0.65,8.53,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51784,13.08,3.49,1.28,72.86,0.60,8.49,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51768,12.65,3.56,1.30,73.08,0.61,8.69,0.00,0.14,building_windows_float_processed
|
||||||
|
1.51747,12.84,3.50,1.14,73.27,0.56,8.55,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,building_windows_float_processed
|
||||||
|
1.51753,12.57,3.47,1.38,73.39,0.60,8.55,0.00,0.06,building_windows_float_processed
|
||||||
|
1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.00,building_windows_float_processed
|
||||||
|
1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.00,0.00,building_windows_float_processed
|
||||||
|
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,building_windows_float_processed
|
||||||
|
1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51793,12.79,3.50,1.12,73.03,0.64,8.77,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51755,12.71,3.42,1.20,73.20,0.59,8.64,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.00,0.00,building_windows_float_processed
|
||||||
|
1.52210,13.73,3.84,0.72,71.76,0.17,9.74,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.00,0.30,building_windows_float_processed
|
||||||
|
1.51900,13.49,3.48,1.35,71.95,0.55,9.00,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0.00,0.16,building_windows_float_processed
|
||||||
|
1.52667,13.99,3.70,0.71,71.57,0.02,9.82,0.00,0.10,building_windows_float_processed
|
||||||
|
1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.00,0.00,building_windows_float_processed
|
||||||
|
1.52320,13.72,3.72,0.51,71.75,0.09,10.06,0.00,0.16,building_windows_float_processed
|
||||||
|
1.51926,13.20,3.33,1.28,72.36,0.60,9.14,0.00,0.11,building_windows_float_processed
|
||||||
|
1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.00,0.09,building_windows_float_processed
|
||||||
|
1.51769,12.45,2.71,1.29,73.70,0.56,9.06,0.00,0.24,building_windows_float_processed
|
||||||
|
1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.00,0.31,building_windows_float_processed
|
||||||
|
1.51824,12.87,3.48,1.29,72.95,0.60,8.43,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.00,0.11,building_windows_float_processed
|
||||||
|
1.51905,13.60,3.62,1.11,72.64,0.14,8.76,0.00,0.00,building_windows_float_processed
|
||||||
|
1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.00,building_windows_float_processed
|
||||||
|
1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0.00,0.11,building_windows_float_processed
|
||||||
|
1.52227,14.17,3.81,0.78,71.35,0.00,9.69,0.00,0.00,building_windows_float_processed
|
||||||
|
1.52172,13.48,3.74,0.90,72.01,0.18,9.61,0.00,0.07,building_windows_float_processed
|
||||||
|
1.52099,13.69,3.59,1.12,71.96,0.09,9.40,0.00,0.00,building_windows_float_processed
|
||||||
|
1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.00,0.17,building_windows_float_processed
|
||||||
|
1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.00,0.17,building_windows_float_processed
|
||||||
|
1.52152,13.12,3.58,0.90,72.20,0.23,9.82,0.00,0.16,building_windows_float_processed
|
||||||
|
1.52300,13.31,3.58,0.82,71.99,0.12,10.17,0.00,0.03,building_windows_float_processed
|
||||||
|
1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0.00,0.12,building_windows_non_float_processed
|
||||||
|
1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0.00,0.32,building_windows_non_float_processed
|
||||||
|
1.51593,13.09,3.59,1.52,73.10,0.67,7.83,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51596,13.02,3.56,1.54,73.11,0.72,7.90,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51590,13.02,3.58,1.51,73.12,0.69,7.96,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51627,13.00,3.58,1.54,72.83,0.61,8.04,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.00,0.14,building_windows_non_float_processed
|
||||||
|
1.51590,12.82,3.52,1.90,72.86,0.69,7.97,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51646,13.41,3.55,1.25,72.81,0.68,8.10,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.00,0.09,building_windows_non_float_processed
|
||||||
|
1.51409,14.25,3.09,2.08,72.28,1.10,7.08,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51645,13.40,3.49,1.52,72.65,0.67,8.08,0.00,0.10,building_windows_non_float_processed
|
||||||
|
1.51618,13.01,3.50,1.48,72.89,0.60,8.12,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51640,12.55,3.48,1.87,73.23,0.63,8.08,0.00,0.09,building_windows_non_float_processed
|
||||||
|
1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.00,0.22,building_windows_non_float_processed
|
||||||
|
1.51605,12.90,3.44,1.45,73.06,0.44,8.27,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.00,0.19,building_windows_non_float_processed
|
||||||
|
1.51590,13.24,3.34,1.47,73.10,0.39,8.22,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51860,13.36,3.43,1.43,72.26,0.51,8.60,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.00,0.15,building_windows_non_float_processed
|
||||||
|
1.51743,12.20,3.25,1.16,73.55,0.62,8.90,0.00,0.24,building_windows_non_float_processed
|
||||||
|
1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51811,12.96,2.96,1.43,72.92,0.60,8.79,0.14,0.00,building_windows_non_float_processed
|
||||||
|
1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,building_windows_non_float_processed
|
||||||
|
1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51820,12.62,2.76,0.83,73.81,0.35,9.42,0.00,0.20,building_windows_non_float_processed
|
||||||
|
1.52725,13.80,3.15,0.66,70.57,0.08,11.64,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.52410,13.83,2.90,1.17,71.15,0.08,10.79,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.52475,11.45,0.00,1.88,72.19,0.81,13.24,0.00,0.34,building_windows_non_float_processed
|
||||||
|
1.53125,10.73,0.00,2.10,69.81,0.58,13.30,3.15,0.28,building_windows_non_float_processed
|
||||||
|
1.53393,12.30,0.00,1.00,70.16,0.12,16.19,0.00,0.24,building_windows_non_float_processed
|
||||||
|
1.52222,14.43,0.00,1.00,72.67,0.10,11.52,0.00,0.08,building_windows_non_float_processed
|
||||||
|
1.51818,13.72,0.00,0.56,74.45,0.00,10.99,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.52664,11.23,0.00,0.77,73.21,0.00,14.68,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.52739,11.02,0.00,0.75,73.08,0.00,14.96,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.52777,12.64,0.00,0.67,72.02,0.06,14.40,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.00,0.14,building_windows_non_float_processed
|
||||||
|
1.51847,13.10,3.97,1.19,72.44,0.60,8.43,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51829,13.24,3.90,1.41,72.33,0.55,8.31,0.00,0.10,building_windows_non_float_processed
|
||||||
|
1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,building_windows_non_float_processed
|
||||||
|
1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51844,13.25,3.76,1.32,72.40,0.58,8.42,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.00,0.21,building_windows_non_float_processed
|
||||||
|
1.51687,13.23,3.54,1.48,72.84,0.56,8.10,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.52177,13.20,3.68,1.15,72.75,0.54,8.52,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.00,0.12,building_windows_non_float_processed
|
||||||
|
1.51667,12.94,3.61,1.26,72.75,0.56,8.60,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.00,0.17,building_windows_non_float_processed
|
||||||
|
1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,building_windows_non_float_processed
|
||||||
|
1.52020,13.98,1.35,1.63,71.76,0.39,10.56,0.00,0.18,building_windows_non_float_processed
|
||||||
|
1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.52614,13.70,0.00,1.36,71.24,0.19,13.44,0.00,0.10,building_windows_non_float_processed
|
||||||
|
1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51800,13.71,3.93,1.54,71.81,0.54,8.21,0.00,0.15,building_windows_non_float_processed
|
||||||
|
1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51789,13.19,3.90,1.30,72.33,0.55,8.44,0.00,0.28,building_windows_non_float_processed
|
||||||
|
1.51806,13.00,3.80,1.08,73.07,0.56,8.38,0.00,0.12,building_windows_non_float_processed
|
||||||
|
1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51674,12.79,3.52,1.54,73.36,0.66,7.90,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51690,13.33,3.54,1.61,72.54,0.68,8.11,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51851,13.20,3.63,1.07,72.83,0.57,8.41,0.09,0.17,building_windows_non_float_processed
|
||||||
|
1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,building_windows_non_float_processed
|
||||||
|
1.51709,13.00,3.47,1.79,72.72,0.66,8.18,0.00,0.00,building_windows_non_float_processed
|
||||||
|
1.51660,12.99,3.18,1.23,72.97,0.58,8.81,0.00,0.24,building_windows_non_float_processed
|
||||||
|
1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.00,0.35,building_windows_non_float_processed
|
||||||
|
1.51769,13.65,3.66,1.11,72.77,0.11,8.60,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51610,13.33,3.53,1.34,72.67,0.56,8.33,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51670,13.24,3.57,1.38,72.70,0.56,8.44,0.00,0.10,vehicle_windows_float_processed
|
||||||
|
1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51665,13.14,3.45,1.76,72.48,0.60,8.38,0.00,0.17,vehicle_windows_float_processed
|
||||||
|
1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51779,13.64,3.65,0.65,73.00,0.06,8.93,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51610,13.42,3.40,1.22,72.69,0.59,8.32,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51646,13.04,3.40,1.26,73.01,0.52,8.58,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51796,13.50,3.36,1.63,71.94,0.57,8.81,0.00,0.09,vehicle_windows_float_processed
|
||||||
|
1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0.00,0.00,vehicle_windows_float_processed
|
||||||
|
1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,vehicle_windows_float_processed
|
||||||
|
1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,vehicle_windows_float_processed
|
||||||
|
1.51514,14.01,2.68,3.50,69.89,1.68,5.87,2.20,0.00,containers
|
||||||
|
1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,containers
|
||||||
|
1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0.00,0.00,containers
|
||||||
|
1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0.00,0.00,containers
|
||||||
|
1.51969,12.64,0.00,1.65,73.75,0.38,11.53,0.00,0.00,containers
|
||||||
|
1.51666,12.86,0.00,1.83,73.88,0.97,10.17,0.00,0.00,containers
|
||||||
|
1.51994,13.27,0.00,1.76,73.03,0.47,11.32,0.00,0.00,containers
|
||||||
|
1.52369,13.44,0.00,1.58,72.22,0.32,12.24,0.00,0.00,containers
|
||||||
|
1.51316,13.02,0.00,3.04,70.48,6.21,6.96,0.00,0.00,containers
|
||||||
|
1.51321,13.00,0.00,3.02,70.70,6.21,6.93,0.00,0.00,containers
|
||||||
|
1.52043,13.38,0.00,1.40,72.25,0.33,12.50,0.00,0.00,containers
|
||||||
|
1.52058,12.85,1.61,2.17,72.18,0.76,9.70,0.24,0.51,containers
|
||||||
|
1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.00,0.28,containers
|
||||||
|
1.51905,14.00,2.39,1.56,72.37,0.00,9.57,0.00,0.00,tableware
|
||||||
|
1.51937,13.79,2.41,1.19,72.76,0.00,9.77,0.00,0.00,tableware
|
||||||
|
1.51829,14.46,2.24,1.62,72.38,0.00,9.26,0.00,0.00,tableware
|
||||||
|
1.51852,14.09,2.19,1.66,72.67,0.00,9.32,0.00,0.00,tableware
|
||||||
|
1.51299,14.40,1.74,1.54,74.55,0.00,7.59,0.00,0.00,tableware
|
||||||
|
1.51888,14.99,0.78,1.74,72.50,0.00,9.95,0.00,0.00,tableware
|
||||||
|
1.51916,14.15,0.00,2.09,72.74,0.00,10.88,0.00,0.00,tableware
|
||||||
|
1.51969,14.56,0.00,0.56,73.48,0.00,11.22,0.00,0.00,tableware
|
||||||
|
1.51115,17.38,0.00,0.34,75.41,0.00,6.65,0.00,0.00,tableware
|
||||||
|
1.51131,13.69,3.20,1.81,72.81,1.76,5.43,1.19,0.00,headlamps
|
||||||
|
1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.00,headlamps
|
||||||
|
1.52315,13.44,3.34,1.23,72.38,0.60,8.83,0.00,0.00,headlamps
|
||||||
|
1.52247,14.86,2.20,2.06,70.26,0.76,9.76,0.00,0.00,headlamps
|
||||||
|
1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.00,headlamps
|
||||||
|
1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,headlamps
|
||||||
|
1.51602,14.85,0.00,2.38,73.28,0.00,8.76,0.64,0.09,headlamps
|
||||||
|
1.51623,14.20,0.00,2.79,73.46,0.04,9.04,0.40,0.09,headlamps
|
||||||
|
1.51719,14.75,0.00,2.00,73.02,0.00,8.53,1.59,0.08,headlamps
|
||||||
|
1.51683,14.56,0.00,1.98,73.29,0.00,8.52,1.57,0.07,headlamps
|
||||||
|
1.51545,14.14,0.00,2.68,73.39,0.08,9.07,0.61,0.05,headlamps
|
||||||
|
1.51556,13.87,0.00,2.54,73.23,0.14,9.41,0.81,0.01,headlamps
|
||||||
|
1.51727,14.70,0.00,2.34,73.28,0.00,8.95,0.66,0.00,headlamps
|
||||||
|
1.51531,14.38,0.00,2.66,73.10,0.04,9.08,0.64,0.00,headlamps
|
||||||
|
1.51609,15.01,0.00,2.51,73.05,0.05,8.83,0.53,0.00,headlamps
|
||||||
|
1.51508,15.15,0.00,2.25,73.50,0.00,8.34,0.63,0.00,headlamps
|
||||||
|
1.51653,11.95,0.00,1.19,75.18,2.70,8.93,0.00,0.00,headlamps
|
||||||
|
1.51514,14.85,0.00,2.42,73.72,0.00,8.39,0.56,0.00,headlamps
|
||||||
|
1.51658,14.80,0.00,1.99,73.11,0.00,8.28,1.71,0.00,headlamps
|
||||||
|
1.51617,14.95,0.00,2.27,73.30,0.00,8.71,0.67,0.00,headlamps
|
||||||
|
1.51732,14.95,0.00,1.80,72.99,0.00,8.61,1.55,0.00,headlamps
|
||||||
|
1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,headlamps
|
||||||
|
1.51831,14.39,0.00,1.82,72.86,1.41,6.47,2.88,0.00,headlamps
|
||||||
|
1.51640,14.37,0.00,2.74,72.85,0.00,9.45,0.54,0.00,headlamps
|
||||||
|
1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.00,headlamps
|
||||||
|
1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.00,headlamps
|
||||||
|
1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.00,headlamps
|
||||||
|
1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.00,headlamps
|
||||||
|
1.51711,14.23,0.00,2.08,73.36,0.00,8.62,1.67,0.00,headlamps
|
|
179
data/wine.csv
Normal file
179
data/wine.csv
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
alcohol,malic acid,ash,alcalinity of ash,magnesium,total phenols,flavanoids,nonflavanoid phenols,proanthocyanins,color intensity,hue,OD280/OD315 of diluted wines,proline,class
|
||||||
|
14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065,1
|
||||||
|
13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050,1
|
||||||
|
13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185,1
|
||||||
|
14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480,1
|
||||||
|
13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735,1
|
||||||
|
14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450,1
|
||||||
|
14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290,1
|
||||||
|
14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295,1
|
||||||
|
14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045,1
|
||||||
|
13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045,1
|
||||||
|
14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510,1
|
||||||
|
14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280,1
|
||||||
|
13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320,1
|
||||||
|
14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150,1
|
||||||
|
14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547,1
|
||||||
|
13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310,1
|
||||||
|
14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280,1
|
||||||
|
13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130,1
|
||||||
|
14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680,1
|
||||||
|
13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845,1
|
||||||
|
14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780,1
|
||||||
|
12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770,1
|
||||||
|
13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035,1
|
||||||
|
12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015,1
|
||||||
|
13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845,1
|
||||||
|
13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830,1
|
||||||
|
13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195,1
|
||||||
|
13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285,1
|
||||||
|
13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915,1
|
||||||
|
14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035,1
|
||||||
|
13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285,1
|
||||||
|
13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515,1
|
||||||
|
13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990,1
|
||||||
|
13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235,1
|
||||||
|
13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095,1
|
||||||
|
13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920,1
|
||||||
|
13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880,1
|
||||||
|
13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105,1
|
||||||
|
13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020,1
|
||||||
|
14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760,1
|
||||||
|
13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795,1
|
||||||
|
13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035,1
|
||||||
|
13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095,1
|
||||||
|
13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680,1
|
||||||
|
13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885,1
|
||||||
|
14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080,1
|
||||||
|
14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065,1
|
||||||
|
13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985,1
|
||||||
|
14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060,1
|
||||||
|
13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260,1
|
||||||
|
13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150,1
|
||||||
|
13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265,1
|
||||||
|
13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190,1
|
||||||
|
13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375,1
|
||||||
|
13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060,1
|
||||||
|
13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120,1
|
||||||
|
14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970,1
|
||||||
|
13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270,1
|
||||||
|
13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285,1
|
||||||
|
12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520,2
|
||||||
|
12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680,2
|
||||||
|
12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450,2
|
||||||
|
13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630,2
|
||||||
|
12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420,2
|
||||||
|
12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355,2
|
||||||
|
12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678,2
|
||||||
|
13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502,2
|
||||||
|
12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510,2
|
||||||
|
13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750,2
|
||||||
|
12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718,2
|
||||||
|
12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870,2
|
||||||
|
13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410,2
|
||||||
|
13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472,2
|
||||||
|
12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985,2
|
||||||
|
11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886,2
|
||||||
|
11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428,2
|
||||||
|
13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392,2
|
||||||
|
11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500,2
|
||||||
|
12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750,2
|
||||||
|
12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463,2
|
||||||
|
12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278,2
|
||||||
|
12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714,2
|
||||||
|
12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630,2
|
||||||
|
13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515,2
|
||||||
|
11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520,2
|
||||||
|
12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450,2
|
||||||
|
12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495,2
|
||||||
|
11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562,2
|
||||||
|
11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680,2
|
||||||
|
12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625,2
|
||||||
|
12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480,2
|
||||||
|
12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450,2
|
||||||
|
12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495,2
|
||||||
|
12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290,2
|
||||||
|
11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345,2
|
||||||
|
12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937,2
|
||||||
|
11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625,2
|
||||||
|
12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428,2
|
||||||
|
12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660,2
|
||||||
|
12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406,2
|
||||||
|
12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710,2
|
||||||
|
12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562,2
|
||||||
|
12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438,2
|
||||||
|
11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415,2
|
||||||
|
12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672,2
|
||||||
|
12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315,2
|
||||||
|
12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510,2
|
||||||
|
12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488,2
|
||||||
|
12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312,2
|
||||||
|
11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680,2
|
||||||
|
11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562,2
|
||||||
|
12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325,2
|
||||||
|
11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607,2
|
||||||
|
11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434,2
|
||||||
|
12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385,2
|
||||||
|
11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407,2
|
||||||
|
11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495,2
|
||||||
|
12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345,2
|
||||||
|
12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372,2
|
||||||
|
12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564,2
|
||||||
|
11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625,2
|
||||||
|
11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465,2
|
||||||
|
12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365,2
|
||||||
|
13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380,2
|
||||||
|
11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380,2
|
||||||
|
12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378,2
|
||||||
|
12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352,2
|
||||||
|
11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466,2
|
||||||
|
12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342,2
|
||||||
|
12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580,2
|
||||||
|
12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630,3
|
||||||
|
12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530,3
|
||||||
|
12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560,3
|
||||||
|
12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600,3
|
||||||
|
12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650,3
|
||||||
|
12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695,3
|
||||||
|
12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720,3
|
||||||
|
12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515,3
|
||||||
|
13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580,3
|
||||||
|
12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590,3
|
||||||
|
12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600,3
|
||||||
|
13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780,3
|
||||||
|
13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520,3
|
||||||
|
13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550,3
|
||||||
|
12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855,3
|
||||||
|
13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830,3
|
||||||
|
13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415,3
|
||||||
|
12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625,3
|
||||||
|
13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650,3
|
||||||
|
13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550,3
|
||||||
|
13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500,3
|
||||||
|
12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480,3
|
||||||
|
13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425,3
|
||||||
|
13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675,3
|
||||||
|
12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640,3
|
||||||
|
13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725,3
|
||||||
|
13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480,3
|
||||||
|
12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880,3
|
||||||
|
14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660,3
|
||||||
|
13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620,3
|
||||||
|
12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520,3
|
||||||
|
13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680,3
|
||||||
|
12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570,3
|
||||||
|
12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675,3
|
||||||
|
13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615,3
|
||||||
|
13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520,3
|
||||||
|
13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695,3
|
||||||
|
12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685,3
|
||||||
|
13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750,3
|
||||||
|
13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630,3
|
||||||
|
12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510,3
|
||||||
|
12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470,3
|
||||||
|
14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660,3
|
||||||
|
13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740,3
|
||||||
|
13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750,3
|
||||||
|
13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835,3
|
||||||
|
13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840,3
|
||||||
|
14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560,3
|
|
@ -1,11 +1,30 @@
|
|||||||
# PHP Machine Learning (PHP-ML)
|
# PHP Machine Learning library
|
||||||
|
|
||||||
[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop)
|
[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop)
|
||||||
|
[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=develop)](http://php-ml.readthedocs.org/en/develop/?badge=develop)
|
||||||
[![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml)
|
[![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml)
|
||||||
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
|
[![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml)
|
||||||
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop)
|
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop)
|
||||||
|
|
||||||
Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ...
|
Fresh approach to Machine Learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ...
|
||||||
|
|
||||||
|
Simple example of classification:
|
||||||
|
```php
|
||||||
|
use Phpml\Classification\KNearestNeighbors;
|
||||||
|
|
||||||
|
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||||
|
$labels = ['a', 'a', 'a', 'b', 'b', 'b'];
|
||||||
|
|
||||||
|
$classifier = new KNearestNeighbors();
|
||||||
|
$classifier->train($samples, $labels);
|
||||||
|
|
||||||
|
$classifier->predict([3, 2]);
|
||||||
|
// return 'b'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
To find out how to use PHP-ML follow [Documentation](http://php-ml.readthedocs.org/).
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
@ -15,14 +34,33 @@ Currently this library is in the process of developing, but You can install it w
|
|||||||
composer require php-ai/php-ml
|
composer require php-ai/php-ml
|
||||||
```
|
```
|
||||||
|
|
||||||
## To-Do
|
## Features
|
||||||
|
|
||||||
* implements more algorithms
|
* Classification
|
||||||
* integration with Lavacharts for data visualization
|
* [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/)
|
||||||
|
* [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/)
|
||||||
|
* Regression
|
||||||
|
* [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/)
|
||||||
|
* Clustering
|
||||||
|
* [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means)
|
||||||
|
* [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan)
|
||||||
|
* Cross Validation
|
||||||
|
* [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split)
|
||||||
|
* Datasets
|
||||||
|
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset)
|
||||||
|
* Ready to use:
|
||||||
|
* [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/)
|
||||||
|
* Math
|
||||||
|
* [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/)
|
||||||
|
* [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/)
|
||||||
|
|
||||||
## Testing
|
|
||||||
|
|
||||||
After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer)
|
## Contribute
|
||||||
|
|
||||||
|
- Issue Tracker: github.com/php-ai/php-ml/issues
|
||||||
|
- Source Code: github.com/php-ai/php-ml
|
||||||
|
|
||||||
|
After installation, you can launch the test suite in project root directory (you will need to install dev requirements with Composer)
|
||||||
|
|
||||||
```
|
```
|
||||||
bin/phpunit
|
bin/phpunit
|
||||||
|
@ -5,14 +5,16 @@ Classifier implementing the k-nearest neighbors algorithm.
|
|||||||
### Constructor Parameters
|
### Constructor Parameters
|
||||||
|
|
||||||
* $k - number of nearest neighbors to scan (default: 3)
|
* $k - number of nearest neighbors to scan (default: 3)
|
||||||
|
* $distanceMetric - Distance object, default Euclidean (see [distance documentation](math/distance/))
|
||||||
|
|
||||||
```
|
```
|
||||||
$classifier = new KNearestNeighbors($k=4);
|
$classifier = new KNearestNeighbors($k=4);
|
||||||
|
$classifier = new KNearestNeighbors($k=3, new Minkowski($lambda=4));
|
||||||
```
|
```
|
||||||
|
|
||||||
### Train
|
### Train
|
||||||
|
|
||||||
To train a classifier simply provide train samples and labels (as `array`):
|
To train a classifier simply provide train samples and labels (as `array`). Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||||
@ -24,7 +26,7 @@ $classifier->train($samples, $labels);
|
|||||||
|
|
||||||
### Predict
|
### Predict
|
||||||
|
|
||||||
To predict sample class use `predict` method. You can provide one sample or array of samples:
|
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
||||||
|
|
||||||
```
|
```
|
||||||
$classifier->predict([3, 2]);
|
$classifier->predict([3, 2]);
|
27
docs/machine-learning/classification/naive-bayes.md
Normal file
27
docs/machine-learning/classification/naive-bayes.md
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# NaiveBayes Classifier
|
||||||
|
|
||||||
|
Classifier based on applying Bayes' theorem with strong (naive) independence assumptions between the features.
|
||||||
|
|
||||||
|
### Train
|
||||||
|
|
||||||
|
To train a classifier simply provide train samples and labels (as `array`). Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
$samples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
|
||||||
|
$labels = ['a', 'b', 'c'];
|
||||||
|
|
||||||
|
$classifier = new NaiveBayes();
|
||||||
|
$classifier->train($samples, $labels);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Predict
|
||||||
|
|
||||||
|
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
||||||
|
|
||||||
|
```
|
||||||
|
$classifier->predict([3, 1, 1]);
|
||||||
|
// return 'a'
|
||||||
|
|
||||||
|
$classifier->predict([[3, 1, 1], [1, 4, 1]]);
|
||||||
|
// return ['a', 'b']
|
||||||
|
```
|
27
docs/machine-learning/clustering/dbscan.md
Normal file
27
docs/machine-learning/clustering/dbscan.md
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# DBSCAN clustering
|
||||||
|
|
||||||
|
It is a density-based clustering algorithm: given a set of points in some space, it groups together points that are closely packed together (points with many nearby neighbors), marking as outliers points that lie alone in low-density regions (whose nearest neighbors are too far away). DBSCAN is one of the most common clustering algorithms and also most cited in scientific literature.
|
||||||
|
*(source: wikipedia)*
|
||||||
|
|
||||||
|
### Constructor Parameters
|
||||||
|
|
||||||
|
* $epsilon - epsilon, maximum distance between two samples for them to be considered as in the same neighborhood
|
||||||
|
* $minSamples - number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)
|
||||||
|
* $distanceMetric - Distance object, default Euclidean (see [distance documentation](math/distance/))
|
||||||
|
|
||||||
|
```
|
||||||
|
$dbscan = new DBSCAN($epsilon = 2, $minSamples = 3);
|
||||||
|
$dbscan = new DBSCAN($epsilon = 2, $minSamples = 3, new Minkowski($lambda=4));
|
||||||
|
```
|
||||||
|
|
||||||
|
### Clustering
|
||||||
|
|
||||||
|
To divide the samples into clusters simply use the `cluster` method. It returns an `array` of clusters with samples inside.
|
||||||
|
|
||||||
|
```
|
||||||
|
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
||||||
|
|
||||||
|
$dbscan = new DBSCAN($epsilon = 2, $minSamples = 3);
|
||||||
|
$dbscan->cluster($samples);
|
||||||
|
// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]]
|
||||||
|
```
|
37
docs/machine-learning/clustering/k-means.md
Normal file
37
docs/machine-learning/clustering/k-means.md
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
# K-means clustering
|
||||||
|
|
||||||
|
The K-Means algorithm clusters data by trying to separate samples in n groups of equal variance, minimizing a criterion known as the inertia or within-cluster sum-of-squares.
|
||||||
|
This algorithm requires the number of clusters to be specified.
|
||||||
|
|
||||||
|
### Constructor Parameters
|
||||||
|
|
||||||
|
* $clustersNumber - number of clusters to find
|
||||||
|
* $initialization - initialization method, default kmeans++ (see below)
|
||||||
|
|
||||||
|
```
|
||||||
|
$kmeans = new KMeans(2);
|
||||||
|
$kmeans = new KMeans(4, KMeans::INIT_RANDOM);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Clustering
|
||||||
|
|
||||||
|
To divide the samples into clusters simply use the `cluster` method. It returns an `array` of clusters with samples inside.
|
||||||
|
|
||||||
|
```
|
||||||
|
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
||||||
|
|
||||||
|
$kmeans = new KMeans(2);
|
||||||
|
$kmeans->cluster($samples);
|
||||||
|
// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Initialization methods
|
||||||
|
|
||||||
|
#### kmeans++ (default)
|
||||||
|
|
||||||
|
The k-means++ method selects initial cluster centers for k-means clustering in a smart way to speed up convergence.
|
||||||
|
It uses the DASV seeding method, which consists of finding good initial centroids for the clusters.
|
||||||
|
|
||||||
|
#### random
|
||||||
|
|
||||||
|
The random initialization method chooses completely random centroids. It gets the space boundaries to avoid placing cluster centroids too far from the sample data.
|
@ -12,4 +12,4 @@ Helper class that loads data from CSV file. It extends the `ArrayDataset`.
|
|||||||
$dataset = new CsvDataset('dataset.csv', 2, true);
|
$dataset = new CsvDataset('dataset.csv', 2, true);
|
||||||
```
|
```
|
||||||
|
|
||||||
See Array Dataset for more information.
|
See [ArrayDataset](machine-learning/datasets/array-dataset/) for more information.
|
||||||
|
@ -17,7 +17,7 @@ To load Iris dataset simple use:
|
|||||||
$dataset = new Iris();
|
$dataset = new Iris();
|
||||||
```
|
```
|
||||||
|
|
||||||
### Several samples
|
### Several samples example
|
||||||
|
|
||||||
```
|
```
|
||||||
sepal length,sepal width,petal length,petal width,class
|
sepal length,sepal width,petal length,petal width,class
|
||||||
|
@ -4,7 +4,7 @@ Class for calculate classifier accuracy.
|
|||||||
|
|
||||||
### Score
|
### Score
|
||||||
|
|
||||||
To calculate classifier accuracy score use `score` static method. Parametrs:
|
To calculate classifier accuracy score use `score` static method. Parameters:
|
||||||
|
|
||||||
* $actualLabels - (array) true sample labels
|
* $actualLabels - (array) true sample labels
|
||||||
* $predictedLabels - (array) predicted labels (e.x. from test group)
|
* $predictedLabels - (array) predicted labels (e.x. from test group)
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
# Distance
|
|
||||||
|
|
||||||
Special class for calculation of different types of distance.
|
|
||||||
|
|
||||||
### Euclidean
|
|
||||||
|
|
||||||
![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance")
|
|
||||||
|
|
||||||
To calculate euclidean distance:
|
|
||||||
|
|
||||||
```
|
|
||||||
$a = [4, 6];
|
|
||||||
$b = [2, 5];
|
|
||||||
|
|
||||||
Distance::euclidean($a, $b);
|
|
||||||
// return 2.2360679774998
|
|
||||||
```
|
|
51
docs/machine-learning/regression/least-squares.md
Normal file
51
docs/machine-learning/regression/least-squares.md
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
# LeastSquares Linear Regression
|
||||||
|
|
||||||
|
Linear model that uses the least squares method to approximate a solution.
|
||||||
|
|
||||||
|
### Train
|
||||||
|
|
||||||
|
To train a model simply provide train samples and target values (as `array`). Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
$samples = [[60], [61], [62], [63], [65]];
|
||||||
|
$targets = [3.1, 3.6, 3.8, 4, 4.1];
|
||||||
|
|
||||||
|
$regression = new LeastSquares();
|
||||||
|
$regression->train($samples, $targets);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Predict
|
||||||
|
|
||||||
|
To predict sample target value use `predict` method with sample to check (as `array`). Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
$regression->predict([64]);
|
||||||
|
// return 4.06
|
||||||
|
```
|
||||||
|
|
||||||
|
### Multiple Linear Regression
|
||||||
|
|
||||||
|
The term multiple attached to linear regression means that there are two or more sample parameters used to predict target.
|
||||||
|
For example you can use: mileage and production year to predict price of a car.
|
||||||
|
|
||||||
|
```
|
||||||
|
$samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]];
|
||||||
|
$targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400];
|
||||||
|
|
||||||
|
$regression = new LeastSquares();
|
||||||
|
$regression->train($samples, $targets);
|
||||||
|
$regression->predict([60000, 1996]);
|
||||||
|
// return 4094.82
|
||||||
|
```
|
||||||
|
|
||||||
|
### Intercept and Coefficients
|
||||||
|
|
||||||
|
After you train your model you can get the intercept and coefficients array.
|
||||||
|
|
||||||
|
```
|
||||||
|
$regression->getIntercept();
|
||||||
|
// return -7.9635135135131
|
||||||
|
|
||||||
|
$regression->getCoefficients();
|
||||||
|
// return [array(1) {[0]=>float(0.18783783783783)}]
|
||||||
|
```
|
109
docs/math/distance.md
Normal file
109
docs/math/distance.md
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
# Distance
|
||||||
|
|
||||||
|
Selected algorithms require the use of a function for calculating the distance.
|
||||||
|
|
||||||
|
### Euclidean
|
||||||
|
|
||||||
|
Class for calculating Euclidean distance.
|
||||||
|
|
||||||
|
![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance")
|
||||||
|
|
||||||
|
To calculate Euclidean distance:
|
||||||
|
|
||||||
|
```
|
||||||
|
$a = [4, 6];
|
||||||
|
$b = [2, 5];
|
||||||
|
|
||||||
|
$euclidean = new Euclidean();
|
||||||
|
$euclidean->distance($a, $b);
|
||||||
|
// return 2.2360679774998
|
||||||
|
```
|
||||||
|
|
||||||
|
### Manhattan
|
||||||
|
|
||||||
|
Class for calculating Manhattan distance.
|
||||||
|
|
||||||
|
![manhattan](https://upload.wikimedia.org/math/4/c/5/4c568bd1d76a6b15e19cb2ac3ad75350.png "Manhattan Distance")
|
||||||
|
|
||||||
|
To calculate Manhattan distance:
|
||||||
|
|
||||||
|
```
|
||||||
|
$a = [4, 6];
|
||||||
|
$b = [2, 5];
|
||||||
|
|
||||||
|
$manhattan = new Manhattan();
|
||||||
|
$manhattan->distance($a, $b);
|
||||||
|
// return 3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Chebyshev
|
||||||
|
|
||||||
|
Class for calculating Chebyshev distance.
|
||||||
|
|
||||||
|
![chebyshev](https://upload.wikimedia.org/math/7/1/2/71200f7dbb43b3bcfbcbdb9e02ab0a0c.png "Chebyshev Distance")
|
||||||
|
|
||||||
|
To calculate Chebyshev distance:
|
||||||
|
|
||||||
|
```
|
||||||
|
$a = [4, 6];
|
||||||
|
$b = [2, 5];
|
||||||
|
|
||||||
|
$chebyshev = new Chebyshev();
|
||||||
|
$chebyshev->distance($a, $b);
|
||||||
|
// return 2
|
||||||
|
```
|
||||||
|
|
||||||
|
### Minkowski
|
||||||
|
|
||||||
|
Class for calculating Minkowski distance.
|
||||||
|
|
||||||
|
![minkowski](https://upload.wikimedia.org/math/a/a/0/aa0c62083c12390cb15ac3217de88e66.png "Minkowski Distance")
|
||||||
|
|
||||||
|
To calculate Minkowski distance:
|
||||||
|
|
||||||
|
```
|
||||||
|
$a = [4, 6];
|
||||||
|
$b = [2, 5];
|
||||||
|
|
||||||
|
$minkowski = new Minkowski();
|
||||||
|
$minkowski->distance($a, $b);
|
||||||
|
// return 2.080
|
||||||
|
```
|
||||||
|
|
||||||
|
You can provide the `lambda` parameter:
|
||||||
|
|
||||||
|
```
|
||||||
|
$a = [6, 10, 3];
|
||||||
|
$b = [2, 5, 5];
|
||||||
|
|
||||||
|
$minkowski = new Minkowski($lambda = 5);
|
||||||
|
$minkowski->distance($a, $b);
|
||||||
|
// return 5.300
|
||||||
|
```
|
||||||
|
|
||||||
|
### Custom distance
|
||||||
|
|
||||||
|
To apply your own distance function, implement the `Distance` interface. Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
class CustomDistance implements Distance
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* @param array $a
|
||||||
|
* @param array $b
|
||||||
|
*
|
||||||
|
* @return float
|
||||||
|
*/
|
||||||
|
public function distance(array $a, array $b): float
|
||||||
|
{
|
||||||
|
$distance = [];
|
||||||
|
$count = count($a);
|
||||||
|
|
||||||
|
for ($i = 0; $i < $count; ++$i) {
|
||||||
|
$distance[] = $a[$i] * $b[$i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return min($distance);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
129
docs/math/matrix.md
Normal file
129
docs/math/matrix.md
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
# Matrix
|
||||||
|
|
||||||
|
Class that wraps PHP arrays as a mathematical matrix.
|
||||||
|
|
||||||
|
### Creation
|
||||||
|
|
||||||
|
To create Matrix use simple arrays:
|
||||||
|
|
||||||
|
```
|
||||||
|
$matrix = new Matrix([
|
||||||
|
[3, 3, 3],
|
||||||
|
[4, 2, 1],
|
||||||
|
[5, 6, 7],
|
||||||
|
]);
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also create Matrix (one dimension) from flat array:
|
||||||
|
|
||||||
|
```
|
||||||
|
$flatArray = [1, 2, 3, 4];
|
||||||
|
$matrix = Matrix::fromFlatArray($flatArray);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Matrix data
|
||||||
|
|
||||||
|
Methods for reading data from Matrix:
|
||||||
|
|
||||||
|
```
|
||||||
|
$matrix->toArray(); // cast matrix to PHP array
|
||||||
|
$matrix->getRows(); // rows count
|
||||||
|
$matrix->getColumns(); // columns count
|
||||||
|
$matrix->getColumnValues($column=4); // get values from given column
|
||||||
|
```
|
||||||
|
|
||||||
|
### Determinant
|
||||||
|
|
||||||
|
Read more about [matrix determinant](https://en.wikipedia.org/wiki/Determinant).
|
||||||
|
|
||||||
|
```
|
||||||
|
$matrix = new Matrix([
|
||||||
|
[3, 3, 3],
|
||||||
|
[4, 2, 1],
|
||||||
|
[5, 6, 7],
|
||||||
|
]);
|
||||||
|
|
||||||
|
$matrix->getDeterminant();
|
||||||
|
// return -3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Transpose
|
||||||
|
|
||||||
|
Read more about [matrix transpose](https://en.wikipedia.org/wiki/Transpose).
|
||||||
|
|
||||||
|
```
|
||||||
|
$matrix->transpose();
|
||||||
|
// return new Matrix
|
||||||
|
```
|
||||||
|
|
||||||
|
### Multiply
|
||||||
|
|
||||||
|
Multiply Matrix by another Matrix.
|
||||||
|
|
||||||
|
```
|
||||||
|
$matrix1 = new Matrix([
|
||||||
|
[1, 2, 3],
|
||||||
|
[4, 5, 6],
|
||||||
|
]);
|
||||||
|
|
||||||
|
$matrix2 = new Matrix([
|
||||||
|
[7, 8],
|
||||||
|
[9, 10],
|
||||||
|
[11, 12],
|
||||||
|
]);
|
||||||
|
|
||||||
|
$matrix1->multiply($matrix2);
|
||||||
|
|
||||||
|
// result $product = [
|
||||||
|
// [58, 64],
|
||||||
|
// [139, 154],
|
||||||
|
//];
|
||||||
|
```
|
||||||
|
|
||||||
|
### Divide by scalar
|
||||||
|
|
||||||
|
You can divide Matrix by scalar value.
|
||||||
|
|
||||||
|
```
|
||||||
|
$matrix->divideByScalar(2);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Inverse
|
||||||
|
|
||||||
|
Read more about [invertible matrix](https://en.wikipedia.org/wiki/Invertible_matrix)
|
||||||
|
|
||||||
|
```
|
||||||
|
$matrix = new Matrix([
|
||||||
|
[3, 4, 2],
|
||||||
|
[4, 5, 5],
|
||||||
|
[1, 1, 1],
|
||||||
|
]);
|
||||||
|
|
||||||
|
$matrix->inverse();
|
||||||
|
|
||||||
|
// result $inverseMatrix = [
|
||||||
|
// [0, -1, 5],
|
||||||
|
// [1 / 2, 1 / 2, -7 / 2],
|
||||||
|
// [-1 / 2, 1 / 2, -1 / 2],
|
||||||
|
//];
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### Cross out
|
||||||
|
|
||||||
|
Cross out given row and column from Matrix.
|
||||||
|
|
||||||
|
```
|
||||||
|
$matrix = new Matrix([
|
||||||
|
[3, 4, 2],
|
||||||
|
[4, 5, 5],
|
||||||
|
[1, 1, 1],
|
||||||
|
]);
|
||||||
|
|
||||||
|
$matrix->crossOut(1, 1);
|
||||||
|
|
||||||
|
// result $crossOuted = [
|
||||||
|
// [3, 2],
|
||||||
|
// [1, 1],
|
||||||
|
//];
|
||||||
|
```
|
16
mkdocs.yml
16
mkdocs.yml
@ -3,15 +3,23 @@ pages:
|
|||||||
- Home: index.md
|
- Home: index.md
|
||||||
- Machine Learning:
|
- Machine Learning:
|
||||||
- Classification:
|
- Classification:
|
||||||
- KNearestNeighbors: machine-learning/classification/knearestneighbors.md
|
- KNearestNeighbors: machine-learning/classification/k-nearest-neighbors.md
|
||||||
|
- NaiveBayes: machine-learning/classification/naive-bayes.md
|
||||||
|
- Regression:
|
||||||
|
- LeastSquares: machine-learning/regression/least-squares.md
|
||||||
|
- Clustering:
|
||||||
|
- KMeans: machine-learning/clustering/k-means.md
|
||||||
|
- DBSCAN: machine-learning/clustering/dbscan.md
|
||||||
- Cross Validation:
|
- Cross Validation:
|
||||||
- RandomSplit: machine-learning/cross-validation/randomsplit.md
|
- RandomSplit: machine-learning/cross-validation/random-split.md
|
||||||
- Datasets:
|
- Datasets:
|
||||||
- Array Dataset: machine-learning/datasets/array-dataset.md
|
- Array Dataset: machine-learning/datasets/array-dataset.md
|
||||||
- CSV Dataset: machine-learning/datasets/csv-dataset.md
|
- CSV Dataset: machine-learning/datasets/csv-dataset.md
|
||||||
- Demo:
|
- Ready to use datasets:
|
||||||
- Iris: machine-learning/datasets/demo/iris.md
|
- Iris: machine-learning/datasets/demo/iris.md
|
||||||
- Metric:
|
- Metric:
|
||||||
- Accuracy: machine-learning/metric/accuracy.md
|
- Accuracy: machine-learning/metric/accuracy.md
|
||||||
- Distance: machine-learning/metric/distance.md
|
- Math:
|
||||||
|
- Distance: math/distance.md
|
||||||
|
- Matrix: math/matrix.md
|
||||||
theme: readthedocs
|
theme: readthedocs
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
declare (strict_types = 1);
|
declare (strict_types = 1);
|
||||||
|
|
||||||
namespace Phpml\Classifier;
|
namespace Phpml\Classification;
|
||||||
|
|
||||||
interface Classifier
|
interface Classifier
|
||||||
{
|
{
|
@ -2,64 +2,41 @@
|
|||||||
|
|
||||||
declare (strict_types = 1);
|
declare (strict_types = 1);
|
||||||
|
|
||||||
namespace Phpml\Classifier;
|
namespace Phpml\Classification;
|
||||||
|
|
||||||
use Phpml\Metric\Distance;
|
use Phpml\Classification\Traits\Predictable;
|
||||||
|
use Phpml\Classification\Traits\Trainable;
|
||||||
|
use Phpml\Math\Distance;
|
||||||
|
use Phpml\Math\Distance\Euclidean;
|
||||||
|
|
||||||
class KNearestNeighbors implements Classifier
|
class KNearestNeighbors implements Classifier
|
||||||
{
|
{
|
||||||
|
use Trainable, Predictable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @var int
|
* @var int
|
||||||
*/
|
*/
|
||||||
private $k;
|
private $k;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @var array
|
* @var Distance
|
||||||
*/
|
*/
|
||||||
private $samples;
|
private $distanceMetric;
|
||||||
|
|
||||||
/**
|
|
||||||
* @var array
|
|
||||||
*/
|
|
||||||
private $labels;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param int $k
|
* @param int $k
|
||||||
|
* @param Distance|null $distanceMetric (if null then Euclidean distance as default)
|
||||||
*/
|
*/
|
||||||
public function __construct(int $k = 3)
|
public function __construct(int $k = 3, Distance $distanceMetric = null)
|
||||||
{
|
{
|
||||||
|
if (null === $distanceMetric) {
|
||||||
|
$distanceMetric = new Euclidean();
|
||||||
|
}
|
||||||
|
|
||||||
$this->k = $k;
|
$this->k = $k;
|
||||||
$this->samples = [];
|
$this->samples = [];
|
||||||
$this->labels = [];
|
$this->labels = [];
|
||||||
}
|
$this->distanceMetric = $distanceMetric;
|
||||||
|
|
||||||
/**
|
|
||||||
* @param array $samples
|
|
||||||
* @param array $labels
|
|
||||||
*/
|
|
||||||
public function train(array $samples, array $labels)
|
|
||||||
{
|
|
||||||
$this->samples = $samples;
|
|
||||||
$this->labels = $labels;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param array $samples
|
|
||||||
*
|
|
||||||
* @return mixed
|
|
||||||
*/
|
|
||||||
public function predict(array $samples)
|
|
||||||
{
|
|
||||||
if (!is_array($samples[0])) {
|
|
||||||
$predicted = $this->predictSample($samples);
|
|
||||||
} else {
|
|
||||||
$predicted = [];
|
|
||||||
foreach ($samples as $index => $sample) {
|
|
||||||
$predicted[$index] = $this->predictSample($sample);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return $predicted;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -67,7 +44,7 @@ class KNearestNeighbors implements Classifier
|
|||||||
*
|
*
|
||||||
* @return mixed
|
* @return mixed
|
||||||
*/
|
*/
|
||||||
private function predictSample(array $sample)
|
protected function predictSample(array $sample)
|
||||||
{
|
{
|
||||||
$distances = $this->kNeighborsDistances($sample);
|
$distances = $this->kNeighborsDistances($sample);
|
||||||
|
|
||||||
@ -95,7 +72,7 @@ class KNearestNeighbors implements Classifier
|
|||||||
$distances = [];
|
$distances = [];
|
||||||
|
|
||||||
foreach ($this->samples as $index => $neighbor) {
|
foreach ($this->samples as $index => $neighbor) {
|
||||||
$distances[$index] = Distance::euclidean($sample, $neighbor);
|
$distances[$index] = $this->distanceMetric->distance($sample, $neighbor);
|
||||||
}
|
}
|
||||||
|
|
||||||
asort($distances);
|
asort($distances);
|
36
src/Phpml/Classification/NaiveBayes.php
Normal file
36
src/Phpml/Classification/NaiveBayes.php
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Classification;
|
||||||
|
|
||||||
|
use Phpml\Classification\Traits\Predictable;
|
||||||
|
use Phpml\Classification\Traits\Trainable;
|
||||||
|
|
||||||
|
class NaiveBayes implements Classifier
{
    use Trainable, Predictable;

    /**
     * Scores the given sample against each training sample and returns the
     * label that ends up with the highest score.
     *
     * The score of one training sample is the sum, over every key of $sample
     * that also exists in that training sample, of the product of the two
     * values. Scores are stored per label: when several training samples
     * share a label, the score is reset for each of them, so only the last
     * one with that label is reflected in the result.
     *
     * NOTE(review): $this->labels and $this->samples are not declared in this
     * class; presumably they are provided by the Trainable trait - confirm.
     *
     * @param array $sample
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        $scores = [];

        foreach ($this->labels as $position => $label) {
            // Start (or restart) the score for this label.
            $scores[$label] = 0;

            foreach ($sample as $feature => $value) {
                // Features the training sample does not contain add nothing.
                if (!array_key_exists($feature, $this->samples[$position])) {
                    continue;
                }

                $scores[$label] += $value * $this->samples[$position][$feature];
            }
        }

        // Highest score first; the key of the first element is the predicted label.
        arsort($scores, SORT_NUMERIC);
        reset($scores);

        return key($scores);
    }
}
|
61
src/Phpml/Classification/SupportVectorMachine.php
Normal file
61
src/Phpml/Classification/SupportVectorMachine.php
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Classification;
|
||||||
|
|
||||||
|
use Phpml\Classification\Traits\Predictable;
|
||||||
|
use Phpml\Classification\Traits\Trainable;
|
||||||
|
use Phpml\Math\Kernel;
|
||||||
|
|
||||||
|
/**
 * Support vector machine classifier skeleton: it stores its hyper-parameters
 * but prediction is not implemented yet.
 */
class SupportVectorMachine implements Classifier
{
    use Trainable, Predictable;

    /**
     * @var Kernel
     */
    private $kernel;

    /**
     * @var float
     */
    private $C;

    /**
     * @var float
     */
    private $tolerance;

    /**
     * @var int
     */
    private $upperBound;

    /**
     * Stores the configuration; an RBF kernel built with .001 is used when no
     * kernel is supplied.
     *
     * @param Kernel $kernel
     * @param float  $C
     * @param float  $tolerance
     * @param int    $upperBound
     */
    public function __construct(Kernel $kernel = null, float $C = 1.0, float $tolerance = .001, int $upperBound = 100)
    {
        $this->kernel = $kernel ?? new Kernel\RBF(.001);
        $this->C = $C;
        $this->tolerance = $tolerance;
        $this->upperBound = $upperBound;
    }

    /**
     * Not implemented: the body is empty, so the call yields null.
     *
     * @param array $sample
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
    }
}
|
34
src/Phpml/Classification/Traits/Predictable.php
Normal file
34
src/Phpml/Classification/Traits/Predictable.php
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Classification\Traits;
|
||||||
|
|
||||||
|
/**
 * Adds a public predict() entry point on top of a per-sample predictSample().
 */
trait Predictable
{
    /**
     * Predicts either a single sample or a batch of samples.
     *
     * The input is treated as a batch when its first element is itself an
     * array; otherwise the whole input is one sample. The first element is
     * inspected by position rather than through key 0, so string-keyed samples
     * and batches whose keys do not start at 0 are handled without an
     * undefined-offset notice.
     *
     * @param array $samples one sample, or an array of samples
     *
     * @return mixed a single prediction, or an array of predictions that keeps
     *               the keys of the batch
     */
    public function predict(array $samples)
    {
        // reset() yields false for an empty array, which is not an array either,
        // so an empty input is still forwarded as a single (empty) sample.
        $first = reset($samples);

        if (!is_array($first)) {
            return $this->predictSample($samples);
        }

        $predicted = [];
        foreach ($samples as $index => $sample) {
            $predicted[$index] = $this->predictSample($sample);
        }

        return $predicted;
    }

    /**
     * Predicts the outcome for exactly one sample.
     *
     * @param array $sample
     *
     * @return mixed
     */
    abstract protected function predictSample(array $sample);
}
|
28
src/Phpml/Classification/Traits/Trainable.php
Normal file
28
src/Phpml/Classification/Traits/Trainable.php
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Classification\Traits;
|
||||||
|
|
||||||
|
/**
 * Gives a class a train() method that simply remembers the training data.
 */
trait Trainable
{
    /**
     * Samples received by the most recent train() call.
     *
     * @var array
     */
    private $samples;

    /**
     * Labels received by the most recent train() call.
     *
     * @var array
     */
    private $labels;

    /**
     * Stores the training set, replacing anything stored before.
     *
     * @param array $samples
     * @param array $labels
     */
    public function train(array $samples, array $labels)
    {
        $this->labels = $labels;
        $this->samples = $samples;
    }
}
|
@ -1,25 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
declare (strict_types = 1);
|
|
||||||
|
|
||||||
namespace Phpml\Classifier;
|
|
||||||
|
|
||||||
class NaiveBayes implements Classifier
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* @param array $samples
|
|
||||||
* @param array $labels
|
|
||||||
*/
|
|
||||||
public function train(array $samples, array $labels)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param array $samples
|
|
||||||
*
|
|
||||||
* @return mixed
|
|
||||||
*/
|
|
||||||
public function predict(array $samples)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
}
|
|
15
src/Phpml/Clustering/Clusterer.php
Normal file
15
src/Phpml/Clustering/Clusterer.php
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Clustering;
|
||||||
|
|
||||||
|
/**
 * Contract shared by the clustering algorithms in this namespace.
 */
interface Clusterer
{
    /**
     * Groups the given samples into clusters.
     *
     * @param array $samples samples to group
     *
     * @return array the clusters that were found
     */
    public function cluster(array $samples);
}
|
111
src/Phpml/Clustering/DBSCAN.php
Normal file
111
src/Phpml/Clustering/DBSCAN.php
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Clustering;
|
||||||
|
|
||||||
|
use Phpml\Math\Distance;
|
||||||
|
use Phpml\Math\Distance\Euclidean;
|
||||||
|
|
||||||
|
/**
 * Density-based clustering driven by a neighbourhood radius and a minimum
 * neighbourhood size.
 *
 * NOTE(review): cluster() accepts a neighbourhood with `>=` minSamples while
 * expandCluster() requires `>` minSamples, and expandCluster() searches only
 * inside the seed neighbourhood and can append the same sample more than once.
 * These look unintended — confirm before relying on the merge path.
 */
class DBSCAN implements Clusterer
{
    /**
     * @var float
     */
    private $epsilon;

    /**
     * @var int
     */
    private $minSamples;

    /**
     * @var Distance
     */
    private $distanceMetric;

    /**
     * Stores the parameters; Euclidean distance is used when no metric is given.
     *
     * @param float    $epsilon        neighbourhood radius (strictly-less-than comparison)
     * @param int      $minSamples     neighbourhood size needed to start a cluster
     * @param Distance $distanceMetric
     */
    public function __construct($epsilon = 0.5, $minSamples = 3, Distance $distanceMetric = null)
    {
        $this->epsilon = $epsilon;
        $this->minSamples = $minSamples;
        $this->distanceMetric = $distanceMetric ?? new Euclidean();
    }

    /**
     * Visits every sample once and starts a cluster from each unvisited sample
     * whose neighbourhood holds at least minSamples samples.
     *
     * @param array $samples
     *
     * @return array list of clusters, each a list of samples
     */
    public function cluster(array $samples)
    {
        $visited = [];
        $clusters = [];

        foreach ($samples as $index => $sample) {
            if (!isset($visited[$index])) {
                $visited[$index] = true;

                $neighbourhood = $this->getSamplesInRegion($sample, $samples);
                if (count($neighbourhood) >= $this->minSamples) {
                    $clusters[] = $this->expandCluster($neighbourhood, $visited);
                }
            }
        }

        return $clusters;
    }

    /**
     * Returns the samples closer than epsilon to the given sample.
     *
     * @param array $localSample
     * @param array $samples
     *
     * @return array matching samples, still under their original keys
     */
    private function getSamplesInRegion($localSample, $samples)
    {
        return array_filter($samples, function ($candidate) use ($localSample) {
            return $this->distanceMetric->distance($localSample, $candidate) < $this->epsilon;
        });
    }

    /**
     * Builds one cluster from a seed neighbourhood, marking its members visited.
     *
     * @param array $samples seed neighbourhood, keyed by original sample index
     * @param array $visited visited flags, updated in place
     *
     * @return array
     */
    private function expandCluster($samples, &$visited)
    {
        $members = [];

        foreach ($samples as $index => $sample) {
            if (!isset($visited[$index])) {
                $visited[$index] = true;

                // The lookup is limited to the seed neighbourhood itself.
                $neighbours = $this->getSamplesInRegion($sample, $samples);
                if (count($neighbours) > $this->minSamples) {
                    $members = array_merge($neighbours, $members);
                }
            }

            $members[] = $sample;
        }

        return $members;
    }
}
|
60
src/Phpml/Clustering/KMeans.php
Normal file
60
src/Phpml/Clustering/KMeans.php
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Clustering;
|
||||||
|
|
||||||
|
use Phpml\Clustering\KMeans\Space;
|
||||||
|
use Phpml\Exception\InvalidArgumentException;
|
||||||
|
|
||||||
|
/**
 * K-means clustering front end; the work is delegated to KMeans\Space.
 */
class KMeans implements Clusterer
{
    const INIT_RANDOM = 1;
    const INIT_KMEANS_PLUS_PLUS = 2;

    /**
     * @var int
     */
    private $clustersNumber;

    /**
     * @var int
     */
    private $initialization;

    /**
     * @param int $clustersNumber number of clusters to produce, must be positive
     * @param int $initialization one of the INIT_* constants
     *
     * @throws InvalidArgumentException when $clustersNumber is zero or negative
     */
    public function __construct(int $clustersNumber, int $initialization = self::INIT_KMEANS_PLUS_PLUS)
    {
        if ($clustersNumber <= 0) {
            throw InvalidArgumentException::invalidClustersNumber();
        }

        $this->initialization = $initialization;
        $this->clustersNumber = $clustersNumber;
    }

    /**
     * Clusters the samples and returns the points of every cluster.
     *
     * The dimension of the space is taken from the first sample.
     *
     * @param array $samples
     *
     * @return array one list of coordinate arrays per cluster
     */
    public function cluster(array $samples)
    {
        $space = new Space(count($samples[0]));
        foreach ($samples as $coordinates) {
            $space->addPoint($coordinates);
        }

        $found = $space->cluster($this->clustersNumber, $this->initialization);

        $result = [];
        foreach ($found as $group) {
            $result[] = $group->getPoints();
        }

        return $result;
    }
}
|
137
src/Phpml/Clustering/KMeans/Cluster.php
Normal file
137
src/Phpml/Clustering/KMeans/Cluster.php
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Clustering\KMeans;
|
||||||
|
|
||||||
|
use IteratorAggregate;
|
||||||
|
use Countable;
|
||||||
|
use SplObjectStorage;
|
||||||
|
use LogicException;
|
||||||
|
|
||||||
|
/**
 * A centroid (its own coordinates, inherited from Point) together with the
 * points currently assigned to it.
 */
class Cluster extends Point implements IteratorAggregate, Countable
{
    /**
     * @var Space
     */
    protected $space;

    /**
     * @var SplObjectStorage|Point[]
     */
    protected $points;

    /**
     * @param Space $space       space the cluster belongs to
     * @param array $coordinates initial centroid coordinates
     */
    public function __construct(Space $space, array $coordinates)
    {
        parent::__construct($coordinates);

        $this->points = new SplObjectStorage();
        $this->space = $space;
    }

    /**
     * Coordinates of every attached point.
     *
     * @return array
     */
    public function getPoints()
    {
        $coordinates = [];
        foreach ($this->points as $member) {
            $coordinates[] = $member->toArray();
        }

        return $coordinates;
    }

    /**
     * Centroid coordinates plus the coordinates of the attached points.
     *
     * @return array
     */
    public function toArray()
    {
        return [
            'centroid' => parent::toArray(),
            'points' => $this->getPoints(),
        ];
    }

    /**
     * Assigns a point to this cluster.
     *
     * @param Point $point
     *
     * @return Point the attached point
     *
     * @throws LogicException when the point is itself a cluster
     */
    public function attach(Point $point)
    {
        if ($point instanceof self) {
            throw new LogicException('cannot attach a cluster to another');
        }

        $this->points->attach($point);

        return $point;
    }

    /**
     * Removes a point from this cluster.
     *
     * @param Point $point
     *
     * @return Point the detached point
     */
    public function detach(Point $point)
    {
        $this->points->detach($point);

        return $point;
    }

    /**
     * Assigns every point of the given storage to this cluster.
     *
     * @param SplObjectStorage $points
     */
    public function attachAll(SplObjectStorage $points)
    {
        $this->points->addAll($points);
    }

    /**
     * Removes every point of the given storage from this cluster.
     *
     * @param SplObjectStorage $points
     */
    public function detachAll(SplObjectStorage $points)
    {
        $this->points->removeAll($points);
    }

    /**
     * Moves the centroid to the mean of the attached points; an empty cluster
     * keeps its current coordinates.
     */
    public function updateCentroid()
    {
        $total = count($this->points);
        if (!$total) {
            return;
        }

        // Per-axis running sums, held in a scratch point of the same dimension.
        $sums = $this->space->newPoint(array_fill(0, $this->dimension, 0));

        foreach ($this->points as $member) {
            for ($axis = 0; $axis < $this->dimension; ++$axis) {
                $sums->coordinates[$axis] += $member->coordinates[$axis];
            }
        }

        for ($axis = 0; $axis < $this->dimension; ++$axis) {
            $this->coordinates[$axis] = $sums->coordinates[$axis] / $total;
        }
    }

    /**
     * @return Point[]|SplObjectStorage the storage holding the attached points
     */
    public function getIterator()
    {
        return $this->points;
    }

    /**
     * @return mixed number of attached points
     */
    public function count()
    {
        return count($this->points);
    }
}
|
124
src/Phpml/Clustering/KMeans/Point.php
Normal file
124
src/Phpml/Clustering/KMeans/Point.php
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Clustering\KMeans;
|
||||||
|
|
||||||
|
use ArrayAccess;
|
||||||
|
|
||||||
|
/**
 * A point in n-dimensional space whose coordinates are also reachable through
 * array syntax.
 */
class Point implements ArrayAccess
{
    /**
     * @var int
     */
    protected $dimension;

    /**
     * @var array
     */
    protected $coordinates;

    /**
     * @param array $coordinates one value per dimension
     */
    public function __construct(array $coordinates)
    {
        $this->dimension = count($coordinates);
        $this->coordinates = $coordinates;
    }

    /**
     * @return array the coordinates
     */
    public function toArray()
    {
        return $this->coordinates;
    }

    /**
     * Euclidean distance to another point.
     *
     * @param Point $point
     * @param bool  $precise when false the square root is skipped and the
     *                       squared distance is returned
     *
     * @return int|mixed
     */
    public function getDistanceWith(self $point, $precise = true)
    {
        $distance = 0;
        for ($n = 0; $n < $this->dimension; ++$n) {
            $difference = $this->coordinates[$n] - $point->coordinates[$n];
            $distance += $difference * $difference;
        }

        return $precise ? sqrt($distance) : $distance;
    }

    /**
     * Finds the nearest of the given points; on ties the earliest one wins.
     *
     * @param array|\Traversable $points candidate points
     *
     * @return mixed the closest point, or null when $points is empty
     */
    public function getClosest($points)
    {
        // Both are initialised up front so an empty collection yields null
        // instead of reading an undefined variable.
        $minPoint = null;
        $minDistance = null;

        foreach ($points as $point) {
            // Squared distances are enough for comparing.
            $distance = $this->getDistanceWith($point, false);

            if ($minDistance === null || $distance < $minDistance) {
                $minDistance = $distance;
                $minPoint = $point;
            }
        }

        return $minPoint;
    }

    /**
     * @return array
     */
    public function getCoordinates()
    {
        return $this->coordinates;
    }

    // The #[\ReturnTypeWillChange] lines below are plain comments on PHP 7 and
    // silence the ArrayAccess return-type deprecation on PHP 8.1 and later.

    /**
     * @param mixed $offset
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return isset($this->coordinates[$offset]);
    }

    /**
     * @param mixed $offset
     *
     * @return mixed
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return $this->coordinates[$offset];
    }

    /**
     * @param mixed $offset
     * @param mixed $value
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        $this->coordinates[$offset] = $value;
    }

    /**
     * @param mixed $offset
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        unset($this->coordinates[$offset]);
    }
}
|
233
src/Phpml/Clustering/KMeans/Space.php
Normal file
233
src/Phpml/Clustering/KMeans/Space.php
Normal file
@ -0,0 +1,233 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Clustering\KMeans;
|
||||||
|
|
||||||
|
use Phpml\Clustering\KMeans;
|
||||||
|
use SplObjectStorage;
|
||||||
|
use LogicException;
|
||||||
|
use InvalidArgumentException;
|
||||||
|
|
||||||
|
/**
 * Storage of Point objects that share one dimension, plus the k-means
 * iteration that partitions them into clusters.
 */
class Space extends SplObjectStorage
{
    /**
     * @var int
     */
    protected $dimension;

    /**
     * @param int $dimension number of coordinates every point must have
     *
     * @throws LogicException when $dimension is lower than 1
     */
    public function __construct($dimension)
    {
        if ($dimension < 1) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }

    /**
     * @return array ['points' => list of coordinate arrays]
     */
    public function toArray()
    {
        $points = [];
        foreach ($this as $point) {
            $points[] = $point->toArray();
        }

        return ['points' => $points];
    }

    /**
     * Creates a point without adding it to the space.
     *
     * @param array $coordinates
     *
     * @return Point
     *
     * @throws LogicException when the number of coordinates differs from the
     *                        dimension of the space
     */
    public function newPoint(array $coordinates)
    {
        if (count($coordinates) != $this->dimension) {
            throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
        }

        return new Point($coordinates);
    }

    /**
     * Creates a point from the coordinates and stores it in the space.
     *
     * @param array $coordinates
     * @param null  $data        optional data associated with the point
     */
    public function addPoint(array $coordinates, $data = null)
    {
        return $this->attach($this->newPoint($coordinates), $data);
    }

    /**
     * Stores a point; anything that is not a Point is rejected.
     *
     * @param object $point
     * @param null   $data
     *
     * @throws InvalidArgumentException when $point is not a Point
     */
    public function attach($point, $data = null)
    {
        if (!$point instanceof Point) {
            throw new InvalidArgumentException('can only attach points to spaces');
        }

        return parent::attach($point, $data);
    }

    /**
     * @return int
     */
    public function getDimension()
    {
        return $this->dimension;
    }

    /**
     * Per-axis minimum and maximum over all stored points.
     *
     * @return array|bool [min point, max point], or false for an empty space
     */
    public function getBoundaries()
    {
        if (!count($this)) {
            return false;
        }

        // null marks "no value seen yet" on an axis.
        $min = $this->newPoint(array_fill(0, $this->dimension, null));
        $max = $this->newPoint(array_fill(0, $this->dimension, null));

        foreach ($this as $point) {
            for ($n = 0; $n < $this->dimension; ++$n) {
                if ($min[$n] === null || $min[$n] > $point[$n]) {
                    $min[$n] = $point[$n];
                }
                if ($max[$n] === null || $max[$n] < $point[$n]) {
                    $max[$n] = $point[$n];
                }
            }
        }

        return [$min, $max];
    }

    /**
     * Draws a point with random integer coordinates inside the given bounds.
     *
     * @param Point $min
     * @param Point $max
     *
     * @return Point
     */
    public function getRandomPoint(Point $min, Point $max)
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            // rand() only accepts integers; under strict_types a float bound
            // would raise a TypeError, so the bounds are truncated first.
            $point[$n] = rand((int) $min[$n], (int) $max[$n]);
        }

        return $point;
    }

    /**
     * Partitions the stored points into clusters, iterating until no point
     * changes cluster.
     *
     * @param int $clustersNumber
     * @param int $initMethod     one of the KMeans::INIT_* constants
     *
     * @return array|Cluster[]
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM)
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        do {
        } while (!$this->iterate($clusters));

        return $clusters;
    }

    /**
     * Creates the initial clusters and assigns every point to the first one.
     *
     * @param int $clustersNumber
     * @param int $initMethod     one of the KMeans::INIT_* constants
     *
     * @return array|Cluster[]
     *
     * @throws InvalidArgumentException when $initMethod is not a known method
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod)
    {
        $clusters = [];

        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                list($min, $max) = $this->getBoundaries();
                for ($n = 0; $n < $clustersNumber; ++$n) {
                    $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
                }
                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                // Advance the storage cursor to a randomly chosen point; it
                // becomes the first centroid.
                $position = rand(1, count($this));
                for ($i = 1, $this->rewind(); $i < $position && $this->valid(); $i++, $this->next());
                $clusters[] = new Cluster($this, $this->current()->getCoordinates());

                $distances = new SplObjectStorage();

                for ($i = 1; $i < $clustersNumber; ++$i) {
                    // Distance of every point to its nearest existing centroid.
                    $sum = 0;
                    foreach ($this as $point) {
                        $distance = $point->getDistanceWith($point->getClosest($clusters));
                        $sum += $distances[$point] = $distance;
                    }

                    // Pick the point at which the running total of distances
                    // reaches a random threshold.
                    $sum = rand(0, (int) $sum);
                    foreach ($this as $point) {
                        if (($sum -= $distances[$point]) > 0) {
                            continue;
                        }

                        $clusters[] = new Cluster($this, $point->getCoordinates());
                        break;
                    }
                }
                break;

            default:
                // Without this the code below would dereference an undefined
                // $clusters[0] and die with a fatal error.
                throw new InvalidArgumentException('unknown clusters initialization method');
        }

        $clusters[0]->attachAll($this);

        return $clusters;
    }

    /**
     * Runs one reassignment pass: moves each point to its closest cluster and
     * recomputes every centroid.
     *
     * @param array|Cluster[] $clusters
     *
     * @return bool true when no point changed cluster during this pass
     */
    protected function iterate($clusters)
    {
        $convergence = true;

        // Moves are collected first and applied afterwards, so the clusters
        // are not modified while they are being iterated.
        $attach = new SplObjectStorage();
        $detach = new SplObjectStorage();

        foreach ($clusters as $cluster) {
            foreach ($cluster as $point) {
                $closest = $point->getClosest($clusters);

                if ($closest !== $cluster) {
                    if (!isset($attach[$closest])) {
                        $attach[$closest] = new SplObjectStorage();
                    }
                    if (!isset($detach[$cluster])) {
                        $detach[$cluster] = new SplObjectStorage();
                    }

                    $attach[$closest]->attach($point);
                    $detach[$cluster]->attach($point);

                    $convergence = false;
                }
            }
        }

        foreach ($attach as $cluster) {
            $cluster->attachAll($attach[$cluster]);
        }

        foreach ($detach as $cluster) {
            $cluster->detachAll($detach[$cluster]);
        }

        foreach ($clusters as $cluster) {
            $cluster->updateCentroid();
        }

        return $convergence;
    }
}
|
@ -27,7 +27,7 @@ class ArrayDataset implements Dataset
|
|||||||
public function __construct(array $samples, array $labels)
|
public function __construct(array $samples, array $labels)
|
||||||
{
|
{
|
||||||
if (count($samples) != count($labels)) {
|
if (count($samples) != count($labels)) {
|
||||||
throw InvalidArgumentException::sizeNotMatch();
|
throw InvalidArgumentException::arraySizeNotMatch();
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->samples = $samples;
|
$this->samples = $samples;
|
||||||
|
@ -26,19 +26,18 @@ class CsvDataset extends ArrayDataset
|
|||||||
throw DatasetException::missingFile(basename($filepath));
|
throw DatasetException::missingFile(basename($filepath));
|
||||||
}
|
}
|
||||||
|
|
||||||
$row = 0;
|
if (false === $handle = fopen($filepath, 'r')) {
|
||||||
if (($handle = fopen($filepath, 'r')) !== false) {
|
throw DatasetException::cantOpenFile(basename($filepath));
|
||||||
while (($data = fgetcsv($handle, 1000, ',')) !== false) {
|
|
||||||
++$row;
|
|
||||||
if ($headingRow && $row == 1) {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($headingRow) {
|
||||||
|
fgets($handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (($data = fgetcsv($handle, 1000, ',')) !== false) {
|
||||||
$this->samples[] = array_slice($data, 0, $features);
|
$this->samples[] = array_slice($data, 0, $features);
|
||||||
$this->labels[] = $data[$features];
|
$this->labels[] = $data[$features];
|
||||||
}
|
}
|
||||||
fclose($handle);
|
fclose($handle);
|
||||||
} else {
|
|
||||||
throw DatasetException::cantOpenFile(basename($filepath));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,7 @@ namespace Phpml\Dataset;
|
|||||||
|
|
||||||
interface Dataset
|
interface Dataset
|
||||||
{
|
{
|
||||||
|
const SOME = 'z';
|
||||||
/**
|
/**
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
|
28
src/Phpml/Dataset/Demo/Glass.php
Normal file
28
src/Phpml/Dataset/Demo/Glass.php
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Dataset\Demo;
|
||||||
|
|
||||||
|
use Phpml\Dataset\CsvDataset;
|
||||||
|
|
||||||
|
/**
 * Glass demo dataset, read from data/glass.csv.
 *
 * Classes: 6
 * Samples per class:
 * 70 float processed building windows
 * 17 float processed vehicle windows
 * 76 non-float processed building windows
 * 13 containers
 * 9 tableware
 * 29 headlamps
 * Samples total: 214
 * Features per sample: 9.
 */
class Glass extends CsvDataset
{
    /**
     * Loads the bundled CSV file: 9 feature columns, heading row enabled.
     */
    public function __construct()
    {
        parent::__construct(__DIR__.'/../../../../data/glass.csv', 9, true);
    }
}
|
22
src/Phpml/Dataset/Demo/Wine.php
Normal file
22
src/Phpml/Dataset/Demo/Wine.php
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Dataset\Demo;
|
||||||
|
|
||||||
|
use Phpml\Dataset\CsvDataset;
|
||||||
|
|
||||||
|
/**
 * Wine demo dataset, read from data/wine.csv.
 *
 * Classes: 3
 * Samples per class: class 1 59; class 2 71; class 3 48
 * Samples total: 178
 * Features per sample: 13.
 */
class Wine extends CsvDataset
{
    /**
     * Loads the bundled CSV file: 13 feature columns, heading row enabled.
     */
    public function __construct()
    {
        parent::__construct(__DIR__.'/../../../../data/wine.csv', 13, true);
    }
}
|
@ -9,9 +9,9 @@ class InvalidArgumentException extends \Exception
|
|||||||
/**
|
/**
|
||||||
* @return InvalidArgumentException
|
* @return InvalidArgumentException
|
||||||
*/
|
*/
|
||||||
public static function sizeNotMatch()
|
public static function arraySizeNotMatch()
|
||||||
{
|
{
|
||||||
return new self('Size of given arguments not match');
|
return new self('Size of given arrays not match');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -23,4 +23,46 @@ class InvalidArgumentException extends \Exception
|
|||||||
{
|
{
|
||||||
return new self(sprintf('%s must be between 0.0 and 1.0', $name));
|
return new self(sprintf('%s must be between 0.0 and 1.0', $name));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the exception whose message reports an array with zero elements.
 *
 * @return InvalidArgumentException
 */
public static function arrayCantBeEmpty()
{
    $message = 'The array has zero elements';

    return new self($message);
}
|
||||||
|
|
||||||
|
/**
 * Builds the exception whose message states the minimum number of elements
 * an array must have.
 *
 * @param int $minimumSize value inserted into the message
 *
 * @return InvalidArgumentException
 */
public static function arraySizeToSmall($minimumSize = 2)
{
    $message = sprintf('The array must have at least %s elements', $minimumSize);

    return new self($message);
}
|
||||||
|
|
||||||
|
/**
 * Builds the exception whose message reports mismatching matrix dimensions.
 *
 * @return InvalidArgumentException
 */
public static function matrixDimensionsDidNotMatch()
{
    $message = 'Matrix dimensions did not match';

    return new self($message);
}
|
||||||
|
|
||||||
|
/**
 * Builds the exception whose message reports an inconsistent matrix.
 *
 * @return InvalidArgumentException
 */
public static function inconsistentMatrixSupplied()
{
    // Message typo fixed: "aupplied" -> "supplied".
    return new self('Inconsistent matrix supplied');
}
|
||||||
|
|
||||||
|
/**
 * Builds the exception whose message reports an invalid number of clusters.
 *
 * @return InvalidArgumentException
 */
public static function invalidClustersNumber()
{
    $message = 'Invalid clusters number';

    return new self($message);
}
|
||||||
}
|
}
|
||||||
|
24
src/Phpml/Exception/MatrixException.php
Normal file
24
src/Phpml/Exception/MatrixException.php
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Exception;
|
||||||
|
|
||||||
|
/**
 * Exception type for matrix errors, created through named constructors.
 */
class MatrixException extends \Exception
{
    /**
     * Exception carrying the "not square" message.
     *
     * @return MatrixException
     */
    public static function notSquareMatrix()
    {
        $message = 'Matrix is not square matrix';

        return new self($message);
    }

    /**
     * Exception carrying the "column out of range" message.
     *
     * @return MatrixException
     */
    public static function columnOutOfRange()
    {
        $message = 'Column out of range';

        return new self($message);
    }
}
|
16
src/Phpml/Math/Distance.php
Normal file
16
src/Phpml/Math/Distance.php
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math;
|
||||||
|
|
||||||
|
/**
 * Contract for a distance measure between two samples.
 */
interface Distance
{
    /**
     * Computes the distance between two samples.
     *
     * @param array $a first sample
     * @param array $b second sample
     *
     * @return float
     */
    public function distance(array $a, array $b): float;
}
|
35
src/Phpml/Math/Distance/Chebyshev.php
Normal file
35
src/Phpml/Math/Distance/Chebyshev.php
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math\Distance;
|
||||||
|
|
||||||
|
use Phpml\Exception\InvalidArgumentException;
|
||||||
|
use Phpml\Math\Distance;
|
||||||
|
|
||||||
|
/**
 * Chebyshev distance: the largest absolute difference found on any coordinate.
 */
class Chebyshev implements Distance
{
    /**
     * Compares the two samples index by index (0 .. count - 1).
     *
     * Two empty samples are at distance 0.0; previously max() was called on an
     * empty list, which fails instead of returning a float.
     *
     * @param array $a
     * @param array $b
     *
     * @return float
     *
     * @throws InvalidArgumentException when the samples differ in size
     */
    public function distance(array $a, array $b): float
    {
        if (count($a) !== count($b)) {
            throw InvalidArgumentException::arraySizeNotMatch();
        }

        // Absolute differences are never negative, so 0.0 is a safe starting
        // maximum and also the result for empty input.
        $largest = 0.0;
        $count = count($a);

        for ($i = 0; $i < $count; ++$i) {
            $largest = max($largest, abs($a[$i] - $b[$i]));
        }

        return (float) $largest;
    }
}
|
@ -2,11 +2,12 @@
|
|||||||
|
|
||||||
declare (strict_types = 1);
|
declare (strict_types = 1);
|
||||||
|
|
||||||
namespace Phpml\Metric;
|
namespace Phpml\Math\Distance;
|
||||||
|
|
||||||
use Phpml\Exception\InvalidArgumentException;
|
use Phpml\Exception\InvalidArgumentException;
|
||||||
|
use Phpml\Math\Distance;
|
||||||
|
|
||||||
class Distance
|
class Euclidean implements Distance
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* @param array $a
|
* @param array $a
|
||||||
@ -16,10 +17,10 @@ class Distance
|
|||||||
*
|
*
|
||||||
* @throws InvalidArgumentException
|
* @throws InvalidArgumentException
|
||||||
*/
|
*/
|
||||||
public static function euclidean(array $a, array $b): float
|
public function distance(array $a, array $b): float
|
||||||
{
|
{
|
||||||
if (count($a) != count($b)) {
|
if (count($a) !== count($b)) {
|
||||||
throw InvalidArgumentException::sizeNotMatch();
|
throw InvalidArgumentException::arraySizeNotMatch();
|
||||||
}
|
}
|
||||||
|
|
||||||
$distance = 0;
|
$distance = 0;
|
35
src/Phpml/Math/Distance/Manhattan.php
Normal file
35
src/Phpml/Math/Distance/Manhattan.php
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math\Distance;
|
||||||
|
|
||||||
|
use Phpml\Exception\InvalidArgumentException;
|
||||||
|
use Phpml\Math\Distance;
|
||||||
|
|
||||||
|
class Manhattan implements Distance
{
    /**
     * Manhattan distance: the sum of the absolute differences between
     * corresponding elements of $a and $b.
     *
     * @param array $a
     * @param array $b
     *
     * @return float
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    public function distance(array $a, array $b): float
    {
        $size = count($a);

        if ($size !== count($b)) {
            throw InvalidArgumentException::arraySizeNotMatch();
        }

        $total = 0;
        $position = 0;

        while ($position < $size) {
            $total += abs($a[$position] - $b[$position]);
            ++$position;
        }

        return $total;
    }
}
|
48
src/Phpml/Math/Distance/Minkowski.php
Normal file
48
src/Phpml/Math/Distance/Minkowski.php
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math\Distance;
|
||||||
|
|
||||||
|
use Phpml\Exception\InvalidArgumentException;
|
||||||
|
use Phpml\Math\Distance;
|
||||||
|
|
||||||
|
class Minkowski implements Distance
{
    /**
     * Exponent applied to every absolute difference; its reciprocal is
     * applied to the accumulated sum.
     *
     * @var float
     */
    private $lambda;

    /**
     * @param float $lambda
     */
    public function __construct(float $lambda = 3)
    {
        $this->lambda = $lambda;
    }

    /**
     * Minkowski distance: raises each absolute element-wise difference to the
     * power lambda, sums the results and takes the lambda-th root of the sum.
     *
     * @param array $a
     * @param array $b
     *
     * @return float
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    public function distance(array $a, array $b): float
    {
        $size = count($a);

        if ($size !== count($b)) {
            throw InvalidArgumentException::arraySizeNotMatch();
        }

        $accumulated = 0;
        $position = 0;

        while ($position < $size) {
            $accumulated += abs($a[$position] - $b[$position]) ** $this->lambda;
            ++$position;
        }

        return $accumulated ** (1 / $this->lambda);
    }
}
|
16
src/Phpml/Math/Kernel.php
Normal file
16
src/Phpml/Math/Kernel.php
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math;
|
||||||
|
|
||||||
|
interface Kernel
{
    /**
     * Evaluates the kernel for a pair of inputs.
     *
     * The parameters carry no native type hints. The RBF implementation
     * passes both of them to Product::scalar(), which only accepts arrays,
     * so they are documented as arrays here rather than as single floats.
     *
     * @param array $a
     * @param array $b
     *
     * @return float
     */
    public function compute($a, $b);
}
|
39
src/Phpml/Math/Kernel/RBF.php
Normal file
39
src/Phpml/Math/Kernel/RBF.php
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math\Kernel;
|
||||||
|
|
||||||
|
use Phpml\Math\Kernel;
|
||||||
|
use Phpml\Math\Product;
|
||||||
|
|
||||||
|
class RBF implements Kernel
{
    /**
     * Multiplier applied to the (negated) squared-distance term inside exp().
     *
     * @var float
     */
    private $gamma;

    /**
     * @param float $gamma
     */
    public function __construct(float $gamma)
    {
        $this->gamma = $gamma;
    }

    /**
     * Computes exp(-gamma * (a.a + b.b - 2 * a.b)), where "." is the scalar
     * product provided by Product::scalar().
     *
     * Both arguments are handed to Product::scalar(), which is declared with
     * array parameters, so they must be arrays even though the Kernel
     * interface leaves them untyped.
     *
     * @param array $a
     * @param array $b
     *
     * @return float
     */
    public function compute($a, $b)
    {
        $score = 2 * Product::scalar($a, $b);
        $squares = Product::scalar($a, $a) + Product::scalar($b, $b);

        return exp(-$this->gamma * ($squares - $score));
    }
}
|
273
src/Phpml/Math/Matrix.php
Normal file
273
src/Phpml/Math/Matrix.php
Normal file
@ -0,0 +1,273 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math;
|
||||||
|
|
||||||
|
use Phpml\Exception\InvalidArgumentException;
|
||||||
|
use Phpml\Exception\MatrixException;
|
||||||
|
|
||||||
|
class Matrix
{
    /**
     * Row-major values: $matrix[row][column].
     *
     * @var array
     */
    private $matrix;

    /**
     * @var int
     */
    private $rows;

    /**
     * @var int
     */
    private $columns;

    /**
     * Cached determinant; null until getDeterminant() has computed it.
     *
     * @var float|int|null
     */
    private $determinant;

    /**
     * @param array $matrix   list of rows, each row a list of values
     * @param bool  $validate when true, every row must have as many elements as the first one
     *
     * @throws InvalidArgumentException
     */
    public function __construct(array $matrix, bool $validate = true)
    {
        $this->rows = count($matrix);
        // An empty matrix (for example the minor of a 1x1 matrix) has no
        // first row to measure, so treat it as having zero columns.
        $this->columns = count($matrix[0] ?? []);

        if ($validate) {
            for ($i = 0; $i < $this->rows; ++$i) {
                if (count($matrix[$i]) !== $this->columns) {
                    throw InvalidArgumentException::matrixDimensionsDidNotMatch();
                }
            }
        }

        $this->matrix = $matrix;
    }

    /**
     * Builds a single-column matrix with one row per element of $array.
     *
     * @param array $array
     *
     * @return Matrix
     */
    public static function fromFlatArray(array $array): self
    {
        $matrix = [];
        foreach ($array as $value) {
            $matrix[] = [$value];
        }

        return new self($matrix);
    }

    /**
     * @return array
     */
    public function toArray(): array
    {
        return $this->matrix;
    }

    /**
     * @return int
     */
    public function getRows(): int
    {
        return $this->rows;
    }

    /**
     * @return int
     */
    public function getColumns(): int
    {
        return $this->columns;
    }

    /**
     * Returns the values of one column, top to bottom.
     *
     * @param $column
     *
     * @return array
     *
     * @throws MatrixException when $column is negative or not below the column count
     */
    public function getColumnValues($column): array
    {
        if ($column < 0 || $column >= $this->columns) {
            throw MatrixException::columnOutOfRange();
        }

        $values = [];
        for ($i = 0; $i < $this->rows; ++$i) {
            $values[] = $this->matrix[$i][$column];
        }

        return $values;
    }

    /**
     * Returns the determinant, computing it on first use and caching it.
     *
     * @return float|int
     *
     * @throws MatrixException when the matrix is not square
     */
    public function getDeterminant()
    {
        // Compare against null: a truthiness test would never treat a
        // determinant of 0 as cached and would recompute it on every call.
        if (null !== $this->determinant) {
            return $this->determinant;
        }

        if (!$this->isSquare()) {
            throw MatrixException::notSquareMatrix();
        }

        return $this->determinant = $this->calculateDeterminant();
    }

    /**
     * Determinant by cofactor expansion along the first row, with direct
     * formulas for the 0x0, 1x1 and 2x2 cases.
     *
     * @return float|int
     *
     * @throws MatrixException
     */
    private function calculateDeterminant()
    {
        if ($this->rows === 0) {
            // The empty matrix has determinant 1 (empty product). This is
            // the minor of a 1x1 matrix, so inverse() of [[a]] gives [[1/a]].
            return 1;
        }

        if ($this->rows === 1 && $this->columns === 1) {
            return $this->matrix[0][0];
        }

        if ($this->rows === 2 && $this->columns === 2) {
            return $this->matrix[0][0] * $this->matrix[1][1] -
                $this->matrix[0][1] * $this->matrix[1][0];
        }

        $determinant = 0;
        for ($j = 0; $j < $this->columns; ++$j) {
            $subMatrix = $this->crossOut(0, $j);
            $minor = $this->matrix[0][$j] * $subMatrix->getDeterminant();
            // Signs alternate along the row, starting with plus.
            $determinant += $j % 2 === 0 ? $minor : -$minor;
        }

        return $determinant;
    }

    /**
     * @return bool
     */
    public function isSquare(): bool
    {
        return $this->columns === $this->rows;
    }

    /**
     * @return Matrix
     */
    public function transpose(): self
    {
        $newMatrix = [];
        for ($i = 0; $i < $this->rows; ++$i) {
            for ($j = 0; $j < $this->columns; ++$j) {
                $newMatrix[$j][$i] = $this->matrix[$i][$j];
            }
        }

        return new self($newMatrix, false);
    }

    /**
     * Returns the matrix product $this x $matrix.
     *
     * @param Matrix $matrix
     *
     * @return Matrix
     *
     * @throws InvalidArgumentException when this column count differs from the row count of $matrix
     */
    public function multiply(Matrix $matrix): self
    {
        if ($this->columns !== $matrix->getRows()) {
            throw InvalidArgumentException::inconsistentMatrixSupplied();
        }

        $product = [];
        $multiplier = $matrix->toArray();
        $resultColumns = $matrix->getColumns();

        for ($i = 0; $i < $this->rows; ++$i) {
            for ($j = 0; $j < $resultColumns; ++$j) {
                $product[$i][$j] = 0;
                for ($k = 0; $k < $this->columns; ++$k) {
                    $product[$i][$j] += $this->matrix[$i][$k] * $multiplier[$k][$j];
                }
            }
        }

        return new self($product, false);
    }

    /**
     * Returns a new matrix with every element divided by $value.
     *
     * @param $value
     *
     * @return Matrix
     */
    public function divideByScalar($value): self
    {
        $newMatrix = [];
        for ($i = 0; $i < $this->rows; ++$i) {
            for ($j = 0; $j < $this->columns; ++$j) {
                $newMatrix[$i][$j] = $this->matrix[$i][$j] / $value;
            }
        }

        return new self($newMatrix, false);
    }

    /**
     * Inverse via the transposed cofactor matrix divided by the determinant.
     *
     * NOTE(review): a determinant of 0 is not checked here, so a singular
     * matrix ends in a division by zero inside divideByScalar().
     *
     * @return Matrix
     *
     * @throws MatrixException when the matrix is not square
     */
    public function inverse(): self
    {
        if (!$this->isSquare()) {
            throw MatrixException::notSquareMatrix();
        }

        $newMatrix = [];
        for ($i = 0; $i < $this->rows; ++$i) {
            for ($j = 0; $j < $this->columns; ++$j) {
                $minor = $this->crossOut($i, $j)->getDeterminant();
                // Cofactor sign: plus where row + column is even.
                $newMatrix[$i][$j] = ($i + $j) % 2 === 0 ? $minor : -$minor;
            }
        }

        $cofactorMatrix = new self($newMatrix, false);

        return $cofactorMatrix->transpose()->divideByScalar($this->getDeterminant());
    }

    /**
     * Returns a copy of the matrix without the given row and column.
     *
     * @param int $row
     * @param int $column
     *
     * @return Matrix
     */
    public function crossOut(int $row, int $column): self
    {
        $newMatrix = [];
        $r = 0;

        for ($i = 0; $i < $this->rows; ++$i) {
            if ($row === $i) {
                continue;
            }

            $c = 0;
            for ($j = 0; $j < $this->columns; ++$j) {
                if ($column !== $j) {
                    $newMatrix[$r][$c] = $this->matrix[$i][$j];
                    ++$c;
                }
            }
            ++$r;
        }

        return new self($newMatrix, false);
    }
}
|
24
src/Phpml/Math/Product.php
Normal file
24
src/Phpml/Math/Product.php
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math;
|
||||||
|
|
||||||
|
class Product
{
    /**
     * Scalar (dot) product: for every key of $a, multiplies its value by the
     * value stored under the same key in $b and adds the results together.
     *
     * @param array $a
     * @param array $b
     *
     * @return mixed
     */
    public static function scalar(array $a, array $b)
    {
        return array_reduce(
            array_keys($a),
            function ($sum, $key) use ($a, $b) {
                return $sum + $a[$key] * $b[$key];
            },
            0
        );
    }
}
|
45
src/Phpml/Math/Statistic/Correlation.php
Normal file
45
src/Phpml/Math/Statistic/Correlation.php
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math\Statistic;
|
||||||
|
|
||||||
|
use Phpml\Exception\InvalidArgumentException;
|
||||||
|
|
||||||
|
class Correlation
{
    /**
     * Pearson correlation: the sum of the products of the deviations from the
     * respective means, divided by the square root of the product of the two
     * sums of squared deviations.
     *
     * @param array|int[]|float[] $x
     * @param array|int[]|float[] $y
     *
     * @return float
     *
     * @throws InvalidArgumentException when the arrays differ in length
     */
    public static function pearson(array $x, array $y)
    {
        $size = count($x);

        if ($size !== count($y)) {
            throw InvalidArgumentException::arraySizeNotMatch();
        }

        $meanX = Mean::arithmetic($x);
        $meanY = Mean::arithmetic($y);

        $crossSum = 0;
        $squaresX = 0;
        $squaresY = 0;
        $position = 0;

        while ($position < $size) {
            $deviationX = $x[$position] - $meanX;
            $deviationY = $y[$position] - $meanY;

            $crossSum += $deviationX * $deviationY;
            $squaresX += $deviationX ** 2;
            $squaresY += $deviationY ** 2;

            ++$position;
        }

        return $crossSum / sqrt($squaresX * $squaresY);
    }
}
|
18
src/Phpml/Math/Statistic/Mean.php
Normal file
18
src/Phpml/Math/Statistic/Mean.php
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math\Statistic;
|
||||||
|
|
||||||
|
class Mean
{
    /**
     * Arithmetic mean: the sum of the values divided by their number.
     *
     * @param array $a
     *
     * @return float
     *
     * @throws \Phpml\Exception\InvalidArgumentException when $a is empty
     */
    public static function arithmetic(array $a)
    {
        $count = count($a);

        // Without this guard an empty array ends in a division by zero.
        if ($count === 0) {
            throw \Phpml\Exception\InvalidArgumentException::arrayCantBeEmpty();
        }

        return array_sum($a) / $count;
    }
}
|
44
src/Phpml/Math/Statistic/StandardDeviation.php
Normal file
44
src/Phpml/Math/Statistic/StandardDeviation.php
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Math\Statistic;
|
||||||
|
|
||||||
|
use Phpml\Exception\InvalidArgumentException;
|
||||||
|
|
||||||
|
class StandardDeviation
{
    /**
     * Square root of the summed squared deviations from the mean divided by
     * n - 1 when $sample is truthy (the default), or by n otherwise.
     *
     * @param array|float[] $a
     * @param bool          $sample
     *
     * @return float
     *
     * @throws InvalidArgumentException when $a is empty, or has one element while $sample is truthy
     */
    public static function population(array $a, $sample = true)
    {
        $size = count($a);

        if ($size === 0) {
            throw InvalidArgumentException::arrayCantBeEmpty();
        }

        if ($sample && $size === 1) {
            throw InvalidArgumentException::arraySizeToSmall(2);
        }

        $mean = Mean::arithmetic($a);

        $squaredDeviations = array_reduce(
            $a,
            function ($sum, $value) use ($mean) {
                $deviation = $value - $mean;

                return $sum + $deviation * $deviation;
            },
            0.0
        );

        $divisor = $sample ? $size - 1 : $size;

        return sqrt($squaredDeviations / $divisor);
    }
}
|
@ -20,12 +20,12 @@ class Accuracy
|
|||||||
public static function score(array $actualLabels, array $predictedLabels, bool $normalize = true)
|
public static function score(array $actualLabels, array $predictedLabels, bool $normalize = true)
|
||||||
{
|
{
|
||||||
if (count($actualLabels) != count($predictedLabels)) {
|
if (count($actualLabels) != count($predictedLabels)) {
|
||||||
throw InvalidArgumentException::sizeNotMatch();
|
throw InvalidArgumentException::arraySizeNotMatch();
|
||||||
}
|
}
|
||||||
|
|
||||||
$score = 0;
|
$score = 0;
|
||||||
foreach ($actualLabels as $index => $label) {
|
foreach ($actualLabels as $index => $label) {
|
||||||
if ($label === $predictedLabels[$index]) {
|
if ($label == $predictedLabels[$index]) {
|
||||||
++$score;
|
++$score;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
116
src/Phpml/Regression/LeastSquares.php
Normal file
116
src/Phpml/Regression/LeastSquares.php
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Regression;
|
||||||
|
|
||||||
|
use Phpml\Math\Matrix;
|
||||||
|
|
||||||
|
class LeastSquares implements Regression
{
    /**
     * Training samples as passed to train().
     *
     * @var array
     */
    private $samples;

    /**
     * Training targets as passed to train().
     *
     * @var array
     */
    private $targets;

    /**
     * First value of the solved coefficient column.
     *
     * @var float
     */
    private $intercept;

    /**
     * Remaining values of the solved coefficient column, indexed from 0.
     *
     * @var array
     */
    private $coefficients;

    /**
     * Stores the training data and solves for intercept and coefficients.
     *
     * @param array $samples
     * @param array $targets
     */
    public function train(array $samples, array $targets)
    {
        $this->samples = $samples;
        $this->targets = $targets;

        $this->computeCoefficients();
    }

    /**
     * Returns the intercept plus the sum of coefficient * sample value over
     * all coefficient positions.
     *
     * @param array $sample
     *
     * @return mixed
     */
    public function predict($sample)
    {
        $prediction = $this->intercept;

        foreach ($this->coefficients as $position => $weight) {
            $prediction += $weight * $sample[$position];
        }

        return $prediction;
    }

    /**
     * @return array
     */
    public function getCoefficients()
    {
        return $this->coefficients;
    }

    /**
     * @return float
     */
    public function getIntercept()
    {
        return $this->intercept;
    }

    /**
     * coefficient(b) = (X'X)-1X'Y.
     *
     * The first entry of the solved column becomes the intercept, the rest
     * become the coefficients.
     */
    private function computeCoefficients()
    {
        $x = $this->getSamplesMatrix();
        $y = $this->getTargetsMatrix();
        $xTransposed = $x->transpose();

        $inverted = $xTransposed->multiply($x)->inverse();
        $projected = $xTransposed->multiply($y);

        $solution = $inverted->multiply($projected)->getColumnValues(0);

        $this->intercept = array_shift($solution);
        $this->coefficients = $solution;
    }

    /**
     * Add one dimension for intercept calculation: every sample is prefixed
     * with the constant 1.
     *
     * @return Matrix
     */
    private function getSamplesMatrix()
    {
        $rows = [];

        foreach ($this->samples as $row) {
            array_unshift($row, 1);
            $rows[] = $row;
        }

        return new Matrix($rows);
    }

    /**
     * Wraps the targets in a Matrix: used as-is when the first target is
     * already an array, otherwise turned into a single column.
     *
     * @return Matrix
     */
    private function getTargetsMatrix()
    {
        return is_array($this->targets[0])
            ? new Matrix($this->targets)
            : Matrix::fromFlatArray($this->targets);
    }
}
|
21
src/Phpml/Regression/Regression.php
Normal file
21
src/Phpml/Regression/Regression.php
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace Phpml\Regression;
|
||||||
|
|
||||||
|
interface Regression
{
    /**
     * Fits the model to the given samples and their targets.
     *
     * @param array $samples
     * @param array $targets
     */
    public function train(array $samples, array $targets);

    /**
     * Predicts the target for one sample.
     *
     * The parameter carries no native type hint. The LeastSquares
     * implementation reads it by index, so it is documented as an array
     * here rather than as a single float.
     *
     * @param array $sample
     *
     * @return mixed
     */
    public function predict($sample);
}
|
@ -2,12 +2,10 @@
|
|||||||
|
|
||||||
declare (strict_types = 1);
|
declare (strict_types = 1);
|
||||||
|
|
||||||
namespace tests\Classifier;
|
namespace tests\Classification;
|
||||||
|
|
||||||
use Phpml\Classifier\KNearestNeighbors;
|
use Phpml\Classification\KNearestNeighbors;
|
||||||
use Phpml\CrossValidation\RandomSplit;
|
use Phpml\Math\Distance\Chebyshev;
|
||||||
use Phpml\Dataset\Demo\Iris;
|
|
||||||
use Phpml\Metric\Accuracy;
|
|
||||||
|
|
||||||
class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase
|
class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase
|
||||||
{
|
{
|
||||||
@ -45,14 +43,18 @@ class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase
|
|||||||
$this->assertEquals($testLabels, $predicted);
|
$this->assertEquals($testLabels, $predicted);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testAccuracyOnIrisDataset()
|
public function testPredictArrayOfSamplesUsingChebyshevDistanceMetric()
|
||||||
{
|
{
|
||||||
$dataset = new RandomSplit(new Iris(), $testSize = 0.5, $seed = 123);
|
$trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||||
$classifier = new KNearestNeighbors($k = 4);
|
$trainLabels = ['a', 'a', 'a', 'b', 'b', 'b'];
|
||||||
$classifier->train($dataset->getTrainSamples(), $dataset->getTrainLabels());
|
|
||||||
$predicted = $classifier->predict($dataset->getTestSamples());
|
|
||||||
$score = Accuracy::score($dataset->getTestLabels(), $predicted);
|
|
||||||
|
|
||||||
$this->assertEquals(0.96, $score);
|
$testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]];
|
||||||
|
$testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a'];
|
||||||
|
|
||||||
|
$classifier = new KNearestNeighbors(3, new Chebyshev());
|
||||||
|
$classifier->train($trainSamples, $trainLabels);
|
||||||
|
$predicted = $classifier->predict($testSamples);
|
||||||
|
|
||||||
|
$this->assertEquals($testLabels, $predicted);
|
||||||
}
|
}
|
||||||
}
|
}
|
38
tests/Phpml/Classification/NaiveBayesTest.php
Normal file
38
tests/Phpml/Classification/NaiveBayesTest.php
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Classification;
|
||||||
|
|
||||||
|
use Phpml\Classification\NaiveBayes;
|
||||||
|
|
||||||
|
class NaiveBayesTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Trains on three samples with one label each and checks the label
     * predicted for three individual samples.
     */
    public function testPredictSingleSample()
    {
        $classifier = new NaiveBayes();
        $classifier->train(
            [[5, 1, 1], [1, 5, 1], [1, 1, 5]],
            ['a', 'b', 'c']
        );

        $this->assertEquals('a', $classifier->predict([3, 1, 1]));
        $this->assertEquals('b', $classifier->predict([1, 4, 1]));
        $this->assertEquals('c', $classifier->predict([1, 1, 6]));
    }

    /**
     * Passes a whole list of samples to predict() and expects a list of
     * labels in the same order.
     */
    public function testPredictArrayOfSamples()
    {
        $classifier = new NaiveBayes();
        $classifier->train(
            [[5, 1, 1], [1, 5, 1], [1, 1, 5]],
            ['a', 'b', 'c']
        );

        $inputs = [[3, 1, 1], [5, 1, 1], [4, 3, 8], [1, 1, 2], [2, 3, 2], [1, 2, 1], [9, 5, 1], [3, 1, 2]];
        $expected = ['a', 'a', 'c', 'c', 'b', 'b', 'a', 'a'];

        $this->assertEquals($expected, $classifier->predict($inputs));
    }
}
|
33
tests/Phpml/Clustering/DBSCANTest.php
Normal file
33
tests/Phpml/Clustering/DBSCANTest.php
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Clustering;
|
||||||
|
|
||||||
|
use Phpml\Clustering\DBSCAN;
|
||||||
|
|
||||||
|
class DBSCANTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Clusters two small data sets, each with its own epsilon / minSamples
     * setting, and compares the result with the expected grouping.
     */
    public function testDBSCANSamplesClustering()
    {
        $samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
        $clustered = [
            [[1, 1], [1, 2], [2, 1]],
            [[8, 7], [7, 8], [8, 9]],
        ];

        $epsilon = 2;
        $minSamples = 3;
        $dbscan = new DBSCAN($epsilon, $minSamples);

        $this->assertEquals($clustered, $dbscan->cluster($samples));

        $samples = [[1, 1], [6, 6], [1, -1], [5, 6], [-1, -1], [7, 8], [-1, 1], [7, 7]];
        $clustered = [
            [[1, 1], [1, -1], [-1, -1], [-1, 1]],
            [[6, 6], [5, 6], [7, 8], [7, 7]],
        ];

        $epsilon = 3;
        $minSamples = 4;
        $dbscan = new DBSCAN($epsilon, $minSamples);

        $this->assertEquals($clustered, $dbscan->cluster($samples));
    }
}
|
51
tests/Phpml/Clustering/KMeansTest.php
Normal file
51
tests/Phpml/Clustering/KMeansTest.php
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Clustering;
|
||||||
|
|
||||||
|
use Phpml\Clustering\KMeans;
|
||||||
|
|
||||||
|
class KMeansTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Expects two clusters and every input sample to appear in one of them.
     */
    public function testKMeansSamplesClustering()
    {
        $samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];

        $clusters = (new KMeans(2))->cluster($samples);

        $this->assertEquals(2, count($clusters));

        // Keep only the samples that are found in neither cluster.
        $unassigned = array_filter($samples, function ($sample) use ($clusters) {
            return !in_array($sample, $clusters[0]) && !in_array($sample, $clusters[1]);
        });

        $this->assertEquals(0, count($unassigned));
    }

    /**
     * Runs both initialization methods on the same data and expects four
     * clusters from each.
     */
    public function testKMeansInitializationMethods()
    {
        $samples = [
            [180, 155], [186, 159], [119, 185], [141, 147], [157, 158],
            [176, 122], [194, 160], [113, 193], [190, 148], [152, 154],
            [162, 146], [188, 144], [185, 124], [163, 114], [151, 140],
            [175, 131], [186, 162], [181, 195], [147, 122], [143, 195],
            [171, 119], [117, 165], [169, 121], [159, 160], [159, 112],
            [115, 122], [149, 193], [156, 135], [118, 120], [139, 159],
            [150, 115], [181, 136], [167, 162], [132, 115], [175, 165],
            [110, 147], [175, 118], [113, 145], [130, 162], [195, 179],
            [164, 111], [192, 114], [194, 149], [139, 113], [160, 168],
            [162, 110], [174, 144], [137, 142], [197, 160], [147, 173],
        ];

        $plusPlus = new KMeans(4, KMeans::INIT_KMEANS_PLUS_PLUS);
        $this->assertEquals(4, count($plusPlus->cluster($samples)));

        $random = new KMeans(4, KMeans::INIT_RANDOM);
        $this->assertEquals(4, count($random->cluster($samples)));
    }
}
|
@ -16,7 +16,7 @@ class CsvDatasetTest extends \PHPUnit_Framework_TestCase
|
|||||||
new CsvDataset('missingFile', 3);
|
new CsvDataset('missingFile', 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testSampleCsvDataset()
|
public function testSampleCsvDatasetWithHeaderRow()
|
||||||
{
|
{
|
||||||
$filePath = dirname(__FILE__).'/Resources/dataset.csv';
|
$filePath = dirname(__FILE__).'/Resources/dataset.csv';
|
||||||
|
|
||||||
@ -25,4 +25,14 @@ class CsvDatasetTest extends \PHPUnit_Framework_TestCase
|
|||||||
$this->assertEquals(10, count($dataset->getSamples()));
|
$this->assertEquals(10, count($dataset->getSamples()));
|
||||||
$this->assertEquals(10, count($dataset->getLabels()));
|
$this->assertEquals(10, count($dataset->getLabels()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function testSampleCsvDatasetWithoutHeaderRow()
|
||||||
|
{
|
||||||
|
$filePath = dirname(__FILE__).'/Resources/dataset.csv';
|
||||||
|
|
||||||
|
$dataset = new CsvDataset($filePath, 2, false);
|
||||||
|
|
||||||
|
$this->assertEquals(11, count($dataset->getSamples()));
|
||||||
|
$this->assertEquals(11, count($dataset->getLabels()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
22
tests/Phpml/Dataset/Demo/GlassTest.php
Normal file
22
tests/Phpml/Dataset/Demo/GlassTest.php
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Phpml\Dataset\Demo;
|
||||||
|
|
||||||
|
use Phpml\Dataset\Demo\Glass;
|
||||||
|
|
||||||
|
class GlassTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Checks the size of the bundled Glass demo dataset: 214 samples,
     * 214 labels and 9 features in the first sample.
     */
    public function testLoadingGlassDataset()
    {
        $glass = new Glass();

        // whole dataset
        $this->assertEquals(214, count($glass->getSamples()));
        $this->assertEquals(214, count($glass->getLabels()));

        // one sample features count
        $this->assertEquals(9, count($glass->getSamples()[0]));
    }
}
|
22
tests/Phpml/Dataset/Demo/WineTest.php
Normal file
22
tests/Phpml/Dataset/Demo/WineTest.php
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Phpml\Dataset\Demo;
|
||||||
|
|
||||||
|
use Phpml\Dataset\Demo\Wine;
|
||||||
|
|
||||||
|
class WineTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Checks the size of the bundled Wine demo dataset: 178 samples,
     * 178 labels and 13 features in the first sample.
     */
    public function testLoadingWineDataset()
    {
        $dataset = new Wine();

        // number of rows in the whole dataset
        $this->assertEquals(178, count($dataset->getSamples()));
        $this->assertEquals(178, count($dataset->getLabels()));

        // number of features in one sample
        $firstSample = $dataset->getSamples()[0];
        $this->assertEquals(13, count($firstSample));
    }
}
|
64
tests/Phpml/Math/Distance/ChebyshevTest.php
Normal file
64
tests/Phpml/Math/Distance/ChebyshevTest.php
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Phpml\Metric;
|
||||||
|
|
||||||
|
use Phpml\Math\Distance\Chebyshev;
|
||||||
|
|
||||||
|
class ChebyshevTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Metric under test, recreated before every test.
     *
     * @var Chebyshev
     */
    private $distanceMetric;

    public function setUp()
    {
        $this->distanceMetric = new Chebyshev();
    }

    /**
     * Arrays of different length must be rejected.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArguments()
    {
        $this->distanceMetric->distance([0, 1, 2], [0, 2]);
    }

    public function testCalculateDistanceForOneDimension()
    {
        $this->assertEquals(2, $this->distanceMetric->distance([4], [2]));
    }

    public function testCalculateDistanceForTwoDimensions()
    {
        $this->assertEquals(2, $this->distanceMetric->distance([4, 6], [2, 5]));
    }

    public function testCalculateDistanceForThreeDimensions()
    {
        $this->assertEquals(5, $this->distanceMetric->distance([6, 10, 3], [2, 5, 5]));
    }
}
|
64
tests/Phpml/Math/Distance/EuclideanTest.php
Normal file
64
tests/Phpml/Math/Distance/EuclideanTest.php
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Phpml\Metric;
|
||||||
|
|
||||||
|
use Phpml\Math\Distance\Euclidean;
|
||||||
|
|
||||||
|
class EuclideanTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Metric under test; rebuilt by setUp() before each test method.
     *
     * @var Euclidean
     */
    private $metric;

    public function setUp()
    {
        $this->metric = new Euclidean();
    }

    /**
     * Two vectors with a different number of components are expected to be rejected.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArguments()
    {
        $threeComponents = [0, 1, 2];
        $twoComponents = [0, 2];

        $this->metric->distance($threeComponents, $twoComponents);
    }

    public function testCalculateDistanceForOneDimension()
    {
        $from = [4];
        $to = [2];

        // Single axis: |4 - 2| = 2.
        self::assertEquals(2, $this->metric->distance($from, $to));
    }

    public function testCalculateDistanceForTwoDimensions()
    {
        $from = [4, 6];
        $to = [2, 5];

        // sqrt(2^2 + 1^2) = sqrt(5); compared without an explicit delta.
        self::assertEquals(2.2360679774998, $this->metric->distance($from, $to));
    }

    public function testCalculateDistanceForThreeDimensions()
    {
        $from = [6, 10, 3];
        $to = [2, 5, 5];

        // sqrt(4^2 + 5^2 + 2^2) = sqrt(45); compared without an explicit delta.
        self::assertEquals(6.7082039324993694, $this->metric->distance($from, $to));
    }
}
|
64
tests/Phpml/Math/Distance/ManhattanTest.php
Normal file
64
tests/Phpml/Math/Distance/ManhattanTest.php
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Phpml\Metric;
|
||||||
|
|
||||||
|
use Phpml\Math\Distance\Manhattan;
|
||||||
|
|
||||||
|
class ManhattanTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Metric under test; rebuilt by setUp() before each test method.
     *
     * @var Manhattan
     */
    private $metric;

    public function setUp()
    {
        $this->metric = new Manhattan();
    }

    /**
     * Two vectors with a different number of components are expected to be rejected.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArguments()
    {
        $threeComponents = [0, 1, 2];
        $twoComponents = [0, 2];

        $this->metric->distance($threeComponents, $twoComponents);
    }

    public function testCalculateDistanceForOneDimension()
    {
        $from = [4];
        $to = [2];

        // Single axis: |4 - 2| = 2.
        self::assertEquals(2, $this->metric->distance($from, $to));
    }

    public function testCalculateDistanceForTwoDimensions()
    {
        $from = [4, 6];
        $to = [2, 5];

        // Sum of per-axis differences: 2 + 1 = 3.
        self::assertEquals(3, $this->metric->distance($from, $to));
    }

    public function testCalculateDistanceForThreeDimensions()
    {
        $from = [6, 10, 3];
        $to = [2, 5, 5];

        // Sum of per-axis differences: 4 + 5 + 2 = 11.
        self::assertEquals(11, $this->metric->distance($from, $to));
    }
}
|
77
tests/Phpml/Math/Distance/MinkowskiTest.php
Normal file
77
tests/Phpml/Math/Distance/MinkowskiTest.php
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Phpml\Metric;
|
||||||
|
|
||||||
|
use Phpml\Math\Distance\Minkowski;
|
||||||
|
|
||||||
|
class MinkowskiTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Metric built with its default constructor arguments; rebuilt before each test method.
     *
     * @var Minkowski
     */
    private $metric;

    public function setUp()
    {
        $this->metric = new Minkowski();
    }

    /**
     * Two vectors with a different number of components are expected to be rejected.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArguments()
    {
        $threeComponents = [0, 1, 2];
        $twoComponents = [0, 2];

        $this->metric->distance($threeComponents, $twoComponents);
    }

    public function testCalculateDistanceForOneDimension()
    {
        $from = [4];
        $to = [2];

        self::assertEquals(2, $this->metric->distance($from, $to));
    }

    public function testCalculateDistanceForTwoDimensions()
    {
        $from = [4, 6];
        $to = [2, 5];

        // NOTE(review): 2.080 matches (2^3 + 1^3)^(1/3), so the default lambda is presumably 3 - confirm.
        self::assertEquals(2.080, $this->metric->distance($from, $to), '', 0.001);
    }

    public function testCalculateDistanceForThreeDimensions()
    {
        $from = [6, 10, 3];
        $to = [2, 5, 5];

        self::assertEquals(5.819, $this->metric->distance($from, $to), '', 0.001);
    }

    /**
     * Uses a locally built metric with lambda 5 instead of the shared fixture.
     */
    public function testCalculateDistanceForThreeDimensionsWithDifferentLambda()
    {
        $fifthOrderMetric = new Minkowski(5);

        $from = [6, 10, 3];
        $to = [2, 5, 5];

        self::assertEquals(5.300, $fifthOrderMetric->distance($from, $to), '', 0.001);
    }
}
|
25
tests/Phpml/Math/Kernel/RBFTest.php
Normal file
25
tests/Phpml/Math/Kernel/RBFTest.php
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace test\Phpml\Math\Kernel;
|
||||||
|
|
||||||
|
use Phpml\Math\Kernel\RBF;
|
||||||
|
|
||||||
|
class RBFTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Evaluates the kernel for two gamma values on identical, nearby and distant vectors.
     */
    public function testComputeRBFKernelFunction()
    {
        $tolerance = 0.0001;

        $kernel = new RBF(0.001);

        self::assertEquals(1, $kernel->compute([1, 2], [1, 2]));
        self::assertEquals(0.97336, $kernel->compute([1, 2, 3], [4, 5, 6]), '', $tolerance);
        self::assertEquals(0.00011, $kernel->compute([4, 5], [1, 100]), '', $tolerance);

        $kernel = new RBF(0.2);

        self::assertEquals(1, $kernel->compute([1, 2], [1, 2]));
        self::assertEquals(0.00451, $kernel->compute([1, 2, 3], [4, 5, 6]), '', $tolerance);
        // Compared against 0 without an explicit delta.
        self::assertEquals(0, $kernel->compute([4, 5], [1, 100]));
    }
}
|
176
tests/Phpml/Math/MatrixTest.php
Normal file
176
tests/Phpml/Math/MatrixTest.php
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Phpml\Math;
|
||||||
|
|
||||||
|
use Phpml\Math\Matrix;
|
||||||
|
|
||||||
|
class MatrixTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Rows of unequal length are expected to be refused by the constructor.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidMatrixSupplied()
    {
        $raggedRows = [[1, 2], [3]];

        new Matrix($raggedRows);
    }

    /**
     * A flat list is expected to become a single-column matrix.
     */
    public function testCreateMatrixFromFlatArray()
    {
        $values = [1, 2, 3, 4];
        $columnVector = Matrix::fromFlatArray($values);

        self::assertInstanceOf(Matrix::class, $columnVector);
        self::assertEquals([[1], [2], [3], [4]], $columnVector->toArray());
        self::assertEquals(4, $columnVector->getRows());
        self::assertEquals(1, $columnVector->getColumns());
        self::assertEquals($values, $columnVector->getColumnValues(0));
    }

    /**
     * Column 4 is requested from a matrix that has three columns.
     *
     * @expectedException \Phpml\Exception\MatrixException
     */
    public function testThrowExceptionOnInvalidColumnNumber()
    {
        $twoByThree = new Matrix([[1, 2, 3], [4, 5, 6]]);

        $twoByThree->getColumnValues(4);
    }

    /**
     * The determinant is requested from a 2x3 matrix.
     *
     * @expectedException \Phpml\Exception\MatrixException
     */
    public function testThrowExceptionOnGetDeterminantIfArrayIsNotSquare()
    {
        $twoByThree = new Matrix([[1, 2, 3], [4, 5, 6]]);

        $twoByThree->getDeterminant();
    }

    public function testGetMatrixDeterminant()
    {
        // Reference values: http://matrix.reshish.com/determinant.php
        $threeByThree = new Matrix([
            [3, 3, 3],
            [4, 2, 1],
            [5, 6, 7],
        ]);

        self::assertEquals(-3, $threeByThree->getDeterminant());

        $sixBySix = new Matrix([
            [1, 2, 3, 3, 2, 1],
            [1 / 2, 5, 6, 7, 1, 1],
            [3 / 2, 7 / 2, 2, 0, 6, 8],
            [1, 8, 10, 1, 2, 2],
            [1 / 4, 4, 1, 0, 2, 3 / 7],
            [1, 8, 7, 5, 4, 4 / 5],
        ]);

        self::assertEquals(1116.5035, $sixBySix->getDeterminant(), '', 0.0001);
    }

    public function testMatrixTranspose()
    {
        $original = new Matrix([
            [3, 3, 3],
            [4, 2, 1],
            [5, 6, 7],
        ]);

        // Rows of the input become columns of the result.
        $expectedRows = [
            [3, 4, 5],
            [3, 2, 6],
            [3, 1, 7],
        ];

        self::assertEquals($expectedRows, $original->transpose()->toArray());
    }

    /**
     * A 2x3 matrix is multiplied by another 2x3 matrix.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnMultiplyWhenInconsistentMatrixSupplied()
    {
        $left = new Matrix([[1, 2, 3], [4, 5, 6]]);
        $right = new Matrix([[3, 2, 1], [6, 5, 4]]);

        $left->multiply($right);
    }

    public function testMatrixMultiplyByMatrix()
    {
        $left = new Matrix([
            [1, 2, 3],
            [4, 5, 6],
        ]);

        $right = new Matrix([
            [7, 8],
            [9, 10],
            [11, 12],
        ]);

        // 2x3 times 3x2 gives a 2x2 result.
        $expectedRows = [
            [58, 64],
            [139, 154],
        ];

        self::assertEquals($expectedRows, $left->multiply($right)->toArray());
    }

    public function testDivideByScalar()
    {
        $dividend = new Matrix([
            [4, 6, 8],
            [2, 10, 20],
        ]);

        $expectedRows = [
            [2, 3, 4],
            [1, 5, 10],
        ];

        self::assertEquals($expectedRows, $dividend->divideByScalar(2)->toArray());
    }

    /**
     * The inverse is requested from a 2x3 matrix.
     *
     * @expectedException \Phpml\Exception\MatrixException
     */
    public function testThrowExceptionWhenInverseIfArrayIsNotSquare()
    {
        $twoByThree = new Matrix([[1, 2, 3], [4, 5, 6]]);

        $twoByThree->inverse();
    }

    public function testInverseMatrix()
    {
        // Reference values: http://ncalculators.com/matrix/inverse-matrix.htm
        $invertible = new Matrix([
            [3, 4, 2],
            [4, 5, 5],
            [1, 1, 1],
        ]);

        $expectedRows = [
            [0, -1, 5],
            [1 / 2, 1 / 2, -7 / 2],
            [-1 / 2, 1 / 2, -1 / 2],
        ];

        self::assertEquals($expectedRows, $invertible->inverse()->toArray(), '', 0.0001);
    }

    public function testCrossOutMatrix()
    {
        $threeByThree = new Matrix([
            [3, 4, 2],
            [4, 5, 5],
            [1, 1, 1],
        ]);

        // The expected result lacks the middle row and the middle column of the input.
        $expectedRows = [
            [3, 2],
            [1, 1],
        ];

        self::assertEquals($expectedRows, $threeByThree->crossOut(1, 1)->toArray());
    }
}
|
17
tests/Phpml/Math/ProductTest.php
Normal file
17
tests/Phpml/Math/ProductTest.php
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Phpml\Math;
|
||||||
|
|
||||||
|
use Phpml\Math\Product;
|
||||||
|
|
||||||
|
class ProductTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Checks Product::scalar() against hand-computed results for three vector pairs.
     */
    public function testScalarProduct()
    {
        // Each case: expected product, left vector, right vector.
        $cases = [
            [10, [2, 3], [-1, 4]],
            [-0.1, [1, 4, 1], [-2, 0.5, -0.1]],
            [8, [2], [4]],
        ];

        foreach ($cases as list($expected, $left, $right)) {
            self::assertEquals($expected, Product::scalar($left, $right));
        }
    }
}
|
38
tests/Phpml/Math/Statistic/CorrelationTest.php
Normal file
38
tests/Phpml/Math/Statistic/CorrelationTest.php
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace test\Phpml\Math\StandardDeviation;
|
||||||
|
|
||||||
|
use Phpml\Math\Statistic\Correlation;
|
||||||
|
|
||||||
|
class CorrelationTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Compares Correlation::pearson() with published coefficients for three data sets.
     */
    public function testPearsonCorrelation()
    {
        // Every comparison below uses the same tolerance.
        $tolerance = 0.001;

        // Data set from http://www.stat.wmich.edu/s216/book/node126.html
        $first = [9300, 10565, 15000, 15000, 17764, 57000, 65940, 73676, 77006, 93739, 146088, 153260];
        $second = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025];
        self::assertEquals(-0.641, Correlation::pearson($first, $second), '', $tolerance);

        // Data set from http://www.statisticshowto.com/how-to-compute-pearsons-correlation-coefficients/
        $first = [43, 21, 25, 42, 57, 59];
        $second = [99, 65, 79, 75, 87, 82];
        self::assertEquals(0.549, Correlation::pearson($first, $second), '', $tolerance);

        $first = [60, 61, 62, 63, 65];
        $second = [3.1, 3.6, 3.8, 4, 4.1];
        self::assertEquals(0.911, Correlation::pearson($first, $second), '', $tolerance);
    }

    /**
     * Series of different lengths (three values against two) are expected to be rejected.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArgumentsForPearsonCorrelation()
    {
        $threeValues = [1, 2, 4];
        $twoValues = [3, 5];

        Correlation::pearson($threeValues, $twoValues);
    }
}
|
18
tests/Phpml/Math/Statistic/MeanTest.php
Normal file
18
tests/Phpml/Math/Statistic/MeanTest.php
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace test\Phpml\Math\StandardDeviation;
|
||||||
|
|
||||||
|
use Phpml\Math\Statistic\Mean;
|
||||||
|
|
||||||
|
class MeanTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Checks Mean::arithmetic() against expected averages, each within a tolerance of 0.01.
     */
    public function testArithmeticMean()
    {
        $tolerance = 0.01;

        // Each case: expected mean, input numbers.
        $cases = [
            [3.5, [2, 5]],
            [41.16, [43, 21, 25, 42, 57, 59]],
            [1.7, [0.5, 0.5, 1.5, 2.5, 3.5]],
        ];

        foreach ($cases as list($expectedMean, $numbers)) {
            self::assertEquals($expectedMean, Mean::arithmetic($numbers), '', $tolerance);
        }
    }
}
|
42
tests/Phpml/Math/Statistic/StandardDeviationTest.php
Normal file
42
tests/Phpml/Math/Statistic/StandardDeviationTest.php
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace test\Phpml\Math\StandardDeviation;
|
||||||
|
|
||||||
|
use Phpml\Math\Statistic\StandardDeviation;
|
||||||
|
|
||||||
|
class StandardDeviationTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Compares StandardDeviation::population() with published values for three data sets.
     */
    public function testStandardDeviationOfPopulationSample()
    {
        // Data set from https://pl.wikipedia.org/wiki/Odchylenie_standardowe
        $values = [5, 6, 8, 9];
        self::assertEquals(1.825, StandardDeviation::population($values), '', 0.001);

        // Data sets from http://www.stat.wmich.edu/s216/book/node126.html
        // The expected values are whole numbers, so a wider tolerance is used here.
        $tolerance = 0.5;

        $values = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025];
        self::assertEquals(4079, StandardDeviation::population($values), '', $tolerance);

        $values = [9300, 10565, 15000, 15000, 17764, 57000, 65940, 73676, 77006, 93739, 146088, 153260];
        self::assertEquals(50989, StandardDeviation::population($values), '', $tolerance);
    }

    /**
     * An empty array is passed with the second argument set to false.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnEmptyArrayIfNotSample()
    {
        $noValues = [];

        StandardDeviation::population($noValues, false);
    }

    /**
     * A single-element array is passed with the second argument left at its default.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnToSmallArray()
    {
        $singleValue = [1];

        StandardDeviation::population($singleValue);
    }
}
|
@ -1,51 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
declare (strict_types = 1);
|
|
||||||
|
|
||||||
namespace tests\Phpml\Metric;
|
|
||||||
|
|
||||||
use Phpml\Metric\Distance;
|
|
||||||
|
|
||||||
class DistanceTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Two vectors with a different number of components are expected to be rejected.
     *
     * @expectedException \Phpml\Exception\InvalidArgumentException
     */
    public function testThrowExceptionOnInvalidArgumentsInEuclidean()
    {
        $threeComponents = [0, 1, 2];
        $twoComponents = [0, 2];

        Distance::euclidean($threeComponents, $twoComponents);
    }

    public function testCalculateEuclideanDistanceForOneDimension()
    {
        $from = [4];
        $to = [2];

        // Single axis: |4 - 2| = 2.
        self::assertEquals(2, Distance::euclidean($from, $to));
    }

    public function testCalculateEuclideanDistanceForTwoAndMoreDimension()
    {
        // Two dimensions: sqrt(2^2 + 1^2) = sqrt(5).
        $from = [4, 6];
        $to = [2, 5];

        self::assertEquals(2.2360679774998, Distance::euclidean($from, $to));

        // Three dimensions: sqrt(4^2 + 5^2 + 2^2) = sqrt(45).
        $from = [6, 10, 3];
        $to = [2, 5, 5];

        self::assertEquals(6.7082039324993694, Distance::euclidean($from, $to));
    }
}
|
|
68
tests/Phpml/Regression/LeastSquaresTest.php
Normal file
68
tests/Phpml/Regression/LeastSquaresTest.php
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
|
namespace tests\Regression;
|
||||||
|
|
||||||
|
use Phpml\Regression\LeastSquares;
|
||||||
|
|
||||||
|
class LeastSquaresTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Trains on one-feature samples with a flat target list and checks predictions for two data sets.
     */
    public function testPredictSingleFeatureSamples()
    {
        $tolerance = 0.01;

        // Data set from https://www.easycalculation.com/analytical/learn-least-square-regression.php
        $inputs = [[60], [61], [62], [63], [65]];
        $outputs = [3.1, 3.6, 3.8, 4, 4.1];

        $model = new LeastSquares();
        $model->train($inputs, $outputs);

        self::assertEquals(4.06, $model->predict([64]), '', $tolerance);

        // Data set from http://www.stat.wmich.edu/s216/book/node127.html
        $inputs = [[9300], [10565], [15000], [15000], [17764], [57000], [65940], [73676], [77006], [93739], [146088], [153260]];
        $outputs = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025];

        $model = new LeastSquares();
        $model->train($inputs, $outputs);

        // Each case: feature value, expected prediction.
        // NOTE(review): 9300 is checked twice - possibly a copy-paste duplicate; kept as in the original.
        $cases = [
            [9300, 7659.35],
            [57000, 5213.81],
            [77006, 4188.13],
            [9300, 7659.35],
            [153260, 278.66],
        ];

        foreach ($cases as list($feature, $expected)) {
            self::assertEquals($expected, $model->predict([$feature]), '', $tolerance);
        }
    }

    /**
     * Same first data set as above, but each target is wrapped in its own one-element array.
     */
    public function testPredictSingleFeatureSamplesWithMatrixTargets()
    {
        $tolerance = 0.01;

        // Data set from https://www.easycalculation.com/analytical/learn-least-square-regression.php
        $inputs = [[60], [61], [62], [63], [65]];
        $outputs = [[3.1], [3.6], [3.8], [4], [4.1]];

        $model = new LeastSquares();
        $model->train($inputs, $outputs);

        self::assertEquals(4.06, $model->predict([64]), '', $tolerance);
    }

    /**
     * Trains on two-feature samples and checks the intercept, the coefficients and two predictions.
     */
    public function testPredictMultiFeaturesSamples()
    {
        $tolerance = 0.01;

        // Data set from http://www.stat.wmich.edu/s216/book/node129.html
        $inputs = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]];
        $outputs = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400];

        $model = new LeastSquares();
        $model->train($inputs, $outputs);

        self::assertEquals(-800614.957, $model->getIntercept(), '', $tolerance);
        self::assertEquals([-0.0327, 404.14], $model->getCoefficients(), '', $tolerance);
        self::assertEquals(4094.82, $model->predict([60000, 1996]), '', $tolerance);
        self::assertEquals(5711.40, $model->predict([60000, 2000]), '', $tolerance);
    }
}
|
Loading…
Reference in New Issue
Block a user