diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..247e222 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,5 @@ +language: php +php: + - '7.0' +before_script: composer install +script: bin/phpunit \ No newline at end of file diff --git a/README.md b/README.md index feac3e5..d3f65b7 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,25 @@ [![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml) [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop) -Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ... +Fresh approach to Machine Learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ... + +Simple example of classification: +```php +use Phpml\Classification\KNearestNeighbors; + +$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; +$labels = ['a', 'a', 'a', 'b', 'b', 'b']; + +$classifier = new KNearestNeighbors(); +$classifier->train($samples, $labels); + +$classifier->predict([3, 2]); +// return 'b' +``` ## Documentation -To find out how to use PHP-ML follow [Documentation](php-ml.readthedocs.org). +To find out how to use PHP-ML follow [Documentation](http://php-ml.readthedocs.org/). 
## Installation @@ -20,14 +34,33 @@ Currently this library is in the process of developing, but You can install it w composer require php-ai/php-ml ``` -## To-Do +## Features -* implements more algorithms -* integration with Lavacharts for data visualization +* Classification + * [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/) + * [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/) +* Regression + * [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/) +* Clustering + * [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means) + * [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan) +* Cross Validation + * [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split) +* Datasets + * [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset) + * Ready to use: + * [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/) +* Math + * [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/) + * [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/) + -## Testing +## Contribute -After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer) +- Issue Tracker: https://github.com/php-ai/php-ml/issues +- Source Code: https://github.com/php-ai/php-ml + +After installation, you can launch the test suite in the project root directory (you will need to install dev requirements with Composer) ``` bin/phpunit @@ -39,4 +72,4 @@ PHP-ML is released under the MIT Licence. 
See the bundled LICENSE file for detai ## Author -Arkadiusz Kondas (@ArkadiuszKondas) \ No newline at end of file +Arkadiusz Kondas (@ArkadiuszKondas) diff --git a/data/glass.csv b/data/glass.csv new file mode 100644 index 0000000..77522db --- /dev/null +++ b/data/glass.csv @@ -0,0 +1,215 @@ +RI: refractive index,Na: Sodium,Mg: Magnesium,Al: Aluminum,Si: Silicon,K: Potassium,Ca: Calcium,Ba: Barium,Fe: Iron,type of glass +1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,building_windows_float_processed +1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,building_windows_float_processed +1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,building_windows_float_processed +1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,building_windows_float_processed +1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,building_windows_float_processed +1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.00,0.26,building_windows_float_processed +1.51743,13.30,3.60,1.14,73.09,0.58,8.17,0.00,0.00,building_windows_float_processed +1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.00,0.00,building_windows_float_processed +1.51918,14.04,3.58,1.37,72.08,0.56,8.30,0.00,0.00,building_windows_float_processed +1.51755,13.00,3.60,1.36,72.99,0.57,8.40,0.00,0.11,building_windows_float_processed +1.51571,12.72,3.46,1.56,73.20,0.67,8.09,0.00,0.24,building_windows_float_processed +1.51763,12.80,3.66,1.27,73.01,0.60,8.56,0.00,0.00,building_windows_float_processed +1.51589,12.88,3.43,1.40,73.28,0.69,8.05,0.00,0.24,building_windows_float_processed +1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.00,0.17,building_windows_float_processed +1.51763,12.61,3.59,1.31,73.29,0.58,8.50,0.00,0.00,building_windows_float_processed +1.51761,12.81,3.54,1.23,73.24,0.58,8.39,0.00,0.00,building_windows_float_processed +1.51784,12.68,3.67,1.16,73.11,0.61,8.70,0.00,0.00,building_windows_float_processed +1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.00,0.00,building_windows_float_processed 
+1.51911,13.90,3.73,1.18,72.12,0.06,8.89,0.00,0.00,building_windows_float_processed +1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.00,0.07,building_windows_float_processed +1.51750,12.82,3.55,1.49,72.75,0.54,8.52,0.00,0.19,building_windows_float_processed +1.51966,14.77,3.75,0.29,72.02,0.03,9.00,0.00,0.00,building_windows_float_processed +1.51736,12.78,3.62,1.29,72.79,0.59,8.70,0.00,0.00,building_windows_float_processed +1.51751,12.81,3.57,1.35,73.02,0.62,8.59,0.00,0.00,building_windows_float_processed +1.51720,13.38,3.50,1.15,72.85,0.50,8.43,0.00,0.00,building_windows_float_processed +1.51764,12.98,3.54,1.21,73.00,0.65,8.53,0.00,0.00,building_windows_float_processed +1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,building_windows_float_processed +1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.00,0.00,building_windows_float_processed +1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.00,0.00,building_windows_float_processed +1.51784,13.08,3.49,1.28,72.86,0.60,8.49,0.00,0.00,building_windows_float_processed +1.51768,12.65,3.56,1.30,73.08,0.61,8.69,0.00,0.14,building_windows_float_processed +1.51747,12.84,3.50,1.14,73.27,0.56,8.55,0.00,0.00,building_windows_float_processed +1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,building_windows_float_processed +1.51753,12.57,3.47,1.38,73.39,0.60,8.55,0.00,0.06,building_windows_float_processed +1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.00,0.00,building_windows_float_processed +1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.00,0.00,building_windows_float_processed +1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.00,building_windows_float_processed +1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.00,0.00,building_windows_float_processed +1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,building_windows_float_processed +1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.00,0.00,building_windows_float_processed +1.51793,12.79,3.50,1.12,73.03,0.64,8.77,0.00,0.00,building_windows_float_processed 
+1.51755,12.71,3.42,1.20,73.20,0.59,8.64,0.00,0.00,building_windows_float_processed +1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.00,0.00,building_windows_float_processed +1.52210,13.73,3.84,0.72,71.76,0.17,9.74,0.00,0.00,building_windows_float_processed +1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.00,0.30,building_windows_float_processed +1.51900,13.49,3.48,1.35,71.95,0.55,9.00,0.00,0.00,building_windows_float_processed +1.51869,13.19,3.37,1.18,72.72,0.57,8.83,0.00,0.16,building_windows_float_processed +1.52667,13.99,3.70,0.71,71.57,0.02,9.82,0.00,0.10,building_windows_float_processed +1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.00,0.00,building_windows_float_processed +1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.00,0.00,building_windows_float_processed +1.52320,13.72,3.72,0.51,71.75,0.09,10.06,0.00,0.16,building_windows_float_processed +1.51926,13.20,3.33,1.28,72.36,0.60,9.14,0.00,0.11,building_windows_float_processed +1.51808,13.43,2.87,1.19,72.84,0.55,9.03,0.00,0.00,building_windows_float_processed +1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.00,0.00,building_windows_float_processed +1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.00,0.09,building_windows_float_processed +1.51769,12.45,2.71,1.29,73.70,0.56,9.06,0.00,0.24,building_windows_float_processed +1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.00,0.31,building_windows_float_processed +1.51824,12.87,3.48,1.29,72.95,0.60,8.43,0.00,0.00,building_windows_float_processed +1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.00,0.00,building_windows_float_processed +1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.00,0.11,building_windows_float_processed +1.51905,13.60,3.62,1.11,72.64,0.14,8.76,0.00,0.00,building_windows_float_processed +1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.00,building_windows_float_processed +1.52172,13.51,3.86,0.88,71.79,0.23,9.54,0.00,0.11,building_windows_float_processed +1.52227,14.17,3.81,0.78,71.35,0.00,9.69,0.00,0.00,building_windows_float_processed 
+1.52172,13.48,3.74,0.90,72.01,0.18,9.61,0.00,0.07,building_windows_float_processed +1.52099,13.69,3.59,1.12,71.96,0.09,9.40,0.00,0.00,building_windows_float_processed +1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.00,0.17,building_windows_float_processed +1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.00,0.17,building_windows_float_processed +1.52152,13.12,3.58,0.90,72.20,0.23,9.82,0.00,0.16,building_windows_float_processed +1.52300,13.31,3.58,0.82,71.99,0.12,10.17,0.00,0.03,building_windows_float_processed +1.51574,14.86,3.67,1.74,71.87,0.16,7.36,0.00,0.12,building_windows_non_float_processed +1.51848,13.64,3.87,1.27,71.96,0.54,8.32,0.00,0.32,building_windows_non_float_processed +1.51593,13.09,3.59,1.52,73.10,0.67,7.83,0.00,0.00,building_windows_non_float_processed +1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.00,0.00,building_windows_non_float_processed +1.51596,13.02,3.56,1.54,73.11,0.72,7.90,0.00,0.00,building_windows_non_float_processed +1.51590,13.02,3.58,1.51,73.12,0.69,7.96,0.00,0.00,building_windows_non_float_processed +1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.00,0.00,building_windows_non_float_processed +1.51627,13.00,3.58,1.54,72.83,0.61,8.04,0.00,0.00,building_windows_non_float_processed +1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.00,0.14,building_windows_non_float_processed +1.51590,12.82,3.52,1.90,72.86,0.69,7.97,0.00,0.00,building_windows_non_float_processed +1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.00,0.00,building_windows_non_float_processed +1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.00,0.00,building_windows_non_float_processed +1.51646,13.41,3.55,1.25,72.81,0.68,8.10,0.00,0.00,building_windows_non_float_processed +1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.00,0.09,building_windows_non_float_processed +1.51409,14.25,3.09,2.08,72.28,1.10,7.08,0.00,0.00,building_windows_non_float_processed +1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.00,0.00,building_windows_non_float_processed 
+1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.00,0.00,building_windows_non_float_processed +1.51645,13.40,3.49,1.52,72.65,0.67,8.08,0.00,0.10,building_windows_non_float_processed +1.51618,13.01,3.50,1.48,72.89,0.60,8.12,0.00,0.00,building_windows_non_float_processed +1.51640,12.55,3.48,1.87,73.23,0.63,8.08,0.00,0.09,building_windows_non_float_processed +1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.00,0.22,building_windows_non_float_processed +1.51605,12.90,3.44,1.45,73.06,0.44,8.27,0.00,0.00,building_windows_non_float_processed +1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.00,0.19,building_windows_non_float_processed +1.51590,13.24,3.34,1.47,73.10,0.39,8.22,0.00,0.00,building_windows_non_float_processed +1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.00,0.00,building_windows_non_float_processed +1.51860,13.36,3.43,1.43,72.26,0.51,8.60,0.00,0.00,building_windows_non_float_processed +1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.00,0.15,building_windows_non_float_processed +1.51743,12.20,3.25,1.16,73.55,0.62,8.90,0.00,0.24,building_windows_non_float_processed +1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.00,0.00,building_windows_non_float_processed +1.51811,12.96,2.96,1.43,72.92,0.60,8.79,0.14,0.00,building_windows_non_float_processed +1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,building_windows_non_float_processed +1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,building_windows_non_float_processed +1.51820,12.62,2.76,0.83,73.81,0.35,9.42,0.00,0.20,building_windows_non_float_processed +1.52725,13.80,3.15,0.66,70.57,0.08,11.64,0.00,0.00,building_windows_non_float_processed +1.52410,13.83,2.90,1.17,71.15,0.08,10.79,0.00,0.00,building_windows_non_float_processed +1.52475,11.45,0.00,1.88,72.19,0.81,13.24,0.00,0.34,building_windows_non_float_processed +1.53125,10.73,0.00,2.10,69.81,0.58,13.30,3.15,0.28,building_windows_non_float_processed +1.53393,12.30,0.00,1.00,70.16,0.12,16.19,0.00,0.24,building_windows_non_float_processed 
+1.52222,14.43,0.00,1.00,72.67,0.10,11.52,0.00,0.08,building_windows_non_float_processed +1.51818,13.72,0.00,0.56,74.45,0.00,10.99,0.00,0.00,building_windows_non_float_processed +1.52664,11.23,0.00,0.77,73.21,0.00,14.68,0.00,0.00,building_windows_non_float_processed +1.52739,11.02,0.00,0.75,73.08,0.00,14.96,0.00,0.00,building_windows_non_float_processed +1.52777,12.64,0.00,0.67,72.02,0.06,14.40,0.00,0.00,building_windows_non_float_processed +1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.00,0.14,building_windows_non_float_processed +1.51847,13.10,3.97,1.19,72.44,0.60,8.43,0.00,0.00,building_windows_non_float_processed +1.51846,13.41,3.89,1.33,72.38,0.51,8.28,0.00,0.00,building_windows_non_float_processed +1.51829,13.24,3.90,1.41,72.33,0.55,8.31,0.00,0.10,building_windows_non_float_processed +1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.00,0.00,building_windows_non_float_processed +1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,building_windows_non_float_processed +1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.00,0.00,building_windows_non_float_processed +1.51844,13.25,3.76,1.32,72.40,0.58,8.42,0.00,0.00,building_windows_non_float_processed +1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.00,0.21,building_windows_non_float_processed +1.51687,13.23,3.54,1.48,72.84,0.56,8.10,0.00,0.00,building_windows_non_float_processed +1.51707,13.48,3.48,1.71,72.52,0.62,7.99,0.00,0.00,building_windows_non_float_processed +1.52177,13.20,3.68,1.15,72.75,0.54,8.52,0.00,0.00,building_windows_non_float_processed +1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.00,0.12,building_windows_non_float_processed +1.51667,12.94,3.61,1.26,72.75,0.56,8.60,0.00,0.00,building_windows_non_float_processed +1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.00,0.17,building_windows_non_float_processed +1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,building_windows_non_float_processed +1.52020,13.98,1.35,1.63,71.76,0.39,10.56,0.00,0.18,building_windows_non_float_processed 
+1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.00,0.00,building_windows_non_float_processed +1.52614,13.70,0.00,1.36,71.24,0.19,13.44,0.00,0.10,building_windows_non_float_processed +1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.00,0.00,building_windows_non_float_processed +1.51800,13.71,3.93,1.54,71.81,0.54,8.21,0.00,0.15,building_windows_non_float_processed +1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.00,0.00,building_windows_non_float_processed +1.51789,13.19,3.90,1.30,72.33,0.55,8.44,0.00,0.28,building_windows_non_float_processed +1.51806,13.00,3.80,1.08,73.07,0.56,8.38,0.00,0.12,building_windows_non_float_processed +1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.00,0.00,building_windows_non_float_processed +1.51674,12.79,3.52,1.54,73.36,0.66,7.90,0.00,0.00,building_windows_non_float_processed +1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.00,0.00,building_windows_non_float_processed +1.51690,13.33,3.54,1.61,72.54,0.68,8.11,0.00,0.00,building_windows_non_float_processed +1.51851,13.20,3.63,1.07,72.83,0.57,8.41,0.09,0.17,building_windows_non_float_processed +1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,building_windows_non_float_processed +1.51709,13.00,3.47,1.79,72.72,0.66,8.18,0.00,0.00,building_windows_non_float_processed +1.51660,12.99,3.18,1.23,72.97,0.58,8.81,0.00,0.24,building_windows_non_float_processed +1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.00,0.35,building_windows_non_float_processed +1.51769,13.65,3.66,1.11,72.77,0.11,8.60,0.00,0.00,vehicle_windows_float_processed +1.51610,13.33,3.53,1.34,72.67,0.56,8.33,0.00,0.00,vehicle_windows_float_processed +1.51670,13.24,3.57,1.38,72.70,0.56,8.44,0.00,0.10,vehicle_windows_float_processed +1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.00,0.00,vehicle_windows_float_processed +1.51665,13.14,3.45,1.76,72.48,0.60,8.38,0.00,0.17,vehicle_windows_float_processed +1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,vehicle_windows_float_processed +1.51779,13.64,3.65,0.65,73.00,0.06,8.93,0.00,0.00,vehicle_windows_float_processed 
+1.51610,13.42,3.40,1.22,72.69,0.59,8.32,0.00,0.00,vehicle_windows_float_processed +1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.00,0.00,vehicle_windows_float_processed +1.51646,13.04,3.40,1.26,73.01,0.52,8.58,0.00,0.00,vehicle_windows_float_processed +1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.00,0.00,vehicle_windows_float_processed +1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.00,0.00,vehicle_windows_float_processed +1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.00,0.00,vehicle_windows_float_processed +1.51796,13.50,3.36,1.63,71.94,0.57,8.81,0.00,0.09,vehicle_windows_float_processed +1.51832,13.33,3.34,1.54,72.14,0.56,8.99,0.00,0.00,vehicle_windows_float_processed +1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,vehicle_windows_float_processed +1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,vehicle_windows_float_processed +1.51514,14.01,2.68,3.50,69.89,1.68,5.87,2.20,0.00,containers +1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,containers +1.52171,11.56,1.88,1.56,72.86,0.47,11.41,0.00,0.00,containers +1.52151,11.03,1.71,1.56,73.44,0.58,11.62,0.00,0.00,containers +1.51969,12.64,0.00,1.65,73.75,0.38,11.53,0.00,0.00,containers +1.51666,12.86,0.00,1.83,73.88,0.97,10.17,0.00,0.00,containers +1.51994,13.27,0.00,1.76,73.03,0.47,11.32,0.00,0.00,containers +1.52369,13.44,0.00,1.58,72.22,0.32,12.24,0.00,0.00,containers +1.51316,13.02,0.00,3.04,70.48,6.21,6.96,0.00,0.00,containers +1.51321,13.00,0.00,3.02,70.70,6.21,6.93,0.00,0.00,containers +1.52043,13.38,0.00,1.40,72.25,0.33,12.50,0.00,0.00,containers +1.52058,12.85,1.61,2.17,72.18,0.76,9.70,0.24,0.51,containers +1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.00,0.28,containers +1.51905,14.00,2.39,1.56,72.37,0.00,9.57,0.00,0.00,tableware +1.51937,13.79,2.41,1.19,72.76,0.00,9.77,0.00,0.00,tableware +1.51829,14.46,2.24,1.62,72.38,0.00,9.26,0.00,0.00,tableware +1.51852,14.09,2.19,1.66,72.67,0.00,9.32,0.00,0.00,tableware +1.51299,14.40,1.74,1.54,74.55,0.00,7.59,0.00,0.00,tableware 
+1.51888,14.99,0.78,1.74,72.50,0.00,9.95,0.00,0.00,tableware +1.51916,14.15,0.00,2.09,72.74,0.00,10.88,0.00,0.00,tableware +1.51969,14.56,0.00,0.56,73.48,0.00,11.22,0.00,0.00,tableware +1.51115,17.38,0.00,0.34,75.41,0.00,6.65,0.00,0.00,tableware +1.51131,13.69,3.20,1.81,72.81,1.76,5.43,1.19,0.00,headlamps +1.51838,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.00,headlamps +1.52315,13.44,3.34,1.23,72.38,0.60,8.83,0.00,0.00,headlamps +1.52247,14.86,2.20,2.06,70.26,0.76,9.76,0.00,0.00,headlamps +1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.00,headlamps +1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,headlamps +1.51602,14.85,0.00,2.38,73.28,0.00,8.76,0.64,0.09,headlamps +1.51623,14.20,0.00,2.79,73.46,0.04,9.04,0.40,0.09,headlamps +1.51719,14.75,0.00,2.00,73.02,0.00,8.53,1.59,0.08,headlamps +1.51683,14.56,0.00,1.98,73.29,0.00,8.52,1.57,0.07,headlamps +1.51545,14.14,0.00,2.68,73.39,0.08,9.07,0.61,0.05,headlamps +1.51556,13.87,0.00,2.54,73.23,0.14,9.41,0.81,0.01,headlamps +1.51727,14.70,0.00,2.34,73.28,0.00,8.95,0.66,0.00,headlamps +1.51531,14.38,0.00,2.66,73.10,0.04,9.08,0.64,0.00,headlamps +1.51609,15.01,0.00,2.51,73.05,0.05,8.83,0.53,0.00,headlamps +1.51508,15.15,0.00,2.25,73.50,0.00,8.34,0.63,0.00,headlamps +1.51653,11.95,0.00,1.19,75.18,2.70,8.93,0.00,0.00,headlamps +1.51514,14.85,0.00,2.42,73.72,0.00,8.39,0.56,0.00,headlamps +1.51658,14.80,0.00,1.99,73.11,0.00,8.28,1.71,0.00,headlamps +1.51617,14.95,0.00,2.27,73.30,0.00,8.71,0.67,0.00,headlamps +1.51732,14.95,0.00,1.80,72.99,0.00,8.61,1.55,0.00,headlamps +1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,headlamps +1.51831,14.39,0.00,1.82,72.86,1.41,6.47,2.88,0.00,headlamps +1.51640,14.37,0.00,2.74,72.85,0.00,9.45,0.54,0.00,headlamps +1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.00,headlamps +1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.00,headlamps +1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.00,headlamps +1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.00,headlamps 
+1.51711,14.23,0.00,2.08,73.36,0.00,8.62,1.67,0.00,headlamps diff --git a/data/wine.csv b/data/wine.csv new file mode 100644 index 0000000..fe00ec1 --- /dev/null +++ b/data/wine.csv @@ -0,0 +1,179 @@ +alcohol,malic acid,ash,alcalinity of ash,magnesium,total phenols,flavanoids,nonflavanoid phenols,proanthocyanins,color intensity,hue,OD280/OD315 of diluted wines,proline,class +14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065,1 +13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050,1 +13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185,1 +14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480,1 +13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735,1 +14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450,1 +14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290,1 +14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295,1 +14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045,1 +13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045,1 +14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510,1 +14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280,1 +13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320,1 +14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150,1 +14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547,1 +13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310,1 +14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280,1 +13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130,1 +14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680,1 +13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845,1 +14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780,1 +12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770,1 +13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035,1 +12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015,1 +13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845,1 
+13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830,1 +13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195,1 +13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285,1 +13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915,1 +14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035,1 +13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285,1 +13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515,1 +13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990,1 +13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235,1 +13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095,1 +13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920,1 +13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880,1 +13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105,1 +13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020,1 +14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760,1 +13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795,1 +13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035,1 +13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095,1 +13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680,1 +13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885,1 +14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080,1 +14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065,1 +13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985,1 +14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060,1 +13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260,1 +13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150,1 +13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265,1 +13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190,1 +13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375,1 +13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060,1 +13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120,1 
+14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970,1 +13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270,1 +13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285,1 +12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520,2 +12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680,2 +12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450,2 +13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630,2 +12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420,2 +12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355,2 +12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678,2 +13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502,2 +12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510,2 +13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750,2 +12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718,2 +12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870,2 +13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410,2 +13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472,2 +12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985,2 +11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886,2 +11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428,2 +13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392,2 +11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500,2 +12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750,2 +12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463,2 +12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278,2 +12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714,2 +12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630,2 +13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515,2 +11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520,2 +12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450,2 +12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495,2 +11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562,2 
+11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680,2 +12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625,2 +12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480,2 +12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450,2 +12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495,2 +12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290,2 +11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345,2 +12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937,2 +11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625,2 +12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428,2 +12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660,2 +12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406,2 +12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710,2 +12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562,2 +12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438,2 +11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415,2 +12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672,2 +12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315,2 +12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510,2 +12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488,2 +12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312,2 +11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680,2 +11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562,2 +12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325,2 +11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607,2 +11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434,2 +12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385,2 +11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407,2 +11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495,2 +12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345,2 +12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372,2 +12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564,2 
+11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625,2 +11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465,2 +12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365,2 +13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380,2 +11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380,2 +12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378,2 +12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352,2 +11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466,2 +12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342,2 +12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580,2 +12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630,3 +12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530,3 +12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560,3 +12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600,3 +12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650,3 +12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695,3 +12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720,3 +12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515,3 +13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580,3 +12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590,3 +12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600,3 +13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780,3 +13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520,3 +13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550,3 +12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855,3 +13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830,3 +13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415,3 +12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625,3 +13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650,3 +13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550,3 +13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500,3 +12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480,3 +13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425,3 
+13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675,3 +12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640,3 +13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725,3 +13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480,3 +12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880,3 +14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660,3 +13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620,3 +12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520,3 +13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680,3 +12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570,3 +12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675,3 +13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615,3 +13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520,3 +13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695,3 +12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685,3 +13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750,3 +13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630,3 +12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510,3 +12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470,3 +14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660,3 +13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740,3 +13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750,3 +13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835,3 +13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840,3 +14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560,3 \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index c3e2703..d3f65b7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,11 +1,30 @@ -# PHP Machine Learning (PHP-ML) +# PHP Machine Learning library [![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop) +[![Documentation 
Status](https://readthedocs.org/projects/php-ml/badge/?version=develop)](http://php-ml.readthedocs.org/en/develop/?badge=develop) [![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml) [![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml) [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop) -Fresh approach to machine learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ... +Fresh approach to Machine Learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ... + +Simple example of classification: +```php +use Phpml\Classification\KNearestNeighbors; + +$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; +$labels = ['a', 'a', 'a', 'b', 'b', 'b']; + +$classifier = new KNearestNeighbors(); +$classifier->train($samples, $labels); + +$classifier->predict([3, 2]); +// return 'b' +``` + +## Documentation + +To find out how to use PHP-ML follow [Documentation](http://php-ml.readthedocs.org/). 
## Installation @@ -15,14 +34,33 @@ Currently this library is in the process of developing, but You can install it w composer require php-ai/php-ml ``` -## To-Do +## Features -* implements more algorithms -* integration with Lavacharts for data visualization +* Classification + * [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/) + * [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/) +* Regression + * [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/) +* Clustering + * [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means) + * [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan) +* Cross Validation + * [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split) +* Datasets + * [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset) + * Ready to use: + * [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/) +* Math + * [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/) + * [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/) + -## Testing +## Contribute -After installation, you can launch the test suite in project root directory (you will need to install dev requirements with composer) +- Issue Tracker: github.com/php-ai/php-ml/issues +- Source Code: github.com/php-ai/php-ml + +After installation, you can launch the test suite in project root directory (you will need to install dev requirements with Composer) ``` bin/phpunit @@ -34,4 +72,4 @@ PHP-ML is released under the MIT Licence. 
See the bundled LICENSE file for detai ## Author -Arkadiusz Kondas (@ArkadiuszKondas) \ No newline at end of file +Arkadiusz Kondas (@ArkadiuszKondas) diff --git a/docs/machine-learning/classification/knearestneighbors.md b/docs/machine-learning/classification/k-nearest-neighbors.md similarity index 71% rename from docs/machine-learning/classification/knearestneighbors.md rename to docs/machine-learning/classification/k-nearest-neighbors.md index 569c48b..3d5aa27 100644 --- a/docs/machine-learning/classification/knearestneighbors.md +++ b/docs/machine-learning/classification/k-nearest-neighbors.md @@ -5,14 +5,16 @@ Classifier implementing the k-nearest neighbors algorithm. ### Constructor Parameters * $k - number of nearest neighbors to scan (default: 3) +* $distanceMetric - Distance object, default Euclidean (see [distance documentation](math/distance/)) ``` $classifier = new KNearestNeighbors($k=4); +$classifier = new KNearestNeighbors($k=3, new Minkowski($lambda=4)); ``` ### Train -To train a classifier simply provide train samples and labels (as `array`): +To train a classifier simply provide train samples and labels (as `array`). Example: ``` $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; @@ -24,7 +26,7 @@ $classifier->train($samples, $labels); ### Predict -To predict sample class use `predict` method. You can provide one sample or array of samples: +To predict sample label use `predict` method. You can provide one sample or array of samples: ``` $classifier->predict([3, 2]); diff --git a/docs/machine-learning/classification/naive-bayes.md b/docs/machine-learning/classification/naive-bayes.md new file mode 100644 index 0000000..e990321 --- /dev/null +++ b/docs/machine-learning/classification/naive-bayes.md @@ -0,0 +1,27 @@ +# NaiveBayes Classifier + +Classifier based on applying Bayes' theorem with strong (naive) independence assumptions between the features. 
+ +### Train + +To train a classifier simply provide train samples and labels (as `array`). Example: + +``` +$samples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; +$labels = ['a', 'b', 'c']; + +$classifier = new NaiveBayes(); +$classifier->train($samples, $labels); +``` + +### Predict + +To predict sample label use `predict` method. You can provide one sample or array of samples: + +``` +$classifier->predict([3, 1, 1]); +// return 'a' + +$classifier->predict([[3, 1, 1], [1, 4, 1]]); +// return ['a', 'b'] +``` diff --git a/docs/machine-learning/clustering/dbscan.md b/docs/machine-learning/clustering/dbscan.md new file mode 100644 index 0000000..45dd631 --- /dev/null +++ b/docs/machine-learning/clustering/dbscan.md @@ -0,0 +1,27 @@ +# DBSCAN clustering + +It is a density-based clustering algorithm: given a set of points in some space, it groups together points that are closely packed together (points with many nearby neighbors), marking as outliers points that lie alone in low-density regions (whose nearest neighbors are too far away). DBSCAN is one of the most common clustering algorithms and also most cited in scientific literature. +*(source: wikipedia)* + +### Constructor Parameters + +* $epsilon - epsilon, maximum distance between two samples for them to be considered as in the same neighborhood +* $minSamples - number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself) +* $distanceMetric - Distance object, default Euclidean (see [distance documentation](math/distance/)) + +``` +$dbscan = new DBSCAN($epsilon = 2, $minSamples = 3); +$dbscan = new DBSCAN($epsilon = 2, $minSamples = 3, new Minkowski($lambda=4)); +``` + +### Clustering + +To divide the samples into clusters simply use `cluster` method. It returns the `array` of clusters with samples inside. 
+ +``` +$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]]; + +$dbscan = new DBSCAN($epsilon = 2, $minSamples = 3); +$dbscan->cluster($samples); +// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]] +``` diff --git a/docs/machine-learning/clustering/k-means.md b/docs/machine-learning/clustering/k-means.md new file mode 100644 index 0000000..296feb1 --- /dev/null +++ b/docs/machine-learning/clustering/k-means.md @@ -0,0 +1,37 @@ +# K-means clustering + +The K-Means algorithm clusters data by trying to separate samples in n groups of equal variance, minimizing a criterion known as the inertia or within-cluster sum-of-squares. +This algorithm requires the number of clusters to be specified. + +### Constructor Parameters + +* $clustersNumber - number of clusters to find +* $initialization - initialization method, default kmeans++ (see below) + +``` +$kmeans = new KMeans(2); +$kmeans = new KMeans(4, KMeans::INIT_RANDOM); +``` + +### Clustering + +To divide the samples into clusters simply use `cluster` method. It returns the `array` of clusters with samples inside. + +``` +$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]]; + +$kmeans = new KMeans(2); +$kmeans->cluster($samples); +// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]] +``` + +### Initialization methods + +#### kmeans++ (default) + +K-means++ method selects initial cluster centers for k-means clustering in a smart way to speed up convergence. +It uses the DASV seeding method, which consists of finding good initial centroids for the clusters. + +#### random + +Random initialization method chooses completely random centroids. It gets the space boundaries to avoid placing cluster centroids too far from the sample data. 
diff --git a/docs/machine-learning/cross-validation/randomsplit.md b/docs/machine-learning/cross-validation/random-split.md similarity index 100% rename from docs/machine-learning/cross-validation/randomsplit.md rename to docs/machine-learning/cross-validation/random-split.md diff --git a/docs/machine-learning/datasets/csv-dataset.md b/docs/machine-learning/datasets/csv-dataset.md index 553bc60..0ea6319 100644 --- a/docs/machine-learning/datasets/csv-dataset.md +++ b/docs/machine-learning/datasets/csv-dataset.md @@ -12,4 +12,4 @@ Helper class that loads data from CSV file. It extends the `ArrayDataset`. $dataset = new CsvDataset('dataset.csv', 2, true); ``` -See Array Dataset for more information. +See [ArrayDataset](machine-learning/datasets/array-dataset/) for more information. diff --git a/docs/machine-learning/datasets/demo/iris.md b/docs/machine-learning/datasets/demo/iris.md index 9e00d5c..5972f1b 100644 --- a/docs/machine-learning/datasets/demo/iris.md +++ b/docs/machine-learning/datasets/demo/iris.md @@ -17,7 +17,7 @@ To load Iris dataset simple use: $dataset = new Iris(); ``` -### Several samples +### Several samples example ``` sepal length,sepal width,petal length,petal width,class diff --git a/docs/machine-learning/metric/accuracy.md b/docs/machine-learning/metric/accuracy.md index b8ec70a..5045973 100644 --- a/docs/machine-learning/metric/accuracy.md +++ b/docs/machine-learning/metric/accuracy.md @@ -4,7 +4,7 @@ Class for calculate classifier accuracy. ### Score -To calculate classifier accuracy score use `score` static method. Parametrs: +To calculate classifier accuracy score use `score` static method. Parameters: * $actualLabels - (array) true sample labels * $predictedLabels - (array) predicted labels (e.x. 
from test group) diff --git a/docs/machine-learning/metric/distance.md b/docs/machine-learning/metric/distance.md deleted file mode 100644 index de8bcb1..0000000 --- a/docs/machine-learning/metric/distance.md +++ /dev/null @@ -1,17 +0,0 @@ -# Distance - -Special class for calculation of different types of distance. - -### Euclidean - -![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance") - -To calculate euclidean distance: - -``` -$a = [4, 6]; -$b = [2, 5]; - -Distance::euclidean($a, $b); -// return 2.2360679774998 -``` diff --git a/docs/machine-learning/regression/least-squares.md b/docs/machine-learning/regression/least-squares.md new file mode 100644 index 0000000..4a00bcd --- /dev/null +++ b/docs/machine-learning/regression/least-squares.md @@ -0,0 +1,51 @@ +# LeastSquares Linear Regression + +Linear model that uses the least squares method to approximate a solution. + +### Train + +To train a model simply provide train samples and target values (as `array`). Example: + +``` +$samples = [[60], [61], [62], [63], [65]]; +$targets = [3.1, 3.6, 3.8, 4, 4.1]; + +$regression = new LeastSquares(); +$regression->train($samples, $targets); +``` + +### Predict + +To predict sample target value use `predict` method with sample to check (as `array`). Example: + +``` +$regression->predict([64]); +// return 4.06 +``` + +### Multiple Linear Regression + +The term multiple attached to linear regression means that there are two or more sample parameters used to predict target. +For example you can use: mileage and production year to predict price of a car. 
+ +``` +$samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]]; +$targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; + +$regression = new LeastSquares(); +$regression->train($samples, $targets); +$regression->predict([60000, 1996]); +// return 4094.82 +``` + +### Intercept and Coefficients + +After you train your model you can get the intercept and coefficients array. + +``` +$regression->getIntercept(); +// return -7.9635135135131 + +$regression->getCoefficients(); +// return [array(1) {[0]=>float(0.18783783783783)}] +``` diff --git a/docs/math/distance.md b/docs/math/distance.md new file mode 100644 index 0000000..fd491ea --- /dev/null +++ b/docs/math/distance.md @@ -0,0 +1,109 @@ +# Distance + +Selected algorithms require the use of a function for calculating the distance. + +### Euclidean + +Class for calculating Euclidean distance. + +![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance") + +To calculate Euclidean distance: + +``` +$a = [4, 6]; +$b = [2, 5]; + +$euclidean = new Euclidean(); +$euclidean->distance($a, $b); +// return 2.2360679774998 +``` + +### Manhattan + +Class for calculating Manhattan distance. + +![manhattan](https://upload.wikimedia.org/math/4/c/5/4c568bd1d76a6b15e19cb2ac3ad75350.png "Manhattan Distance") + +To calculate Manhattan distance: + +``` +$a = [4, 6]; +$b = [2, 5]; + +$manhattan = new Manhattan(); +$manhattan->distance($a, $b); +// return 3 +``` + +### Chebyshev + +Class for calculating Chebyshev distance. 
+ +![chebyshev](https://upload.wikimedia.org/math/7/1/2/71200f7dbb43b3bcfbcbdb9e02ab0a0c.png "Chebyshev Distance") + +To calculate Chebyshev distance: + +``` +$a = [4, 6]; +$b = [2, 5]; + +$chebyshev = new Chebyshev(); +$chebyshev->distance($a, $b); +// return 2 +``` + +### Minkowski + +Class for calculation Minkowski distance. + +![minkowski](https://upload.wikimedia.org/math/a/a/0/aa0c62083c12390cb15ac3217de88e66.png "Minkowski Distance") + +To calculate Minkowski distance: + +``` +$a = [4, 6]; +$b = [2, 5]; + +$minkowski = new Minkowski(); +$minkowski->distance($a, $b); +// return 2.080 +``` + +You can provide the `lambda` parameter: + +``` +$a = [6, 10, 3]; +$b = [2, 5, 5]; + +$minkowski = new Minkowski($lambda = 5); +$minkowski->distance($a, $b); +// return 5.300 +``` + +### Custom distance + +To apply your own function of distance use `Distance` interface. Example + +``` +class CustomDistance implements Distance +{ + /** + * @param array $a + * @param array $b + * + * @return float + */ + public function distance(array $a, array $b): float + { + $distance = []; + $count = count($a); + + for ($i = 0; $i < $count; ++$i) { + $distance[] = $a[$i] * $b[$i]; + } + + return min($distance); + } +} +``` diff --git a/docs/math/matrix.md b/docs/math/matrix.md new file mode 100644 index 0000000..3716347 --- /dev/null +++ b/docs/math/matrix.md @@ -0,0 +1,129 @@ +# Matrix + +Class that wraps PHP arrays to mathematical matrix. 
+ +### Creation + +To create Matrix use simple arrays: + +``` +$matrix = new Matrix([ + [3, 3, 3], + [4, 2, 1], + [5, 6, 7], +]); +``` + +You can also create Matrix (one dimension) from flat array: + +``` +$flatArray = [1, 2, 3, 4]; +$matrix = Matrix::fromFlatArray($flatArray); +``` + +### Matrix data + +Methods for reading data from Matrix: + +``` +$matrix->toArray(); // cast matrix to PHP array +$matrix->getRows(); // rows count +$matrix->getColumns(); // columns count +$matrix->getColumnValues($column=4); // get values from given column +``` + +### Determinant + +Read more about [matrix determinant](https://en.wikipedia.org/wiki/Determinant). + +``` +$matrix = new Matrix([ + [3, 3, 3], + [4, 2, 1], + [5, 6, 7], +]); + +$matrix->getDeterminant(); +// return -3 +``` + +### Transpose + +Read more about [matrix transpose](https://en.wikipedia.org/wiki/Transpose). + +``` +$matrix->transpose(); +// return new Matrix +``` + +### Multiply + +Multiply Matrix by another Matrix. + +``` +$matrix1 = new Matrix([ + [1, 2, 3], + [4, 5, 6], +]); + +$matrix2 = new Matrix([ + [7, 8], + [9, 10], + [11, 12], +]); + +$matrix1->multiply($matrix2); + +// result $product = [ +// [58, 64], +// [139, 154], +//]; +``` + +### Divide by scalar + +You can divide Matrix by scalar value. + +``` +$matrix->divideByScalar(2); +``` + +### Inverse + +Read more about [invertible matrix](https://en.wikipedia.org/wiki/Invertible_matrix) + +``` +$matrix = new Matrix([ + [3, 4, 2], + [4, 5, 5], + [1, 1, 1], +]); + +$matrix->inverse(); + +// result $inverseMatrix = [ +// [0, -1, 5], +// [1 / 2, 1 / 2, -7 / 2], +// [-1 / 2, 1 / 2, -1 / 2], +//]; + +``` + +### Cross out + +Cross out given row and column from Matrix. 
+ +``` +$matrix = new Matrix([ + [3, 4, 2], + [4, 5, 5], + [1, 1, 1], +]); + +$matrix->crossOut(1, 1) + +// result $crossOuted = [ +// [3, 2], +// [1, 1], +//]; +``` diff --git a/mkdocs.yml b/mkdocs.yml index f20036f..eac1c17 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -3,15 +3,23 @@ pages: - Home: index.md - Machine Learning: - Classification: - - KNearestNeighbors: machine-learning/classification/knearestneighbors.md + - KNearestNeighbors: machine-learning/classification/k-nearest-neighbors.md + - NaiveBayes: machine-learning/classification/naive-bayes.md + - Regression: + - LeastSquares: machine-learning/regression/least-squares.md + - Clustering: + - KMeans: machine-learning/clustering/k-means.md + - DBSCAN: machine-learning/clustering/dbscan.md - Cross Validation: - - RandomSplit: machine-learning/cross-validation/randomsplit.md + - RandomSplit: machine-learning/cross-validation/random-split.md - Datasets: - Array Dataset: machine-learning/datasets/array-dataset.md - CSV Dataset: machine-learning/datasets/csv-dataset.md - - Demo: + - Ready to use datasets: - Iris: machine-learning/datasets/demo/iris.md - Metric: - Accuracy: machine-learning/metric/accuracy.md - - Distance: machine-learning/metric/distance.md -theme: readthedocs \ No newline at end of file + - Math: + - Distance: math/distance.md + - Matrix: math/matrix.md +theme: readthedocs diff --git a/src/Phpml/Classifier/Classifier.php b/src/Phpml/Classification/Classifier.php similarity index 90% rename from src/Phpml/Classifier/Classifier.php rename to src/Phpml/Classification/Classifier.php index 90250a9..00e6779 100644 --- a/src/Phpml/Classifier/Classifier.php +++ b/src/Phpml/Classification/Classifier.php @@ -2,7 +2,7 @@ declare (strict_types = 1); -namespace Phpml\Classifier; +namespace Phpml\Classification; interface Classifier { diff --git a/src/Phpml/Classifier/KNearestNeighbors.php b/src/Phpml/Classification/KNearestNeighbors.php similarity index 53% rename from 
src/Phpml/Classifier/KNearestNeighbors.php rename to src/Phpml/Classification/KNearestNeighbors.php index f913488..93991ae 100644 --- a/src/Phpml/Classifier/KNearestNeighbors.php +++ b/src/Phpml/Classification/KNearestNeighbors.php @@ -2,64 +2,41 @@ declare (strict_types = 1); -namespace Phpml\Classifier; +namespace Phpml\Classification; -use Phpml\Metric\Distance; +use Phpml\Classification\Traits\Predictable; +use Phpml\Classification\Traits\Trainable; +use Phpml\Math\Distance; +use Phpml\Math\Distance\Euclidean; class KNearestNeighbors implements Classifier { + use Trainable, Predictable; + /** * @var int */ private $k; /** - * @var array + * @var Distance */ - private $samples; + private $distanceMetric; /** - * @var array + * @param int $k + * @param Distance|null $distanceMetric (if null then Euclidean distance as default) */ - private $labels; - - /** - * @param int $k - */ - public function __construct(int $k = 3) + public function __construct(int $k = 3, Distance $distanceMetric = null) { + if (null === $distanceMetric) { + $distanceMetric = new Euclidean(); + } + $this->k = $k; $this->samples = []; $this->labels = []; - } - - /** - * @param array $samples - * @param array $labels - */ - public function train(array $samples, array $labels) - { - $this->samples = $samples; - $this->labels = $labels; - } - - /** - * @param array $samples - * - * @return mixed - */ - public function predict(array $samples) - { - if (!is_array($samples[0])) { - $predicted = $this->predictSample($samples); - } else { - $predicted = []; - foreach ($samples as $index => $sample) { - $predicted[$index] = $this->predictSample($sample); - } - } - - return $predicted; + $this->distanceMetric = $distanceMetric; } /** @@ -67,7 +44,7 @@ class KNearestNeighbors implements Classifier * * @return mixed */ - private function predictSample(array $sample) + protected function predictSample(array $sample) { $distances = $this->kNeighborsDistances($sample); @@ -95,7 +72,7 @@ class 
KNearestNeighbors implements Classifier $distances = []; foreach ($this->samples as $index => $neighbor) { - $distances[$index] = Distance::euclidean($sample, $neighbor); + $distances[$index] = $this->distanceMetric->distance($sample, $neighbor); } asort($distances); diff --git a/src/Phpml/Classification/NaiveBayes.php b/src/Phpml/Classification/NaiveBayes.php new file mode 100644 index 0000000..ae98e1d --- /dev/null +++ b/src/Phpml/Classification/NaiveBayes.php @@ -0,0 +1,36 @@ +labels as $index => $label) { + $predictions[$label] = 0; + foreach ($sample as $token => $count) { + if (array_key_exists($token, $this->samples[$index])) { + $predictions[$label] += $count * $this->samples[$index][$token]; + } + } + } + + arsort($predictions, SORT_NUMERIC); + reset($predictions); + + return key($predictions); + } +} diff --git a/src/Phpml/Classification/SupportVectorMachine.php b/src/Phpml/Classification/SupportVectorMachine.php new file mode 100644 index 0000000..7b0d854 --- /dev/null +++ b/src/Phpml/Classification/SupportVectorMachine.php @@ -0,0 +1,61 @@ +kernel = $kernel; + $this->C = $C; + $this->tolerance = $tolerance; + $this->upperBound = $upperBound; + } + + /** + * @param array $sample + * + * @return mixed + */ + protected function predictSample(array $sample) + { + } +} diff --git a/src/Phpml/Classification/Traits/Predictable.php b/src/Phpml/Classification/Traits/Predictable.php new file mode 100644 index 0000000..804b54a --- /dev/null +++ b/src/Phpml/Classification/Traits/Predictable.php @@ -0,0 +1,34 @@ +predictSample($samples); + } else { + $predicted = []; + foreach ($samples as $index => $sample) { + $predicted[$index] = $this->predictSample($sample); + } + } + + return $predicted; + } + + /** + * @param array $sample + * + * @return mixed + */ + abstract protected function predictSample(array $sample); +} diff --git a/src/Phpml/Classification/Traits/Trainable.php b/src/Phpml/Classification/Traits/Trainable.php new file mode 100644 index 0000000..8fa97f2 
--- /dev/null +++ b/src/Phpml/Classification/Traits/Trainable.php @@ -0,0 +1,28 @@ +samples = $samples; + $this->labels = $labels; + } +} diff --git a/src/Phpml/Classifier/NaiveBayes.php b/src/Phpml/Classifier/NaiveBayes.php deleted file mode 100644 index 7324d79..0000000 --- a/src/Phpml/Classifier/NaiveBayes.php +++ /dev/null @@ -1,25 +0,0 @@ -epsilon = $epsilon; + $this->minSamples = $minSamples; + $this->distanceMetric = $distanceMetric; + } + + /** + * @param array $samples + * + * @return array + */ + public function cluster(array $samples) + { + $clusters = []; + $visited = []; + + foreach ($samples as $index => $sample) { + if (isset($visited[$index])) { + continue; + } + $visited[$index] = true; + + $regionSamples = $this->getSamplesInRegion($sample, $samples); + if (count($regionSamples) >= $this->minSamples) { + $clusters[] = $this->expandCluster($regionSamples, $visited); + } + } + + return $clusters; + } + + /** + * @param array $localSample + * @param array $samples + * + * @return array + */ + private function getSamplesInRegion($localSample, $samples) + { + $region = []; + + foreach ($samples as $index => $sample) { + if ($this->distanceMetric->distance($localSample, $sample) < $this->epsilon) { + $region[$index] = $sample; + } + } + + return $region; + } + + /** + * @param array $samples + * @param array $visited + * + * @return array + */ + private function expandCluster($samples, &$visited) + { + $cluster = []; + + foreach ($samples as $index => $sample) { + if (!isset($visited[$index])) { + $visited[$index] = true; + $regionSamples = $this->getSamplesInRegion($sample, $samples); + if (count($regionSamples) > $this->minSamples) { + $cluster = array_merge($regionSamples, $cluster); + } + } + + $cluster[] = $sample; + } + + return $cluster; + } +} diff --git a/src/Phpml/Clustering/KMeans.php b/src/Phpml/Clustering/KMeans.php new file mode 100644 index 0000000..c5372b0 --- /dev/null +++ b/src/Phpml/Clustering/KMeans.php @@ -0,0 +1,60 @@ 
+clustersNumber = $clustersNumber; + $this->initialization = $initialization; + } + + /** + * @param array $samples + * + * @return array + */ + public function cluster(array $samples) + { + $space = new Space(count($samples[0])); + foreach ($samples as $sample) { + $space->addPoint($sample); + } + + $clusters = []; + foreach ($space->cluster($this->clustersNumber, $this->initialization) as $cluster) { + $clusters[] = $cluster->getPoints(); + } + + return $clusters; + } +} diff --git a/src/Phpml/Clustering/KMeans/Cluster.php b/src/Phpml/Clustering/KMeans/Cluster.php new file mode 100644 index 0000000..5cd974d --- /dev/null +++ b/src/Phpml/Clustering/KMeans/Cluster.php @@ -0,0 +1,137 @@ +space = $space; + $this->points = new SplObjectStorage(); + } + + /** + * @return array + */ + public function getPoints() + { + $points = []; + foreach ($this->points as $point) { + $points[] = $point->toArray(); + } + + return $points; + } + + /** + * @return array + */ + public function toArray() + { + return array( + 'centroid' => parent::toArray(), + 'points' => $this->getPoints(), + ); + } + + /** + * @param Point $point + * + * @return Point + */ + public function attach(Point $point) + { + if ($point instanceof self) { + throw new LogicException('cannot attach a cluster to another'); + } + + $this->points->attach($point); + + return $point; + } + + /** + * @param Point $point + * + * @return Point + */ + public function detach(Point $point) + { + $this->points->detach($point); + + return $point; + } + + /** + * @param SplObjectStorage $points + */ + public function attachAll(SplObjectStorage $points) + { + $this->points->addAll($points); + } + + /** + * @param SplObjectStorage $points + */ + public function detachAll(SplObjectStorage $points) + { + $this->points->removeAll($points); + } + + public function updateCentroid() + { + if (!$count = count($this->points)) { + return; + } + + $centroid = $this->space->newPoint(array_fill(0, $this->dimension, 0)); + + foreach 
($this->points as $point) { + for ($n = 0; $n < $this->dimension; ++$n) { + $centroid->coordinates[$n] += $point->coordinates[$n]; + } + } + + for ($n = 0; $n < $this->dimension; ++$n) { + $this->coordinates[$n] = $centroid->coordinates[$n] / $count; + } + } + + /** + * @return Point[]|SplObjectStorage + */ + public function getIterator() + { + return $this->points; + } + + /** + * @return mixed + */ + public function count() + { + return count($this->points); + } +} diff --git a/src/Phpml/Clustering/KMeans/Point.php b/src/Phpml/Clustering/KMeans/Point.php new file mode 100644 index 0000000..9ff4b45 --- /dev/null +++ b/src/Phpml/Clustering/KMeans/Point.php @@ -0,0 +1,124 @@ +dimension = count($coordinates); + $this->coordinates = $coordinates; + } + + /** + * @return array + */ + public function toArray() + { + return $this->coordinates; + } + + /** + * @param Point $point + * @param bool $precise + * + * @return int|mixed + */ + public function getDistanceWith(self $point, $precise = true) + { + $distance = 0; + for ($n = 0; $n < $this->dimension; ++$n) { + $difference = $this->coordinates[$n] - $point->coordinates[$n]; + $distance += $difference * $difference; + } + + return $precise ? 
sqrt($distance) : $distance; + } + + /** + * @param $points + * + * @return mixed + */ + public function getClosest($points) + { + foreach ($points as $point) { + $distance = $this->getDistanceWith($point, false); + + if (!isset($minDistance)) { + $minDistance = $distance; + $minPoint = $point; + continue; + } + + if ($distance < $minDistance) { + $minDistance = $distance; + $minPoint = $point; + } + } + + return $minPoint; + } + + /** + * @return array + */ + public function getCoordinates() + { + return $this->coordinates; + } + + /** + * @param mixed $offset + * + * @return bool + */ + public function offsetExists($offset) + { + return isset($this->coordinates[$offset]); + } + + /** + * @param mixed $offset + * + * @return mixed + */ + public function offsetGet($offset) + { + return $this->coordinates[$offset]; + } + + /** + * @param mixed $offset + * @param mixed $value + */ + public function offsetSet($offset, $value) + { + $this->coordinates[$offset] = $value; + } + + /** + * @param mixed $offset + */ + public function offsetUnset($offset) + { + unset($this->coordinates[$offset]); + } +} diff --git a/src/Phpml/Clustering/KMeans/Space.php b/src/Phpml/Clustering/KMeans/Space.php new file mode 100644 index 0000000..2976434 --- /dev/null +++ b/src/Phpml/Clustering/KMeans/Space.php @@ -0,0 +1,233 @@ +dimension = $dimension; + } + + /** + * @return array + */ + public function toArray() + { + $points = []; + foreach ($this as $point) { + $points[] = $point->toArray(); + } + + return ['points' => $points]; + } + + /** + * @param array $coordinates + * + * @return Point + */ + public function newPoint(array $coordinates) + { + if (count($coordinates) != $this->dimension) { + throw new LogicException('('.implode(',', $coordinates).') is not a point of this space'); + } + + return new Point($coordinates); + } + + /** + * @param array $coordinates + * @param null $data + */ + public function addPoint(array $coordinates, $data = null) + { + return 
$this->attach($this->newPoint($coordinates), $data); + } + + /** + * @param object $point + * @param null $data + */ + public function attach($point, $data = null) + { + if (!$point instanceof Point) { + throw new InvalidArgumentException('can only attach points to spaces'); + } + + return parent::attach($point, $data); + } + + /** + * @return int + */ + public function getDimension() + { + return $this->dimension; + } + + /** + * @return array|bool + */ + public function getBoundaries() + { + if (!count($this)) { + return false; + } + + $min = $this->newPoint(array_fill(0, $this->dimension, null)); + $max = $this->newPoint(array_fill(0, $this->dimension, null)); + + foreach ($this as $point) { + for ($n = 0; $n < $this->dimension; ++$n) { + ($min[$n] > $point[$n] || $min[$n] === null) && $min[$n] = $point[$n]; + ($max[$n] < $point[$n] || $max[$n] === null) && $max[$n] = $point[$n]; + } + } + + return array($min, $max); + } + + /** + * @param Point $min + * @param Point $max + * + * @return Point + */ + public function getRandomPoint(Point $min, Point $max) + { + $point = $this->newPoint(array_fill(0, $this->dimension, null)); + + for ($n = 0; $n < $this->dimension; ++$n) { + $point[$n] = rand($min[$n], $max[$n]); + } + + return $point; + } + + /** + * @param int $clustersNumber + * @param int $initMethod + * + * @return array|Cluster[] + */ + public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM) + { + $clusters = $this->initializeClusters($clustersNumber, $initMethod); + + do { + } while (!$this->iterate($clusters)); + + return $clusters; + } + + /** + * @param $clustersNumber + * @param $initMethod + * + * @return array|Cluster[] + */ + protected function initializeClusters(int $clustersNumber, int $initMethod) + { + switch ($initMethod) { + case KMeans::INIT_RANDOM: + list($min, $max) = $this->getBoundaries(); + for ($n = 0; $n < $clustersNumber; ++$n) { + $clusters[] = new Cluster($this, $this->getRandomPoint($min, 
$max)->getCoordinates()); + } + break; + + case KMeans::INIT_KMEANS_PLUS_PLUS: + $position = rand(1, count($this)); + for ($i = 1, $this->rewind(); $i < $position && $this->valid(); $i++, $this->next()); + $clusters[] = new Cluster($this, $this->current()->getCoordinates()); + + $distances = new SplObjectStorage(); + + for ($i = 1; $i < $clustersNumber; ++$i) { + $sum = 0; + foreach ($this as $point) { + $distance = $point->getDistanceWith($point->getClosest($clusters)); + $sum += $distances[$point] = $distance; + } + + $sum = rand(0, (int) $sum); + foreach ($this as $point) { + if (($sum -= $distances[$point]) > 0) { + continue; + } + + $clusters[] = new Cluster($this, $point->getCoordinates()); + break; + } + } + + break; + } + $clusters[0]->attachAll($this); + + return $clusters; + } + + /** + * @param $clusters + * + * @return bool + */ + protected function iterate($clusters) + { + $convergence = true; + + $attach = new SplObjectStorage(); + $detach = new SplObjectStorage(); + + foreach ($clusters as $cluster) { + foreach ($cluster as $point) { + $closest = $point->getClosest($clusters); + + if ($closest !== $cluster) { + isset($attach[$closest]) || $attach[$closest] = new SplObjectStorage(); + isset($detach[$cluster]) || $detach[$cluster] = new SplObjectStorage(); + + $attach[$closest]->attach($point); + $detach[$cluster]->attach($point); + + $convergence = false; + } + } + } + + foreach ($attach as $cluster) { + $cluster->attachAll($attach[$cluster]); + } + + foreach ($detach as $cluster) { + $cluster->detachAll($detach[$cluster]); + } + + foreach ($clusters as $cluster) { + $cluster->updateCentroid(); + } + + return $convergence; + } +} diff --git a/src/Phpml/Dataset/ArrayDataset.php b/src/Phpml/Dataset/ArrayDataset.php index d117122..7c5c2b5 100644 --- a/src/Phpml/Dataset/ArrayDataset.php +++ b/src/Phpml/Dataset/ArrayDataset.php @@ -27,7 +27,7 @@ class ArrayDataset implements Dataset public function __construct(array $samples, array $labels) { if 
(count($samples) != count($labels)) { - throw InvalidArgumentException::sizeNotMatch(); + throw InvalidArgumentException::arraySizeNotMatch(); } $this->samples = $samples; diff --git a/src/Phpml/Dataset/CsvDataset.php b/src/Phpml/Dataset/CsvDataset.php index e6dafd2..7d1f91e 100644 --- a/src/Phpml/Dataset/CsvDataset.php +++ b/src/Phpml/Dataset/CsvDataset.php @@ -26,19 +26,18 @@ class CsvDataset extends ArrayDataset throw DatasetException::missingFile(basename($filepath)); } - $row = 0; - if (($handle = fopen($filepath, 'r')) !== false) { - while (($data = fgetcsv($handle, 1000, ',')) !== false) { - ++$row; - if ($headingRow && $row == 1) { - continue; - } - $this->samples[] = array_slice($data, 0, $features); - $this->labels[] = $data[$features]; - } - fclose($handle); - } else { + if (false === $handle = fopen($filepath, 'r')) { throw DatasetException::cantOpenFile(basename($filepath)); } + + if ($headingRow) { + fgets($handle); + } + + while (($data = fgetcsv($handle, 1000, ',')) !== false) { + $this->samples[] = array_slice($data, 0, $features); + $this->labels[] = $data[$features]; + } + fclose($handle); } } diff --git a/src/Phpml/Dataset/Dataset.php b/src/Phpml/Dataset/Dataset.php index 4e04931..2bc4043 100644 --- a/src/Phpml/Dataset/Dataset.php +++ b/src/Phpml/Dataset/Dataset.php @@ -6,6 +6,7 @@ namespace Phpml\Dataset; interface Dataset { + const SOME = 'z'; /** * @return array */ diff --git a/src/Phpml/Dataset/Demo/Glass.php b/src/Phpml/Dataset/Demo/Glass.php new file mode 100644 index 0000000..2a3d7e2 --- /dev/null +++ b/src/Phpml/Dataset/Demo/Glass.php @@ -0,0 +1,28 @@ +lambda = $lambda; + } + + /** + * @param array $a + * @param array $b + * + * @return float + * + * @throws InvalidArgumentException + */ + public function distance(array $a, array $b): float + { + if (count($a) !== count($b)) { + throw InvalidArgumentException::arraySizeNotMatch(); + } + + $distance = 0; + $count = count($a); + + for ($i = 0; $i < $count; ++$i) { + $distance += 
pow(abs($a[$i] - $b[$i]), $this->lambda); + } + + return pow($distance, 1 / $this->lambda); + } +} diff --git a/src/Phpml/Math/Kernel.php b/src/Phpml/Math/Kernel.php new file mode 100644 index 0000000..953a5fa --- /dev/null +++ b/src/Phpml/Math/Kernel.php @@ -0,0 +1,16 @@ +gamma = $gamma; + } + + /** + * @param float $a + * @param float $b + * + * @return float + */ + public function compute($a, $b) + { + $score = 2 * Product::scalar($a, $b); + $squares = Product::scalar($a, $a) + Product::scalar($b, $b); + $result = exp(-$this->gamma * ($squares - $score)); + + return $result; + } +} diff --git a/src/Phpml/Math/Matrix.php b/src/Phpml/Math/Matrix.php new file mode 100644 index 0000000..7c04119 --- /dev/null +++ b/src/Phpml/Math/Matrix.php @@ -0,0 +1,273 @@ +rows = count($matrix); + $this->columns = count($matrix[0]); + + if ($validate) { + for ($i = 0; $i < $this->rows; ++$i) { + if (count($matrix[$i]) !== $this->columns) { + throw InvalidArgumentException::matrixDimensionsDidNotMatch(); + } + } + } + + $this->matrix = $matrix; + } + + /** + * @param array $array + * + * @return Matrix + */ + public static function fromFlatArray(array $array) + { + $matrix = []; + foreach ($array as $value) { + $matrix[] = [$value]; + } + + return new self($matrix); + } + + /** + * @return array + */ + public function toArray() + { + return $this->matrix; + } + + /** + * @return int + */ + public function getRows() + { + return $this->rows; + } + + /** + * @return int + */ + public function getColumns() + { + return $this->columns; + } + + /** + * @param $column + * + * @return array + * + * @throws MatrixException + */ + public function getColumnValues($column) + { + if ($column >= $this->columns) { + throw MatrixException::columnOutOfRange(); + } + + $values = []; + for ($i = 0; $i < $this->rows; ++$i) { + $values[] = $this->matrix[$i][$column]; + } + + return $values; + } + + /** + * @return float|int + * + * @throws MatrixException + */ + public function getDeterminant() + { + 
if ($this->determinant) { + return $this->determinant; + } + + if (!$this->isSquare()) { + throw MatrixException::notSquareMatrix(); + } + + return $this->determinant = $this->calculateDeterminant(); + } + + /** + * @return float|int + * + * @throws MatrixException + */ + private function calculateDeterminant() + { + $determinant = 0; + if ($this->rows == 1 && $this->columns == 1) { + $determinant = $this->matrix[0][0]; + } elseif ($this->rows == 2 && $this->columns == 2) { + $determinant = + $this->matrix[0][0] * $this->matrix[1][1] - + $this->matrix[0][1] * $this->matrix[1][0]; + } else { + for ($j = 0; $j < $this->columns; ++$j) { + $subMatrix = $this->crossOut(0, $j); + $minor = $this->matrix[0][$j] * $subMatrix->getDeterminant(); + $determinant += fmod($j, 2) == 0 ? $minor : -$minor; + } + } + + return $determinant; + } + + /** + * @return bool + */ + public function isSquare() + { + return $this->columns === $this->rows; + } + + /** + * @return Matrix + */ + public function transpose() + { + $newMatrix = []; + for ($i = 0; $i < $this->rows; ++$i) { + for ($j = 0; $j < $this->columns; ++$j) { + $newMatrix[$j][$i] = $this->matrix[$i][$j]; + } + } + + return new self($newMatrix, false); + } + + /** + * @param Matrix $matrix + * + * @return Matrix + * + * @throws InvalidArgumentException + */ + public function multiply(Matrix $matrix) + { + if ($this->columns != $matrix->getRows()) { + throw InvalidArgumentException::inconsistentMatrixSupplied(); + } + + $product = []; + $multiplier = $matrix->toArray(); + for ($i = 0; $i < $this->rows; ++$i) { + for ($j = 0; $j < $matrix->getColumns(); ++$j) { + $product[$i][$j] = 0; + for ($k = 0; $k < $this->columns; ++$k) { + $product[$i][$j] += $this->matrix[$i][$k] * $multiplier[$k][$j]; + } + } + } + + return new self($product, false); + } + + /** + * @param $value + * + * @return Matrix + */ + public function divideByScalar($value) + { + $newMatrix = array(); + for ($i = 0; $i < $this->rows; ++$i) { + for ($j = 0; $j < 
$this->columns; ++$j) { + $newMatrix[$i][$j] = $this->matrix[$i][$j] / $value; + } + } + + return new self($newMatrix, false); + } + + /** + * @return Matrix + * + * @throws MatrixException + */ + public function inverse() + { + if (!$this->isSquare()) { + throw MatrixException::notSquareMatrix(); + } + + $newMatrix = array(); + for ($i = 0; $i < $this->rows; ++$i) { + for ($j = 0; $j < $this->columns; ++$j) { + $minor = $this->crossOut($i, $j)->getDeterminant(); + $newMatrix[$i][$j] = fmod($i + $j, 2) == 0 ? $minor : -$minor; + } + } + + $cofactorMatrix = new self($newMatrix, false); + + return $cofactorMatrix->transpose()->divideByScalar($this->getDeterminant()); + } + + /** + * @param int $row + * @param int $column + * + * @return Matrix + */ + public function crossOut(int $row, int $column) + { + $newMatrix = []; + $r = 0; + for ($i = 0; $i < $this->rows; ++$i) { + $c = 0; + if ($row != $i) { + for ($j = 0; $j < $this->columns; ++$j) { + if ($column != $j) { + $newMatrix[$r][$c] = $this->matrix[$i][$j]; + ++$c; + } + } + ++$r; + } + } + + return new self($newMatrix, false); + } +} diff --git a/src/Phpml/Math/Product.php b/src/Phpml/Math/Product.php new file mode 100644 index 0000000..70accb9 --- /dev/null +++ b/src/Phpml/Math/Product.php @@ -0,0 +1,24 @@ + $value) { + $product += $value * $b[$index]; + } + + return $product; + } +} diff --git a/src/Phpml/Math/Statistic/Correlation.php b/src/Phpml/Math/Statistic/Correlation.php new file mode 100644 index 0000000..1d2e8ac --- /dev/null +++ b/src/Phpml/Math/Statistic/Correlation.php @@ -0,0 +1,45 @@ + $label) { - if ($label === $predictedLabels[$index]) { + if ($label == $predictedLabels[$index]) { ++$score; } } diff --git a/src/Phpml/Regression/LeastSquares.php b/src/Phpml/Regression/LeastSquares.php new file mode 100644 index 0000000..cd0251f --- /dev/null +++ b/src/Phpml/Regression/LeastSquares.php @@ -0,0 +1,116 @@ +samples = $samples; + $this->targets = $targets; + + $this->computeCoefficients(); + } + + /** 
+ * @param array $sample + * + * @return mixed + */ + public function predict($sample) + { + $result = $this->intercept; + foreach ($this->coefficients as $index => $coefficient) { + $result += $coefficient * $sample[$index]; + } + + return $result; + } + + /** + * @return array + */ + public function getCoefficients() + { + return $this->coefficients; + } + + /** + * @return float + */ + public function getIntercept() + { + return $this->intercept; + } + + /** + * coefficient(b) = (X'X)-1X'Y. + */ + private function computeCoefficients() + { + $samplesMatrix = $this->getSamplesMatrix(); + $targetsMatrix = $this->getTargetsMatrix(); + + $ts = $samplesMatrix->transpose()->multiply($samplesMatrix)->inverse(); + $tf = $samplesMatrix->transpose()->multiply($targetsMatrix); + + $this->coefficients = $ts->multiply($tf)->getColumnValues(0); + $this->intercept = array_shift($this->coefficients); + } + + /** + * Add one dimension for intercept calculation. + * + * @return Matrix + */ + private function getSamplesMatrix() + { + $samples = []; + foreach ($this->samples as $sample) { + array_unshift($sample, 1); + $samples[] = $sample; + } + + return new Matrix($samples); + } + + /** + * @return Matrix + */ + private function getTargetsMatrix() + { + if (is_array($this->targets[0])) { + return new Matrix($this->targets); + } + + return Matrix::fromFlatArray($this->targets); + } +} diff --git a/src/Phpml/Regression/Regression.php b/src/Phpml/Regression/Regression.php new file mode 100644 index 0000000..a7837d4 --- /dev/null +++ b/src/Phpml/Regression/Regression.php @@ -0,0 +1,21 @@ +assertEquals($testLabels, $predicted); } - public function testAccuracyOnIrisDataset() + public function testPredictArrayOfSamplesUsingChebyshevDistanceMetric() { - $dataset = new RandomSplit(new Iris(), $testSize = 0.5, $seed = 123); - $classifier = new KNearestNeighbors($k = 4); - $classifier->train($dataset->getTrainSamples(), $dataset->getTrainLabels()); - $predicted = 
$classifier->predict($dataset->getTestSamples()); - $score = Accuracy::score($dataset->getTestLabels(), $predicted); + $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; - $this->assertEquals(0.96, $score); + $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]; + $testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a']; + + $classifier = new KNearestNeighbors(3, new Chebyshev()); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + $this->assertEquals($testLabels, $predicted); } } diff --git a/tests/Phpml/Classification/NaiveBayesTest.php b/tests/Phpml/Classification/NaiveBayesTest.php new file mode 100644 index 0000000..3482cf5 --- /dev/null +++ b/tests/Phpml/Classification/NaiveBayesTest.php @@ -0,0 +1,38 @@ +train($samples, $labels); + + $this->assertEquals('a', $classifier->predict([3, 1, 1])); + $this->assertEquals('b', $classifier->predict([1, 4, 1])); + $this->assertEquals('c', $classifier->predict([1, 1, 6])); + } + + public function testPredictArrayOfSamples() + { + $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; + $trainLabels = ['a', 'b', 'c']; + + $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8], [1, 1, 2], [2, 3, 2], [1, 2, 1], [9, 5, 1], [3, 1, 2]]; + $testLabels = ['a', 'a', 'c', 'c', 'b', 'b', 'a', 'a']; + + $classifier = new NaiveBayes(); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + $this->assertEquals($testLabels, $predicted); + } +} diff --git a/tests/Phpml/Clustering/DBSCANTest.php b/tests/Phpml/Clustering/DBSCANTest.php new file mode 100644 index 0000000..be37fff --- /dev/null +++ b/tests/Phpml/Clustering/DBSCANTest.php @@ -0,0 +1,33 @@ +assertEquals($clustered, $dbscan->cluster($samples)); + + $samples = [[1, 1], [6, 6], [1, -1], [5, 6], [-1, -1], [7, 8], [-1, 1], [7, 7]]; + $clustered = [ + [[1, 1], [1, -1], [-1, -1], [-1, 1]], + [[6, 6], 
[5, 6], [7, 8], [7, 7]], + ]; + + $dbscan = new DBSCAN($epsilon = 3, $minSamples = 4); + + $this->assertEquals($clustered, $dbscan->cluster($samples)); + } +} diff --git a/tests/Phpml/Clustering/KMeansTest.php b/tests/Phpml/Clustering/KMeansTest.php new file mode 100644 index 0000000..5a85b38 --- /dev/null +++ b/tests/Phpml/Clustering/KMeansTest.php @@ -0,0 +1,51 @@ +cluster($samples); + + $this->assertEquals(2, count($clusters)); + + foreach ($samples as $index => $sample) { + if (in_array($sample, $clusters[0]) || in_array($sample, $clusters[1])) { + unset($samples[$index]); + } + } + $this->assertEquals(0, count($samples)); + } + + public function testKMeansInitializationMethods() + { + $samples = [ + [180, 155], [186, 159], [119, 185], [141, 147], [157, 158], + [176, 122], [194, 160], [113, 193], [190, 148], [152, 154], + [162, 146], [188, 144], [185, 124], [163, 114], [151, 140], + [175, 131], [186, 162], [181, 195], [147, 122], [143, 195], + [171, 119], [117, 165], [169, 121], [159, 160], [159, 112], + [115, 122], [149, 193], [156, 135], [118, 120], [139, 159], + [150, 115], [181, 136], [167, 162], [132, 115], [175, 165], + [110, 147], [175, 118], [113, 145], [130, 162], [195, 179], + [164, 111], [192, 114], [194, 149], [139, 113], [160, 168], + [162, 110], [174, 144], [137, 142], [197, 160], [147, 173], + ]; + + $kmeans = new KMeans(4, KMeans::INIT_KMEANS_PLUS_PLUS); + $clusters = $kmeans->cluster($samples); + $this->assertEquals(4, count($clusters)); + + $kmeans = new KMeans(4, KMeans::INIT_RANDOM); + $clusters = $kmeans->cluster($samples); + $this->assertEquals(4, count($clusters)); + } +} diff --git a/tests/Phpml/Dataset/CsvDatasetTest.php b/tests/Phpml/Dataset/CsvDatasetTest.php index db87d62..2994504 100644 --- a/tests/Phpml/Dataset/CsvDatasetTest.php +++ b/tests/Phpml/Dataset/CsvDatasetTest.php @@ -16,7 +16,7 @@ class CsvDatasetTest extends \PHPUnit_Framework_TestCase new CsvDataset('missingFile', 3); } - public function testSampleCsvDataset() + public 
function testSampleCsvDatasetWithHeaderRow() { $filePath = dirname(__FILE__).'/Resources/dataset.csv'; @@ -25,4 +25,14 @@ class CsvDatasetTest extends \PHPUnit_Framework_TestCase $this->assertEquals(10, count($dataset->getSamples())); $this->assertEquals(10, count($dataset->getLabels())); } + + public function testSampleCsvDatasetWithoutHeaderRow() + { + $filePath = dirname(__FILE__).'/Resources/dataset.csv'; + + $dataset = new CsvDataset($filePath, 2, false); + + $this->assertEquals(11, count($dataset->getSamples())); + $this->assertEquals(11, count($dataset->getLabels())); + } } diff --git a/tests/Phpml/Dataset/Demo/GlassTest.php b/tests/Phpml/Dataset/Demo/GlassTest.php new file mode 100644 index 0000000..6f6e177 --- /dev/null +++ b/tests/Phpml/Dataset/Demo/GlassTest.php @@ -0,0 +1,22 @@ +assertEquals(214, count($glass->getSamples())); + $this->assertEquals(214, count($glass->getLabels())); + + // one sample features count + $this->assertEquals(9, count($glass->getSamples()[0])); + } +} diff --git a/tests/Phpml/Dataset/Demo/WineTest.php b/tests/Phpml/Dataset/Demo/WineTest.php new file mode 100644 index 0000000..de16483 --- /dev/null +++ b/tests/Phpml/Dataset/Demo/WineTest.php @@ -0,0 +1,22 @@ +assertEquals(178, count($wine->getSamples())); + $this->assertEquals(178, count($wine->getLabels())); + + // one sample features count + $this->assertEquals(13, count($wine->getSamples()[0])); + } +} diff --git a/tests/Phpml/Math/Distance/ChebyshevTest.php b/tests/Phpml/Math/Distance/ChebyshevTest.php new file mode 100644 index 0000000..78fb2a0 --- /dev/null +++ b/tests/Phpml/Math/Distance/ChebyshevTest.php @@ -0,0 +1,64 @@ +distanceMetric = new Chebyshev(); + } + + /** + * @expectedException \Phpml\Exception\InvalidArgumentException + */ + public function testThrowExceptionOnInvalidArguments() + { + $a = [0, 1, 2]; + $b = [0, 2]; + + $this->distanceMetric->distance($a, $b); + } + + public function testCalculateDistanceForOneDimension() + { + $a = [4]; + $b = [2]; + + 
$expectedDistance = 2; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } + + public function testCalculateDistanceForTwoDimensions() + { + $a = [4, 6]; + $b = [2, 5]; + + $expectedDistance = 2; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } + + public function testCalculateDistanceForThreeDimensions() + { + $a = [6, 10, 3]; + $b = [2, 5, 5]; + + $expectedDistance = 5; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } +} diff --git a/tests/Phpml/Math/Distance/EuclideanTest.php b/tests/Phpml/Math/Distance/EuclideanTest.php new file mode 100644 index 0000000..a3dea3c --- /dev/null +++ b/tests/Phpml/Math/Distance/EuclideanTest.php @@ -0,0 +1,64 @@ +distanceMetric = new Euclidean(); + } + + /** + * @expectedException \Phpml\Exception\InvalidArgumentException + */ + public function testThrowExceptionOnInvalidArguments() + { + $a = [0, 1, 2]; + $b = [0, 2]; + + $this->distanceMetric->distance($a, $b); + } + + public function testCalculateDistanceForOneDimension() + { + $a = [4]; + $b = [2]; + + $expectedDistance = 2; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } + + public function testCalculateDistanceForTwoDimensions() + { + $a = [4, 6]; + $b = [2, 5]; + + $expectedDistance = 2.2360679774998; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } + + public function testCalculateDistanceForThreeDimensions() + { + $a = [6, 10, 3]; + $b = [2, 5, 5]; + + $expectedDistance = 6.7082039324993694; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } +} diff --git a/tests/Phpml/Math/Distance/ManhattanTest.php 
b/tests/Phpml/Math/Distance/ManhattanTest.php new file mode 100644 index 0000000..7d0cf2d --- /dev/null +++ b/tests/Phpml/Math/Distance/ManhattanTest.php @@ -0,0 +1,64 @@ +distanceMetric = new Manhattan(); + } + + /** + * @expectedException \Phpml\Exception\InvalidArgumentException + */ + public function testThrowExceptionOnInvalidArguments() + { + $a = [0, 1, 2]; + $b = [0, 2]; + + $this->distanceMetric->distance($a, $b); + } + + public function testCalculateDistanceForOneDimension() + { + $a = [4]; + $b = [2]; + + $expectedDistance = 2; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } + + public function testCalculateDistanceForTwoDimensions() + { + $a = [4, 6]; + $b = [2, 5]; + + $expectedDistance = 3; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } + + public function testCalculateDistanceForThreeDimensions() + { + $a = [6, 10, 3]; + $b = [2, 5, 5]; + + $expectedDistance = 11; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } +} diff --git a/tests/Phpml/Math/Distance/MinkowskiTest.php b/tests/Phpml/Math/Distance/MinkowskiTest.php new file mode 100644 index 0000000..ad9318d --- /dev/null +++ b/tests/Phpml/Math/Distance/MinkowskiTest.php @@ -0,0 +1,77 @@ +distanceMetric = new Minkowski(); + } + + /** + * @expectedException \Phpml\Exception\InvalidArgumentException + */ + public function testThrowExceptionOnInvalidArguments() + { + $a = [0, 1, 2]; + $b = [0, 2]; + + $this->distanceMetric->distance($a, $b); + } + + public function testCalculateDistanceForOneDimension() + { + $a = [4]; + $b = [2]; + + $expectedDistance = 2; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance); + } + + public function testCalculateDistanceForTwoDimensions() + { + $a = [4, 6]; + $b = [2, 5]; 
+ + $expectedDistance = 2.080; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance, '', $delta = 0.001); + } + + public function testCalculateDistanceForThreeDimensions() + { + $a = [6, 10, 3]; + $b = [2, 5, 5]; + + $expectedDistance = 5.819; + $actualDistance = $this->distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance, '', $delta = 0.001); + } + + public function testCalculateDistanceForThreeDimensionsWithDifferentLambda() + { + $distanceMetric = new Minkowski($lambda = 5); + + $a = [6, 10, 3]; + $b = [2, 5, 5]; + + $expectedDistance = 5.300; + $actualDistance = $distanceMetric->distance($a, $b); + + $this->assertEquals($expectedDistance, $actualDistance, '', $delta = 0.001); + } +} diff --git a/tests/Phpml/Math/Kernel/RBFTest.php b/tests/Phpml/Math/Kernel/RBFTest.php new file mode 100644 index 0000000..5b9bcb4 --- /dev/null +++ b/tests/Phpml/Math/Kernel/RBFTest.php @@ -0,0 +1,25 @@ +assertEquals(1, $rbf->compute([1, 2], [1, 2])); + $this->assertEquals(0.97336, $rbf->compute([1, 2, 3], [4, 5, 6]), '', $delta = 0.0001); + $this->assertEquals(0.00011, $rbf->compute([4, 5], [1, 100]), '', $delta = 0.0001); + + $rbf = new RBF($gamma = 0.2); + + $this->assertEquals(1, $rbf->compute([1, 2], [1, 2])); + $this->assertEquals(0.00451, $rbf->compute([1, 2, 3], [4, 5, 6]), '', $delta = 0.0001); + $this->assertEquals(0, $rbf->compute([4, 5], [1, 100])); + } +} diff --git a/tests/Phpml/Math/MatrixTest.php b/tests/Phpml/Math/MatrixTest.php new file mode 100644 index 0000000..64bb903 --- /dev/null +++ b/tests/Phpml/Math/MatrixTest.php @@ -0,0 +1,176 @@ +assertInstanceOf(Matrix::class, $matrix); + $this->assertEquals([[1], [2], [3], [4]], $matrix->toArray()); + $this->assertEquals(4, $matrix->getRows()); + $this->assertEquals(1, $matrix->getColumns()); + $this->assertEquals($flatArray, $matrix->getColumnValues(0)); + } + + /** + * @expectedException 
\Phpml\Exception\MatrixException + */ + public function testThrowExceptionOnInvalidColumnNumber() + { + $matrix = new Matrix([[1, 2, 3], [4, 5, 6]]); + $matrix->getColumnValues(4); + } + + /** + * @expectedException \Phpml\Exception\MatrixException + */ + public function testThrowExceptionOnGetDeterminantIfArrayIsNotSquare() + { + $matrix = new Matrix([[1, 2, 3], [4, 5, 6]]); + $matrix->getDeterminant(); + } + + public function testGetMatrixDeterminant() + { + //http://matrix.reshish.com/determinant.php + $matrix = new Matrix([ + [3, 3, 3], + [4, 2, 1], + [5, 6, 7], + ]); + $this->assertEquals(-3, $matrix->getDeterminant()); + + $matrix = new Matrix([ + [1, 2, 3, 3, 2, 1], + [1 / 2, 5, 6, 7, 1, 1], + [3 / 2, 7 / 2, 2, 0, 6, 8], + [1, 8, 10, 1, 2, 2], + [1 / 4, 4, 1, 0, 2, 3 / 7], + [1, 8, 7, 5, 4, 4 / 5], + ]); + $this->assertEquals(1116.5035, $matrix->getDeterminant(), '', $delta = 0.0001); + } + + public function testMatrixTranspose() + { + $matrix = new Matrix([ + [3, 3, 3], + [4, 2, 1], + [5, 6, 7], + ]); + + $transposedMatrix = [ + [3, 4, 5], + [3, 2, 6], + [3, 1, 7], + ]; + + $this->assertEquals($transposedMatrix, $matrix->transpose()->toArray()); + } + + /** + * @expectedException \Phpml\Exception\InvalidArgumentException + */ + public function testThrowExceptionOnMultiplyWhenInconsistentMatrixSupplied() + { + $matrix1 = new Matrix([[1, 2, 3], [4, 5, 6]]); + $matrix2 = new Matrix([[3, 2, 1], [6, 5, 4]]); + + $matrix1->multiply($matrix2); + } + + public function testMatrixMultiplyByMatrix() + { + $matrix1 = new Matrix([ + [1, 2, 3], + [4, 5, 6], + ]); + + $matrix2 = new Matrix([ + [7, 8], + [9, 10], + [11, 12], + ]); + + $product = [ + [58, 64], + [139, 154], + ]; + + $this->assertEquals($product, $matrix1->multiply($matrix2)->toArray()); + } + + public function testDivideByScalar() + { + $matrix = new Matrix([ + [4, 6, 8], + [2, 10, 20], + ]); + + $quotient = [ + [2, 3, 4], + [1, 5, 10], + ]; + + $this->assertEquals($quotient, 
$matrix->divideByScalar(2)->toArray()); + } + + /** + * @expectedException \Phpml\Exception\MatrixException + */ + public function testThrowExceptionWhenInverseIfArrayIsNotSquare() + { + $matrix = new Matrix([[1, 2, 3], [4, 5, 6]]); + $matrix->inverse(); + } + + public function testInverseMatrix() + { + //http://ncalculators.com/matrix/inverse-matrix.htm + $matrix = new Matrix([ + [3, 4, 2], + [4, 5, 5], + [1, 1, 1], + ]); + + $inverseMatrix = [ + [0, -1, 5], + [1 / 2, 1 / 2, -7 / 2], + [-1 / 2, 1 / 2, -1 / 2], + ]; + + $this->assertEquals($inverseMatrix, $matrix->inverse()->toArray(), '', $delta = 0.0001); + } + + public function testCrossOutMatrix() + { + $matrix = new Matrix([ + [3, 4, 2], + [4, 5, 5], + [1, 1, 1], + ]); + + $crossOuted = [ + [3, 2], + [1, 1], + ]; + + $this->assertEquals($crossOuted, $matrix->crossOut(1, 1)->toArray()); + } +} diff --git a/tests/Phpml/Math/ProductTest.php b/tests/Phpml/Math/ProductTest.php new file mode 100644 index 0000000..aba0ff2 --- /dev/null +++ b/tests/Phpml/Math/ProductTest.php @@ -0,0 +1,17 @@ +assertEquals(10, Product::scalar([2, 3], [-1, 4])); + $this->assertEquals(-0.1, Product::scalar([1, 4, 1], [-2, 0.5, -0.1])); + $this->assertEquals(8, Product::scalar([2], [4])); + } +} diff --git a/tests/Phpml/Math/Statistic/CorrelationTest.php b/tests/Phpml/Math/Statistic/CorrelationTest.php new file mode 100644 index 0000000..948dc16 --- /dev/null +++ b/tests/Phpml/Math/Statistic/CorrelationTest.php @@ -0,0 +1,38 @@ +assertEquals(-0.641, Correlation::pearson($x, $y), '', $delta); + + //http://www.statisticshowto.com/how-to-compute-pearsons-correlation-coefficients/ + $delta = 0.001; + $x = [43, 21, 25, 42, 57, 59]; + $y = [99, 65, 79, 75, 87, 82]; + $this->assertEquals(0.549, Correlation::pearson($x, $y), '', $delta); + + $delta = 0.001; + $x = [60, 61, 62, 63, 65]; + $y = [3.1, 3.6, 3.8, 4, 4.1]; + $this->assertEquals(0.911, Correlation::pearson($x, $y), '', $delta); + } + + /** + * @expectedException 
\Phpml\Exception\InvalidArgumentException + */ + public function testThrowExceptionOnInvalidArgumentsForPearsonCorrelation() + { + Correlation::pearson([1, 2, 4], [3, 5]); + } +} diff --git a/tests/Phpml/Math/Statistic/MeanTest.php b/tests/Phpml/Math/Statistic/MeanTest.php new file mode 100644 index 0000000..f0dca3b --- /dev/null +++ b/tests/Phpml/Math/Statistic/MeanTest.php @@ -0,0 +1,18 @@ +assertEquals(3.5, Mean::arithmetic([2, 5]), '', $delta); + $this->assertEquals(41.16, Mean::arithmetic([43, 21, 25, 42, 57, 59]), '', $delta); + $this->assertEquals(1.7, Mean::arithmetic([0.5, 0.5, 1.5, 2.5, 3.5]), '', $delta); + } +} diff --git a/tests/Phpml/Math/Statistic/StandardDeviationTest.php b/tests/Phpml/Math/Statistic/StandardDeviationTest.php new file mode 100644 index 0000000..299c979 --- /dev/null +++ b/tests/Phpml/Math/Statistic/StandardDeviationTest.php @@ -0,0 +1,42 @@ +assertEquals(1.825, StandardDeviation::population($population), '', $delta); + + //http://www.stat.wmich.edu/s216/book/node126.html + $delta = 0.5; + $population = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025]; + $this->assertEquals(4079, StandardDeviation::population($population), '', $delta); + + $population = [9300, 10565, 15000, 15000, 17764, 57000, 65940, 73676, 77006, 93739, 146088, 153260]; + $this->assertEquals(50989, StandardDeviation::population($population), '', $delta); + } + + /** + * @expectedException \Phpml\Exception\InvalidArgumentException + */ + public function testThrowExceptionOnEmptyArrayIfNotSample() + { + StandardDeviation::population([], false); + } + + /** + * @expectedException \Phpml\Exception\InvalidArgumentException + */ + public function testThrowExceptionOnToSmallArray() + { + StandardDeviation::population([1]); + } +} diff --git a/tests/Phpml/Metric/DistanceTest.php b/tests/Phpml/Metric/DistanceTest.php deleted file mode 100644 index b5fdc75..0000000 --- a/tests/Phpml/Metric/DistanceTest.php +++ /dev/null @@ -1,51 +0,0 @@ 
-assertEquals($expectedDistance, $actualDistance); - } - - public function testCalculateEuclideanDistanceForTwoAndMoreDimension() - { - $a = [4, 6]; - $b = [2, 5]; - - $expectedDistance = 2.2360679774998; - $actualDistance = Distance::euclidean($a, $b); - - $this->assertEquals($expectedDistance, $actualDistance); - - $a = [6, 10, 3]; - $b = [2, 5, 5]; - - $expectedDistance = 6.7082039324993694; - $actualDistance = Distance::euclidean($a, $b); - - $this->assertEquals($expectedDistance, $actualDistance); - } -} diff --git a/tests/Phpml/Regression/LeastSquaresTest.php b/tests/Phpml/Regression/LeastSquaresTest.php new file mode 100644 index 0000000..8bd444f --- /dev/null +++ b/tests/Phpml/Regression/LeastSquaresTest.php @@ -0,0 +1,68 @@ +train($samples, $targets); + + $this->assertEquals(4.06, $regression->predict([64]), '', $delta); + + //http://www.stat.wmich.edu/s216/book/node127.html + $samples = [[9300], [10565], [15000], [15000], [17764], [57000], [65940], [73676], [77006], [93739], [146088], [153260]]; + $targets = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025]; + + $regression = new LeastSquares(); + $regression->train($samples, $targets); + + $this->assertEquals(7659.35, $regression->predict([9300]), '', $delta); + $this->assertEquals(5213.81, $regression->predict([57000]), '', $delta); + $this->assertEquals(4188.13, $regression->predict([77006]), '', $delta); + $this->assertEquals(7659.35, $regression->predict([9300]), '', $delta); + $this->assertEquals(278.66, $regression->predict([153260]), '', $delta); + } + + public function testPredictSingleFeatureSamplesWithMatrixTargets() + { + $delta = 0.01; + + //https://www.easycalculation.com/analytical/learn-least-square-regression.php + $samples = [[60], [61], [62], [63], [65]]; + $targets = [[3.1], [3.6], [3.8], [4], [4.1]]; + + $regression = new LeastSquares(); + $regression->train($samples, $targets); + + $this->assertEquals(4.06, $regression->predict([64]), '', $delta); + } + + 
public function testPredictMultiFeaturesSamples() + { + $delta = 0.01; + + //http://www.stat.wmich.edu/s216/book/node129.html + $samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]]; + $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; + + $regression = new LeastSquares(); + $regression->train($samples, $targets); + + $this->assertEquals(-800614.957, $regression->getIntercept(), '', $delta); + $this->assertEquals([-0.0327, 404.14], $regression->getCoefficients(), '', $delta); + $this->assertEquals(4094.82, $regression->predict([60000, 1996]), '', $delta); + $this->assertEquals(5711.40, $regression->predict([60000, 2000]), '', $delta); + } +}