diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..173228f --- /dev/null +++ b/.editorconfig @@ -0,0 +1,9 @@ +root = true + +[*] +end_of_line = lf +charset = utf-8 +max_line_length = 80 +indent_style = space +indent_size = 4 +insert_final_newline = true diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..93f6619 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,14 @@ +* text=auto + +/docs export-ignore +/tests export-ignore +/.gitattributes export-ignore +/.gitignore export-ignore +/.travis.yml export-ignore +/ecs.yml export-ignore +/CHANGELOG.md export-ignore +/CONTRIBUTING.md export-ignore +/mkdocs.yml export-ignore +/phpbench.json export-ignore +/phpstan.neon export-ignore +/phpunit.xml export-ignore diff --git a/.gitignore b/.gitignore index 73854f2..3adb3ef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ /vendor/ -humbuglog.* -/bin/phpunit +/build +/tests/Performance/Data/*.csv +.php_cs.cache +.phpunit.result.cache diff --git a/.travis.yml b/.travis.yml index 247e222..30ee823 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,38 @@ language: php -php: - - '7.0' -before_script: composer install -script: bin/phpunit \ No newline at end of file + +matrix: + fast_finish: true + + include: + - os: linux + php: '7.2' + env: PHPUNIT_FLAGS="--coverage-clover build/logs/clover.xml" DISABLE_XDEBUG="true" STATIC_ANALYSIS="true" + + - os: linux + php: '7.3' + + - os: linux + php: '7.4' + +cache: + directories: + - $HOME/.composer/cache + +before_install: + - if [[ $DISABLE_XDEBUG == "true" ]]; then phpenv config-rm xdebug.ini; fi + +install: + - curl -s http://getcomposer.org/installer | php + - php composer.phar install --no-interaction --ignore-platform-reqs + +script: + - vendor/bin/phpunit $PHPUNIT_FLAGS + - if [[ $STATIC_ANALYSIS != "" ]]; then composer check-cs; fi + - if [[ $STATIC_ANALYSIS != "" ]]; then composer phpstan; fi + +after_success: + - | + if [[ $PHPUNIT_FLAGS != "" ]]; then + wget 
https://github.com/php-coveralls/php-coveralls/releases/download/v2.0.0/php-coveralls.phar + php php-coveralls.phar --verbose; + fi diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..b403887 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,151 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.9.0] - Unreleased +### Added +- [Preprocessing] Implement LabelEncoder +- [Preprocessing] Implement ColumnFilter +- [Preprocessing] Implement LambdaTransformer +- [Preprocessing] Implement NumberConverter +- [Preprocessing] Implement OneHotEncoder +- [Workflow] Implement FeatureUnion +- [Metric] Add Regression metrics: meanSquaredError, meanSquaredLogarithmicError, meanAbsoluteError, medianAbsoluteError, r2Score, maxError +- [Regression] Implement DecisionTreeRegressor + +## [0.8.0] - 2019-03-20 +### Added +- [Tokenization] Added NGramTokenizer (#350) +- editorconfig file (#355) +### Fixed +- [Dataset] FilesDataset read samples without additional array (#363) +- [Tokenization] fixed error with numeric token values (#363) +### Changed +- [Math] improved performance with pow and sqrt replacement (#350) +- [Math] reduce duplicated code in distance metrics (#348) +- update phpunit to 7.5.1 (#335) +- code style fixes (#334) + +## [0.7.0] - 2018-11-07 +### Added +- [Clustering] added KMeans associative clustering (#262) +- [Dataset] added removeColumns function to ArrayDataset (#249) +- [Dataset] added a SvmDataset class for SVM-Light (or LibSVM) format files (#237) +- [Dataset] added Mnist Dataset for MNIST file format (#326) +- [Internal] Add performance test for LeastSquares (#263) + +### Changed +- [Internal] implement Keep a Changelog format +- [Classification] changed the default kernel type in SVC to Kernel::RBF (#267) +- [Optimizer] removed 
$initialTheta property and renamed setInitialTheta method to setTheta (#252) +- [Imputer] Throw exception when trying to transform without train data (#314) +- [Math] Micro optimization for matrix multiplication (#255) +- [Internal] Throw proper exception (#259, #251) +- [MLPClassifier] return labels in output (#315) +- [Internal] Update phpstan to 0.10.5 (#320) + +### Fixed +- [SVM] ensure DataTransformer::testSet samples array is not empty (#204) +- [Optimizer] optimizer initial theta randomization (#239) +- [Internal] travis build on osx (#281) +- [SVM] SVM locale (non-locale aware) (#288) +- [Internal] typo, tests, code styles and documentation fixes (#265, #261, #254, #253, #251, #250, #248, #245, #243, #317, #328) +- [Classification] Check if feature exist when predict target in NaiveBayes (#327) + +## [0.6.2] - 2018-02-22 +### Fixed +- Fix Apriori array keys (#238) + +## [0.6.1] - 2018-02-18 +### Fixed +- Fix KMeans and EigenvalueDecomposition (#235) + +## [0.6.0] - 2018-02-16 +- feature [FeatureSelection] implement SelectKBest with scoring functions (#232) +- feature [FeatureSelection] implement VarianceThreshold - simple baseline approach to feature selection. 
(#228) +- feature [Classification] support probability estimation in SVC (#218) +- feature [NeuralNetwork] configure an Activation Function per hidden layer (#208) +- feature [NeuralNetwork] Ability to update learningRate in MLP (#160) +- feature [Metric] Choose averaging method in classification report (#205) +- enhancement Add phpstan strict rules (#233) +- enhancement Flatten directory structure (#220) +- enhancement Update phpunit/phpunit (#219) +- enhancement Cache dependencies installed with composer on Travis (#215) +- enhancement Add support for coveralls.io (#153) +- enhancement Add phpstan and easy coding standards (#156, #168) +- enhancement Throw exception when libsvm command fails to run (#200, #202) +- enhancement Normalize composer.json and sort packages (#214, #210) +- enhancement Rewrite DBSCAN (#185) +- fix phpunit include tests path (#230) +- fix support of a rule in Apriori (#229) +- fix apriori generates an empty array as a part of the frequent item sets (#224) +- fix backpropagation random error (#157) +- fix logistic regression implementation (#169) +- fix activation functions support (#163) +- fix string representation of integer labels issue in NaiveBayes (#206) +- fix the implementation of conjugate gradient method (#184) +- typo, tests and documentation fixes (#234, #221, #181, #183, #155, #159, #165, #187, #154, #191, #203, #209, #213, #212, #211) + +## [0.5.0] - 2017-11-14 +- general [php] Upgrade to PHP 7.1 (#150) +- general [coding standard] fix imports order and drop unused docs typehints +- feature [NeuralNetwork] Add PReLU activation function (#128) +- feature [NeuralNetwork] Add ThresholdedReLU activation function (#129) +- feature [Dataset] Support CSV with long lines (#119) +- feature [NeuralNetwork] Neural networks partial training and persistency (#91) +- feature Add french stopwords (#92) +- feature New methods: setBinPath, setVarPath in SupportVectorMachine (#73) +- feature Linear Discriminant Analysis (LDA) (#82) +- feature 
Linear algebra operations, Dimensionality reduction and some other minor changes (#81) +- feature Partial training base (#78) +- feature Add delimiter option for CsvDataset (#66) +- feature LogisticRegression classifier & Optimization methods (#63) +- feature Additional training for SVR (#59) +- optimization Comparison - replace eval (#130) +- optimization Use C-style casts (#124) +- optimization Speed up DataTransformer (#122) +- bug DBSCAN fix for associative keys and array_merge performance optimization (#139) +- bug Ensure user-provided SupportVectorMachine paths are valid (#126) +- bug [DecisionTree] Fix string cast #120 (#121) +- bug fix invalid typehint for subs method (#110) +- bug Fix samples transformation in Pipeline training (#94) +- bug Fix division by 0 error during normalization (#83) +- bug Fix wrong docs references (#79) + +## [0.4.0] - 2017-02-23 +- feature [Classification] - Ensemble Classifiers : Bagging and RandomForest by Mustafa Karabulut +- feature [Classification] - RandomForest::getFeatureImportances() method by Mustafa Karabulut +- feature [Classification] - Linear classifiers: Perceptron, Adaline, DecisionStump by Mustafa Karabulut +- feature [Classification] - AdaBoost algorithm by Mustafa Karabulut +- bug [Math] - Check if matrix is singular doing inverse by Povilas Susinskas +- optimization - Euclidean optimization by Mustafa Karabulut + +## [0.3.0] - 2017-02-04 +- feature [Persistency] - ModelManager - save and restore trained models by David Monllaó +- feature [Classification] - DecisionTree implementation by Mustafa Karabulut +- feature [Clustering] - Fuzzy C Means implementation by Mustafa Karabulut +- other small fixes and code styles refactors + +## [0.2.1] - 2016-11-20 +- feature [Association] - Apriori algorithm implementation +- bug [Metric] - division by zero + +## [0.2.0] - 2016-08-14 +- feature [NeuralNetwork] - MultilayerPerceptron and Backpropagation training + +## [0.1.2] - 2016-07-24 +- feature [Dataset] - FilesDataset 
- load dataset from files (folder names as targets) +- feature [Metric] - ClassificationReport - report about trained classifier +- bug [Feature Extraction] - fix problem with token count vectorizer array order +- tests [General] - add more tests for specific conditions + +## [0.1.1] - 2016-07-12 +- feature [Cross Validation] Stratified Random Split - equal distribution for targets in split +- feature [General] Documentation - add missing pages (Pipeline, ConfusionMatrix and TfIdfTransformer) and fix links + +## [0.1.0] - 2016-07-08 +- first develop release +- base tools for Machine Learning: Algorithms, Cross Validation, Preprocessing, Feature Extraction +- bug [General] #7 - PHP-ML doesn't work on Mac diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..68dd849 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,66 @@ +# Contributing to PHP-ML + +PHP-ML is an open source project. If you'd like to contribute, please read the following text. Before I can merge your +Pull-Request here are some guidelines that you need to follow. These guidelines exist not to annoy you, but to keep the +code base clean, unified and future proof. + +## Branch + +You should only open pull requests against the `master` branch. + +## Unit Tests + +Please try to add a test for your pull-request. You can run the unit-tests by calling: + +```bash +vendor/bin/phpunit +``` + +## Performance Tests + +Before the first run, the bootstrap script will download all necessary datasets from the public repository `php-ai/php-ml-datasets`. + +Time performance tests: + +```bash +vendor/bin/phpbench run --report=time +``` + +Memory performance tests: + +```bash +vendor/bin/phpbench run --report=memory +``` + +## Travis + +GitHub automatically runs your pull request through Travis CI. +If you break the tests, I cannot merge your code, so please make sure that your code is working before opening up a Pull-Request. + +## Merge + +Please give me time to review your pull requests. 
I will give my best to review everything as fast as possible, but cannot always live up to my own expectations. + +## Coding Standards & Static Analysis + +When contributing code to PHP-ML, you must follow its coding standards. To do that, just run: + +```bash +composer fix-cs +``` +[More about EasyCodingStandard](https://github.com/Symplify/EasyCodingStandard) + +Code has to also pass static analysis by [PHPStan](https://github.com/phpstan/phpstan): + +```bash +composer phpstan +``` + + +## Documentation + +Please update the documentation pages if necessary. You can find them in docs/. + +--- + +Thank you very much again for your contribution! diff --git a/LICENSE b/LICENSE index bd5cb2f..bcb7895 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ The MIT License (MIT) -Copyright (c) 2016 Arkadiusz Kondas +Copyright (c) 2016-2019 Arkadiusz Kondas +Copyright (c) 2018 Andrew DalPino Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 4b0e6a8..f34a49a 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,26 @@ # PHP-ML - Machine Learning library for PHP -[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop) -[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=develop)](http://php-ml.readthedocs.org/en/develop/?badge=develop) +[![Minimum PHP Version](https://img.shields.io/badge/php-%3E%3D%207.2-8892BF.svg)](https://php.net/) +[![Latest Stable Version](https://img.shields.io/packagist/v/php-ai/php-ml.svg)](https://packagist.org/packages/php-ai/php-ml) +[![Build Status](https://travis-ci.org/php-ai/php-ml.svg?branch=master)](https://travis-ci.org/php-ai/php-ml) +[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=master)](http://php-ml.readthedocs.org/) [![Total 
Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml) [![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml) -[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop) +[![Coverage Status](https://coveralls.io/repos/github/php-ai/php-ml/badge.svg?branch=master)](https://coveralls.io/github/php-ai/php-ml?branch=master) +[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=master) -Fresh approach to Machine Learning in PHP. Note that at the moment PHP is not the best choice for machine learning but maybe this will change ... +

+ +

+ +Fresh approach to Machine Learning in PHP. Algorithms, Cross Validation, Neural Network, Preprocessing, Feature Extraction and much more in one library. + +PHP-ML requires PHP >= 7.2. Simple example of classification: ```php +require_once __DIR__ . '/vendor/autoload.php'; + use Phpml\Classification\KNearestNeighbors; $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; @@ -18,53 +29,121 @@ $labels = ['a', 'a', 'a', 'b', 'b', 'b']; $classifier = new KNearestNeighbors(); $classifier->train($samples, $labels); -$classifier->predict([3, 2]); +echo $classifier->predict([3, 2]); // return 'b' ``` +## Awards + + + + ## Documentation To find out how to use PHP-ML follow [Documentation](http://php-ml.readthedocs.org/). ## Installation -Currently this library is in the process of developing, but You can install it with Composer: +Currently this library is in the process of being developed, but you can install it with Composer: ``` composer require php-ai/php-ml ``` +## Examples + +Example scripts are available in a separate repository [php-ai/php-ml-examples](https://github.com/php-ai/php-ml-examples). + +## Datasets + +Public datasets are available in a separate repository [php-ai/php-ml-datasets](https://github.com/php-ai/php-ml-datasets). 
+ ## Features +* Association rule learning + * [Apriori](http://php-ml.readthedocs.io/en/latest/machine-learning/association/apriori/) * Classification + * [SVC](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/svc/) * [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/) * [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/) + * Decision Tree (CART) + * Ensemble Algorithms + * Bagging (Bootstrap Aggregating) + * Random Forest + * AdaBoost + * Linear + * Adaline + * Decision Stump + * Perceptron + * LogisticRegression * Regression * [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/) + * [SVR](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/svr/) + * DecisionTreeRegressor * Clustering - * [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means) - * [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan) + * [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means/) + * [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan/) + * Fuzzy C-Means +* Metric + * [Accuracy](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/accuracy/) + * [Confusion Matrix](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/confusion-matrix/) + * [Classification Report](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/classification-report/) + * Regression +* Workflow + * [Pipeline](http://php-ml.readthedocs.io/en/latest/machine-learning/workflow/pipeline) + * FeatureUnion +* Neural Network + * [Multilayer Perceptron Classifier](http://php-ml.readthedocs.io/en/latest/machine-learning/neural-network/multilayer-perceptron-classifier/) * Cross Validation - * [Random 
Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split) + * [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split/) + * [Stratified Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/stratified-random-split/) +* Feature Selection + * [Variance Threshold](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-selection/variance-threshold/) + * [SelectKBest](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-selection/selectkbest/) +* Preprocessing + * [Normalization](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/normalization/) + * [Imputation missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values/) + * LabelEncoder + * LambdaTransformer + * NumberConverter + * ColumnFilter + * OneHotEncoder +* Feature Extraction + * [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/) + * NGramTokenizer + * WhitespaceTokenizer + * WordTokenizer + * [Tf-idf Transformer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/tf-idf-transformer/) +* Dimensionality Reduction + * PCA (Principal Component Analysis) + * Kernel PCA + * LDA (Linear Discriminant Analysis) * Datasets - * [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset) + * [Array](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/array-dataset/) + * [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/) + * [Files](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/files-dataset/) + * [SVM](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/svm-dataset/) + * [MNIST](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/mnist-dataset/) * Ready to use: * 
[Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/) + * [Wine](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/wine/) + * [Glass](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/glass/) +* Models management + * [Persistency](http://php-ml.readthedocs.io/en/latest/machine-learning/model-manager/persistency/) * Math * [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/) * [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/) - + * [Set](http://php-ml.readthedocs.io/en/latest/math/set/) + * [Statistic](http://php-ml.readthedocs.io/en/latest/math/statistic/) + * Linear Algebra ## Contribute -- Issue Tracker: github.com/php-ai/php-ml/issues -- Source Code: github.com/php-ai/php-ml +- [Guide: CONTRIBUTING.md](https://github.com/php-ai/php-ml/blob/master/CONTRIBUTING.md) +- [Issue Tracker: github.com/php-ai/php-ml](https://github.com/php-ai/php-ml/issues) +- [Source Code: github.com/php-ai/php-ml](https://github.com/php-ai/php-ml) -After installation, you can launch the test suite in project root directory (you will need to install dev requirements with Composer) - -``` -bin/phpunit -``` +You can find more about contributing in [CONTRIBUTING.md](CONTRIBUTING.md). 
## License diff --git a/bin/code-coverage.sh b/bin/code-coverage.sh new file mode 100755 index 0000000..a24c0e8 --- /dev/null +++ b/bin/code-coverage.sh @@ -0,0 +1,4 @@ +#!/bin/bash +echo "Run PHPUnit with code coverage" +bin/phpunit --coverage-html .coverage +google-chrome .coverage/index.html diff --git a/bin/libsvm/svm-predict-osx b/bin/libsvm/svm-predict-osx new file mode 100755 index 0000000..480a60b Binary files /dev/null and b/bin/libsvm/svm-predict-osx differ diff --git a/bin/libsvm/svm-predict.exe b/bin/libsvm/svm-predict.exe new file mode 100644 index 0000000..29760b0 Binary files /dev/null and b/bin/libsvm/svm-predict.exe differ diff --git a/bin/libsvm/svm-scale-osx b/bin/libsvm/svm-scale-osx new file mode 100755 index 0000000..0ac83f6 Binary files /dev/null and b/bin/libsvm/svm-scale-osx differ diff --git a/bin/libsvm/svm-scale.exe b/bin/libsvm/svm-scale.exe new file mode 100644 index 0000000..75489ae Binary files /dev/null and b/bin/libsvm/svm-scale.exe differ diff --git a/bin/libsvm/svm-train-osx b/bin/libsvm/svm-train-osx new file mode 100755 index 0000000..a716cda Binary files /dev/null and b/bin/libsvm/svm-train-osx differ diff --git a/bin/libsvm/svm-train.exe b/bin/libsvm/svm-train.exe new file mode 100644 index 0000000..23368b5 Binary files /dev/null and b/bin/libsvm/svm-train.exe differ diff --git a/composer.json b/composer.json index 041f818..5d8b511 100644 --- a/composer.json +++ b/composer.json @@ -2,27 +2,51 @@ "name": "php-ai/php-ml", "type": "library", "description": "PHP-ML - Machine Learning library for PHP", - "license": "MIT", - "keywords": ["machine learning","pattern recognition","computational learning theory","artificial intelligence"], + "keywords": [ + "machine learning", + "pattern recognition", + "neural network", + "computational learning theory", + "artificial intelligence", + "data science", + "feature extraction" + ], "homepage": "https://github.com/php-ai/php-ml", + "license": "MIT", "authors": [ { "name": "Arkadiusz 
Kondas", "email": "arkadiusz.kondas@gmail.com" } ], - "autoload": { - "psr-0": { - "Phpml": "src/" - } - }, "require": { - "php": ">=7.0.0" + "php": "^7.2" }, "require-dev": { - "phpunit/phpunit": "^5.2" + "phpbench/phpbench": "^0.16.0", + "phpstan/phpstan-phpunit": "^0.12", + "phpstan/phpstan": "^0.12", + "phpstan/phpstan-strict-rules": "^0.12", + "phpunit/phpunit": "^8.0", + "symplify/easy-coding-standard": "^6.0" }, "config": { - "bin-dir": "bin" + "preferred-install": "dist", + "sort-packages": true + }, + "autoload": { + "psr-4": { + "Phpml\\": "src/" + } + }, + "autoload-dev": { + "psr-4": { + "Phpml\\Tests\\": "tests/" + } + }, + "scripts": { + "check-cs": "vendor/bin/ecs check src tests bin", + "fix-cs": "vendor/bin/ecs check src tests bin --fix", + "phpstan": "vendor/bin/phpstan.phar analyse src tests bin --level max --configuration phpstan.neon" } } diff --git a/composer.lock b/composer.lock index bf1fd1f..3a8a25f 100644 --- a/composer.lock +++ b/composer.lock @@ -1,41 +1,277 @@ { "_readme": [ "This file locks the dependencies of your project to a known state", - "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", + "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "hash": "f3e2d9975d300b3ea4c3568de44d8499", - "content-hash": "087091d0c339e9fa3a551a189ea658bf", + "content-hash": "914a4eb72418c2cf0d2321cafd474ac2", "packages": [], "packages-dev": [ { - "name": "doctrine/instantiator", - "version": "1.0.5", + "name": "beberlei/assert", + "version": "v3.2.7", "source": { "type": "git", - "url": "https://github.com/doctrine/instantiator.git", - "reference": "8e884e78f9f0eb1329e445619e04456e64d8051d" + "url": "https://github.com/beberlei/assert.git", + "reference": "d63a6943fc4fd1a2aedb65994e3548715105abcf" }, "dist": { "type": "zip", - "url": 
"https://api.github.com/repos/doctrine/instantiator/zipball/8e884e78f9f0eb1329e445619e04456e64d8051d", - "reference": "8e884e78f9f0eb1329e445619e04456e64d8051d", + "url": "https://api.github.com/repos/beberlei/assert/zipball/d63a6943fc4fd1a2aedb65994e3548715105abcf", + "reference": "d63a6943fc4fd1a2aedb65994e3548715105abcf", "shasum": "" }, "require": { - "php": ">=5.3,<8.0-DEV" + "ext-ctype": "*", + "ext-json": "*", + "ext-mbstring": "*", + "ext-simplexml": "*", + "php": "^7" }, "require-dev": { - "athletic/athletic": "~0.1.8", - "ext-pdo": "*", - "ext-phar": "*", - "phpunit/phpunit": "~4.0", - "squizlabs/php_codesniffer": "~2.0" + "friendsofphp/php-cs-fixer": "*", + "phpstan/phpstan-shim": "*", + "phpunit/phpunit": ">=6.0.0 <8" + }, + "suggest": { + "ext-intl": "Needed to allow Assertion::count(), Assertion::isCountable(), Assertion::minCount(), and Assertion::maxCount() to operate on ResourceBundles" + }, + "type": "library", + "autoload": { + "psr-4": { + "Assert\\": "lib/Assert" + }, + "files": [ + "lib/Assert/functions.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-2-Clause" + ], + "authors": [ + { + "name": "Benjamin Eberlei", + "email": "kontakt@beberlei.de", + "role": "Lead Developer" + }, + { + "name": "Richard Quadling", + "email": "rquadling@gmail.com", + "role": "Collaborator" + } + ], + "description": "Thin assertion library for input validation in business models.", + "keywords": [ + "assert", + "assertion", + "validation" + ], + "time": "2019-12-19T17:51:41+00:00" + }, + { + "name": "composer/semver", + "version": "1.5.1", + "source": { + "type": "git", + "url": "https://github.com/composer/semver.git", + "reference": "c6bea70230ef4dd483e6bbcab6005f682ed3a8de" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/composer/semver/zipball/c6bea70230ef4dd483e6bbcab6005f682ed3a8de", + "reference": "c6bea70230ef4dd483e6bbcab6005f682ed3a8de", + "shasum": "" + }, + "require": { + "php": 
"^5.3.2 || ^7.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.5 || ^5.0.5" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.0.x-dev" + "dev-master": "1.x-dev" + } + }, + "autoload": { + "psr-4": { + "Composer\\Semver\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nils Adermann", + "email": "naderman@naderman.de", + "homepage": "http://www.naderman.de" + }, + { + "name": "Jordi Boggiano", + "email": "j.boggiano@seld.be", + "homepage": "http://seld.be" + }, + { + "name": "Rob Bast", + "email": "rob.bast@gmail.com", + "homepage": "http://robbast.nl" + } + ], + "description": "Semver library that offers utilities, version constraint parsing and validation.", + "keywords": [ + "semantic", + "semver", + "validation", + "versioning" + ], + "time": "2020-01-13T12:06:48+00:00" + }, + { + "name": "composer/xdebug-handler", + "version": "1.4.1", + "source": { + "type": "git", + "url": "https://github.com/composer/xdebug-handler.git", + "reference": "1ab9842d69e64fb3a01be6b656501032d1b78cb7" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/composer/xdebug-handler/zipball/1ab9842d69e64fb3a01be6b656501032d1b78cb7", + "reference": "1ab9842d69e64fb3a01be6b656501032d1b78cb7", + "shasum": "" + }, + "require": { + "php": "^5.3.2 || ^7.0 || ^8.0", + "psr/log": "^1.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35 || ^5.7 || 6.5 - 8" + }, + "type": "library", + "autoload": { + "psr-4": { + "Composer\\XdebugHandler\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "John Stevenson", + "email": "john-stevenson@blueyonder.co.uk" + } + ], + "description": "Restarts a process without Xdebug.", + "keywords": [ + "Xdebug", + "performance" + ], + "time": "2020-03-01T12:26:26+00:00" + }, + { + "name": "doctrine/annotations", + "version": "v1.8.0", + "source": { + "type": 
"git", + "url": "https://github.com/doctrine/annotations.git", + "reference": "904dca4eb10715b92569fbcd79e201d5c349b6bc" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/doctrine/annotations/zipball/904dca4eb10715b92569fbcd79e201d5c349b6bc", + "reference": "904dca4eb10715b92569fbcd79e201d5c349b6bc", + "shasum": "" + }, + "require": { + "doctrine/lexer": "1.*", + "php": "^7.1" + }, + "require-dev": { + "doctrine/cache": "1.*", + "phpunit/phpunit": "^7.5" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.7.x-dev" + } + }, + "autoload": { + "psr-4": { + "Doctrine\\Common\\Annotations\\": "lib/Doctrine/Common/Annotations" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Guilherme Blanco", + "email": "guilhermeblanco@gmail.com" + }, + { + "name": "Roman Borschel", + "email": "roman@code-factory.org" + }, + { + "name": "Benjamin Eberlei", + "email": "kontakt@beberlei.de" + }, + { + "name": "Jonathan Wage", + "email": "jonwage@gmail.com" + }, + { + "name": "Johannes Schmitt", + "email": "schmittjoh@gmail.com" + } + ], + "description": "Docblock Annotations Parser", + "homepage": "http://www.doctrine-project.org", + "keywords": [ + "annotations", + "docblock", + "parser" + ], + "time": "2019-10-01T18:55:10+00:00" + }, + { + "name": "doctrine/instantiator", + "version": "1.3.0", + "source": { + "type": "git", + "url": "https://github.com/doctrine/instantiator.git", + "reference": "ae466f726242e637cebdd526a7d991b9433bacf1" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/doctrine/instantiator/zipball/ae466f726242e637cebdd526a7d991b9433bacf1", + "reference": "ae466f726242e637cebdd526a7d991b9433bacf1", + "shasum": "" + }, + "require": { + "php": "^7.1" + }, + "require-dev": { + "doctrine/coding-standard": "^6.0", + "ext-pdo": "*", + "ext-phar": "*", + "phpbench/phpbench": "^0.13", + "phpstan/phpstan-phpunit": "^0.11", + 
"phpstan/phpstan-shim": "^0.11", + "phpunit/phpunit": "^7.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.2.x-dev" } }, "autoload": { @@ -55,46 +291,396 @@ } ], "description": "A small, lightweight utility to instantiate objects in PHP without invoking their constructors", - "homepage": "https://github.com/doctrine/instantiator", + "homepage": "https://www.doctrine-project.org/projects/instantiator.html", "keywords": [ "constructor", "instantiate" ], - "time": "2015-06-14 21:17:01" + "time": "2019-10-21T16:45:58+00:00" }, { - "name": "myclabs/deep-copy", - "version": "1.5.1", + "name": "doctrine/lexer", + "version": "1.2.0", "source": { "type": "git", - "url": "https://github.com/myclabs/DeepCopy.git", - "reference": "a8773992b362b58498eed24bf85005f363c34771" + "url": "https://github.com/doctrine/lexer.git", + "reference": "5242d66dbeb21a30dd8a3e66bf7a73b66e05e1f6" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/a8773992b362b58498eed24bf85005f363c34771", - "reference": "a8773992b362b58498eed24bf85005f363c34771", + "url": "https://api.github.com/repos/doctrine/lexer/zipball/5242d66dbeb21a30dd8a3e66bf7a73b66e05e1f6", + "reference": "5242d66dbeb21a30dd8a3e66bf7a73b66e05e1f6", "shasum": "" }, "require": { - "php": ">=5.4.0" + "php": "^7.2" }, "require-dev": { - "doctrine/collections": "1.*", - "phpunit/phpunit": "~4.1" + "doctrine/coding-standard": "^6.0", + "phpstan/phpstan": "^0.11.8", + "phpunit/phpunit": "^8.2" }, "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.2.x-dev" + } + }, "autoload": { "psr-4": { - "DeepCopy\\": "src/DeepCopy/" + "Doctrine\\Common\\Lexer\\": "lib/Doctrine/Common/Lexer" } }, "notification-url": "https://packagist.org/downloads/", "license": [ "MIT" ], + "authors": [ + { + "name": "Guilherme Blanco", + "email": "guilhermeblanco@gmail.com" + }, + { + "name": "Roman Borschel", + "email": "roman@code-factory.org" + }, + { + "name": "Johannes 
Schmitt", + "email": "schmittjoh@gmail.com" + } + ], + "description": "PHP Doctrine Lexer parser library that can be used in Top-Down, Recursive Descent Parsers.", + "homepage": "https://www.doctrine-project.org/projects/lexer.html", + "keywords": [ + "annotations", + "docblock", + "lexer", + "parser", + "php" + ], + "time": "2019-10-30T14:39:59+00:00" + }, + { + "name": "friendsofphp/php-cs-fixer", + "version": "v2.16.1", + "source": { + "type": "git", + "url": "https://github.com/FriendsOfPHP/PHP-CS-Fixer.git", + "reference": "c8afb599858876e95e8ebfcd97812d383fa23f02" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/FriendsOfPHP/PHP-CS-Fixer/zipball/c8afb599858876e95e8ebfcd97812d383fa23f02", + "reference": "c8afb599858876e95e8ebfcd97812d383fa23f02", + "shasum": "" + }, + "require": { + "composer/semver": "^1.4", + "composer/xdebug-handler": "^1.2", + "doctrine/annotations": "^1.2", + "ext-json": "*", + "ext-tokenizer": "*", + "php": "^5.6 || ^7.0", + "php-cs-fixer/diff": "^1.3", + "symfony/console": "^3.4.17 || ^4.1.6 || ^5.0", + "symfony/event-dispatcher": "^3.0 || ^4.0 || ^5.0", + "symfony/filesystem": "^3.0 || ^4.0 || ^5.0", + "symfony/finder": "^3.0 || ^4.0 || ^5.0", + "symfony/options-resolver": "^3.0 || ^4.0 || ^5.0", + "symfony/polyfill-php70": "^1.0", + "symfony/polyfill-php72": "^1.4", + "symfony/process": "^3.0 || ^4.0 || ^5.0", + "symfony/stopwatch": "^3.0 || ^4.0 || ^5.0" + }, + "require-dev": { + "johnkary/phpunit-speedtrap": "^1.1 || ^2.0 || ^3.0", + "justinrainbow/json-schema": "^5.0", + "keradus/cli-executor": "^1.2", + "mikey179/vfsstream": "^1.6", + "php-coveralls/php-coveralls": "^2.1", + "php-cs-fixer/accessible-object": "^1.0", + "php-cs-fixer/phpunit-constraint-isidenticalstring": "^1.1", + "php-cs-fixer/phpunit-constraint-xmlmatchesxsd": "^1.1", + "phpunit/phpunit": "^5.7.27 || ^6.5.14 || ^7.1", + "phpunitgoodpractices/traits": "^1.8", + "symfony/phpunit-bridge": "^4.3 || ^5.0", + "symfony/yaml": "^3.0 || ^4.0 || 
^5.0" + }, + "suggest": { + "ext-mbstring": "For handling non-UTF8 characters in cache signature.", + "php-cs-fixer/phpunit-constraint-isidenticalstring": "For IsIdenticalString constraint.", + "php-cs-fixer/phpunit-constraint-xmlmatchesxsd": "For XmlMatchesXsd constraint.", + "symfony/polyfill-mbstring": "When enabling `ext-mbstring` is not possible." + }, + "bin": [ + "php-cs-fixer" + ], + "type": "application", + "autoload": { + "psr-4": { + "PhpCsFixer\\": "src/" + }, + "classmap": [ + "tests/Test/AbstractFixerTestCase.php", + "tests/Test/AbstractIntegrationCaseFactory.php", + "tests/Test/AbstractIntegrationTestCase.php", + "tests/Test/Assert/AssertTokensTrait.php", + "tests/Test/IntegrationCase.php", + "tests/Test/IntegrationCaseFactory.php", + "tests/Test/IntegrationCaseFactoryInterface.php", + "tests/Test/InternalIntegrationCaseFactory.php", + "tests/TestCase.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Dariusz Rumiński", + "email": "dariusz.ruminski@gmail.com" + } + ], + "description": "A tool to automatically fix PHP code style", + "time": "2019-11-25T22:10:32+00:00" + }, + { + "name": "jean85/pretty-package-versions", + "version": "1.2", + "source": { + "type": "git", + "url": "https://github.com/Jean85/pretty-package-versions.git", + "reference": "75c7effcf3f77501d0e0caa75111aff4daa0dd48" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Jean85/pretty-package-versions/zipball/75c7effcf3f77501d0e0caa75111aff4daa0dd48", + "reference": "75c7effcf3f77501d0e0caa75111aff4daa0dd48", + "shasum": "" + }, + "require": { + "ocramius/package-versions": "^1.2.0", + "php": "^7.0" + }, + "require-dev": { + "phpunit/phpunit": "^6.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.x-dev" + } + }, + "autoload": { + "psr-4": { + "Jean85\\": "src/" + } + }, + 
"notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Alessandro Lai", + "email": "alessandro.lai85@gmail.com" + } + ], + "description": "A wrapper for ocramius/package-versions to get pretty versions strings", + "keywords": [ + "composer", + "package", + "release", + "versions" + ], + "time": "2018-06-13T13:22:40+00:00" + }, + { + "name": "lstrojny/functional-php", + "version": "1.11.0", + "source": { + "type": "git", + "url": "https://github.com/lstrojny/functional-php.git", + "reference": "df0e516eb44cd0579eeaff57023ef41ffa11947f" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/lstrojny/functional-php/zipball/df0e516eb44cd0579eeaff57023ef41ffa11947f", + "reference": "df0e516eb44cd0579eeaff57023ef41ffa11947f", + "shasum": "" + }, + "require": { + "php": "~7" + }, + "require-dev": { + "friendsofphp/php-cs-fixer": "^2.14", + "phpunit/phpunit": "~7", + "squizlabs/php_codesniffer": "~3.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "Functional\\": "src/Functional" + }, + "files": [ + "src/Functional/Average.php", + "src/Functional/ButLast.php", + "src/Functional/Capture.php", + "src/Functional/ConstFunction.php", + "src/Functional/CompareOn.php", + "src/Functional/CompareObjectHashOn.php", + "src/Functional/Compose.php", + "src/Functional/Concat.php", + "src/Functional/Contains.php", + "src/Functional/Converge.php", + "src/Functional/Curry.php", + "src/Functional/CurryN.php", + "src/Functional/Difference.php", + "src/Functional/DropFirst.php", + "src/Functional/DropLast.php", + "src/Functional/Each.php", + "src/Functional/Equal.php", + "src/Functional/ErrorToException.php", + "src/Functional/Every.php", + "src/Functional/False.php", + "src/Functional/Falsy.php", + "src/Functional/Filter.php", + "src/Functional/First.php", + "src/Functional/FirstIndexOf.php", + "src/Functional/FlatMap.php", + "src/Functional/Flatten.php", + "src/Functional/Flip.php", + 
"src/Functional/GreaterThan.php", + "src/Functional/GreaterThanOrEqual.php", + "src/Functional/Group.php", + "src/Functional/Head.php", + "src/Functional/Id.php", + "src/Functional/IfElse.php", + "src/Functional/Identical.php", + "src/Functional/IndexesOf.php", + "src/Functional/Intersperse.php", + "src/Functional/Invoke.php", + "src/Functional/InvokeFirst.php", + "src/Functional/InvokeIf.php", + "src/Functional/InvokeLast.php", + "src/Functional/Invoker.php", + "src/Functional/Last.php", + "src/Functional/LastIndexOf.php", + "src/Functional/LessThan.php", + "src/Functional/LessThanOrEqual.php", + "src/Functional/LexicographicCompare.php", + "src/Functional/Map.php", + "src/Functional/Match.php", + "src/Functional/Maximum.php", + "src/Functional/Memoize.php", + "src/Functional/Minimum.php", + "src/Functional/None.php", + "src/Functional/Noop.php", + "src/Functional/Not.php", + "src/Functional/OmitKeys.php", + "src/Functional/PartialAny.php", + "src/Functional/PartialLeft.php", + "src/Functional/PartialMethod.php", + "src/Functional/PartialRight.php", + "src/Functional/Partition.php", + "src/Functional/Pick.php", + "src/Functional/Pluck.php", + "src/Functional/Poll.php", + "src/Functional/Product.php", + "src/Functional/Ratio.php", + "src/Functional/ReduceLeft.php", + "src/Functional/ReduceRight.php", + "src/Functional/Reindex.php", + "src/Functional/Reject.php", + "src/Functional/Repeat.php", + "src/Functional/Retry.php", + "src/Functional/Select.php", + "src/Functional/SelectKeys.php", + "src/Functional/SequenceConstant.php", + "src/Functional/SequenceExponential.php", + "src/Functional/SequenceLinear.php", + "src/Functional/Some.php", + "src/Functional/Sort.php", + "src/Functional/Sum.php", + "src/Functional/SuppressError.php", + "src/Functional/Tap.php", + "src/Functional/Tail.php", + "src/Functional/TailRecursion.php", + "src/Functional/TakeLeft.php", + "src/Functional/TakeRight.php", + "src/Functional/True.php", + "src/Functional/Truthy.php", + 
"src/Functional/Unique.php", + "src/Functional/With.php", + "src/Functional/Zip.php", + "src/Functional/ZipAll.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Lars Strojny", + "email": "lstrojny@php.net", + "homepage": "http://usrportage.de" + }, + { + "name": "Max Beutel", + "email": "nash12@gmail.com" + } + ], + "description": "Functional primitives for PHP", + "keywords": [ + "functional" + ], + "time": "2019-12-19T16:01:40+00:00" + }, + { + "name": "myclabs/deep-copy", + "version": "1.9.5", + "source": { + "type": "git", + "url": "https://github.com/myclabs/DeepCopy.git", + "reference": "b2c28789e80a97badd14145fda39b545d83ca3ef" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/myclabs/DeepCopy/zipball/b2c28789e80a97badd14145fda39b545d83ca3ef", + "reference": "b2c28789e80a97badd14145fda39b545d83ca3ef", + "shasum": "" + }, + "require": { + "php": "^7.1" + }, + "replace": { + "myclabs/deep-copy": "self.version" + }, + "require-dev": { + "doctrine/collections": "^1.0", + "doctrine/common": "^2.6", + "phpunit/phpunit": "^7.1" + }, + "type": "library", + "autoload": { + "psr-4": { + "DeepCopy\\": "src/DeepCopy/" + }, + "files": [ + "src/DeepCopy/deep_copy.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], "description": "Create deep copies (clones) of your objects", - "homepage": "https://github.com/myclabs/DeepCopy", "keywords": [ "clone", "copy", @@ -102,43 +688,712 @@ "object", "object graph" ], - "time": "2015-11-20 12:04:31" + "time": "2020-01-17T21:11:47+00:00" }, { - "name": "phpdocumentor/reflection-docblock", - "version": "2.0.4", + "name": "nette/finder", + "version": "v2.5.2", "source": { "type": "git", - "url": "https://github.com/phpDocumentor/ReflectionDocBlock.git", - "reference": "d68dbdc53dc358a816f00b300704702b2eaff7b8" + "url": "https://github.com/nette/finder.git", + "reference": 
"4ad2c298eb8c687dd0e74ae84206a4186eeaed50" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/phpDocumentor/ReflectionDocBlock/zipball/d68dbdc53dc358a816f00b300704702b2eaff7b8", - "reference": "d68dbdc53dc358a816f00b300704702b2eaff7b8", + "url": "https://api.github.com/repos/nette/finder/zipball/4ad2c298eb8c687dd0e74ae84206a4186eeaed50", + "reference": "4ad2c298eb8c687dd0e74ae84206a4186eeaed50", "shasum": "" }, "require": { - "php": ">=5.3.3" + "nette/utils": "^2.4 || ^3.0", + "php": ">=7.1" + }, + "conflict": { + "nette/nette": "<2.2" }, "require-dev": { - "phpunit/phpunit": "~4.0" - }, - "suggest": { - "dflydev/markdown": "~1.0", - "erusev/parsedown": "~1.0" + "nette/tester": "^2.0", + "phpstan/phpstan": "^0.12", + "tracy/tracy": "^2.3" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "2.0.x-dev" + "dev-master": "2.5-dev" } }, "autoload": { - "psr-0": { - "phpDocumentor": [ - "src/" - ] + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause", + "GPL-2.0", + "GPL-3.0" + ], + "authors": [ + { + "name": "David Grudl", + "homepage": "https://davidgrudl.com" + }, + { + "name": "Nette Community", + "homepage": "https://nette.org/contributors" + } + ], + "description": "🔍 Nette Finder: find files and directories with an intuitive API.", + "homepage": "https://nette.org", + "keywords": [ + "filesystem", + "glob", + "iterator", + "nette" + ], + "time": "2020-01-03T20:35:40+00:00" + }, + { + "name": "nette/robot-loader", + "version": "v3.2.2", + "source": { + "type": "git", + "url": "https://github.com/nette/robot-loader.git", + "reference": "38e8a270567a4ad9fe716b40fcda5a6580afa3c0" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/nette/robot-loader/zipball/38e8a270567a4ad9fe716b40fcda5a6580afa3c0", + "reference": "38e8a270567a4ad9fe716b40fcda5a6580afa3c0", + "shasum": "" + }, + "require": { + "ext-tokenizer": "*", + "nette/finder": "^2.5 || ^3.0", 
+ "nette/utils": "^3.0", + "php": ">=7.1" + }, + "require-dev": { + "nette/tester": "^2.0", + "phpstan/phpstan": "^0.12", + "tracy/tracy": "^2.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.2-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause", + "GPL-2.0-only", + "GPL-3.0-only" + ], + "authors": [ + { + "name": "David Grudl", + "homepage": "https://davidgrudl.com" + }, + { + "name": "Nette Community", + "homepage": "https://nette.org/contributors" + } + ], + "description": "🍀 Nette RobotLoader: high performance and comfortable autoloader that will search and autoload classes within your application.", + "homepage": "https://nette.org", + "keywords": [ + "autoload", + "class", + "interface", + "nette", + "trait" + ], + "time": "2020-02-20T22:17:50+00:00" + }, + { + "name": "nette/utils", + "version": "v3.1.1", + "source": { + "type": "git", + "url": "https://github.com/nette/utils.git", + "reference": "2c17d16d8887579ae1c0898ff94a3668997fd3eb" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/nette/utils/zipball/2c17d16d8887579ae1c0898ff94a3668997fd3eb", + "reference": "2c17d16d8887579ae1c0898ff94a3668997fd3eb", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "require-dev": { + "nette/tester": "~2.0", + "phpstan/phpstan": "^0.12", + "tracy/tracy": "^2.3" + }, + "suggest": { + "ext-gd": "to use Image", + "ext-iconv": "to use Strings::webalize() and toAscii()", + "ext-intl": "to use Strings::webalize(), toAscii(), normalize() and compare()", + "ext-json": "to use Nette\\Utils\\Json", + "ext-mbstring": "to use Strings::lower() etc...", + "ext-tokenizer": "to use Nette\\Utils\\Reflection::getUseStatements()", + "ext-xml": "to use Strings::length() etc. 
when mbstring is not available" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.1-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause", + "GPL-2.0-only", + "GPL-3.0-only" + ], + "authors": [ + { + "name": "David Grudl", + "homepage": "https://davidgrudl.com" + }, + { + "name": "Nette Community", + "homepage": "https://nette.org/contributors" + } + ], + "description": "🛠 Nette Utils: lightweight utilities for string & array manipulation, image handling, safe JSON encoding/decoding, validation, slug or strong password generating etc.", + "homepage": "https://nette.org", + "keywords": [ + "array", + "core", + "datetime", + "images", + "json", + "nette", + "paginator", + "password", + "slugify", + "string", + "unicode", + "utf-8", + "utility", + "validation" + ], + "time": "2020-02-09T14:10:55+00:00" + }, + { + "name": "ocramius/package-versions", + "version": "1.5.1", + "source": { + "type": "git", + "url": "https://github.com/Ocramius/PackageVersions.git", + "reference": "1d32342b8c1eb27353c8887c366147b4c2da673c" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Ocramius/PackageVersions/zipball/1d32342b8c1eb27353c8887c366147b4c2da673c", + "reference": "1d32342b8c1eb27353c8887c366147b4c2da673c", + "shasum": "" + }, + "require": { + "composer-plugin-api": "^1.0.0", + "php": "^7.3.0" + }, + "require-dev": { + "composer/composer": "^1.8.6", + "doctrine/coding-standard": "^6.0.0", + "ext-zip": "*", + "infection/infection": "^0.13.4", + "phpunit/phpunit": "^8.2.5", + "vimeo/psalm": "^3.4.9" + }, + "type": "composer-plugin", + "extra": { + "class": "PackageVersions\\Installer", + "branch-alias": { + "dev-master": "1.6.x-dev" + } + }, + "autoload": { + "psr-4": { + "PackageVersions\\": "src/PackageVersions" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + 
"name": "Marco Pivetta", + "email": "ocramius@gmail.com" + } + ], + "description": "Composer plugin that provides efficient querying for installed package versions (no runtime IO)", + "time": "2019-07-17T15:49:50+00:00" + }, + { + "name": "paragonie/random_compat", + "version": "v9.99.99", + "source": { + "type": "git", + "url": "https://github.com/paragonie/random_compat.git", + "reference": "84b4dfb120c6f9b4ff7b3685f9b8f1aa365a0c95" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/paragonie/random_compat/zipball/84b4dfb120c6f9b4ff7b3685f9b8f1aa365a0c95", + "reference": "84b4dfb120c6f9b4ff7b3685f9b8f1aa365a0c95", + "shasum": "" + }, + "require": { + "php": "^7" + }, + "require-dev": { + "phpunit/phpunit": "4.*|5.*", + "vimeo/psalm": "^1" + }, + "suggest": { + "ext-libsodium": "Provides a modern crypto API that can be used to generate random bytes." + }, + "type": "library", + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Paragon Initiative Enterprises", + "email": "security@paragonie.com", + "homepage": "https://paragonie.com" + } + ], + "description": "PHP 5.x polyfill for random_bytes() and random_int() from PHP 7", + "keywords": [ + "csprng", + "polyfill", + "pseudorandom", + "random" + ], + "time": "2018-07-02T15:55:56+00:00" + }, + { + "name": "phar-io/manifest", + "version": "1.0.3", + "source": { + "type": "git", + "url": "https://github.com/phar-io/manifest.git", + "reference": "7761fcacf03b4d4f16e7ccb606d4879ca431fcf4" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phar-io/manifest/zipball/7761fcacf03b4d4f16e7ccb606d4879ca431fcf4", + "reference": "7761fcacf03b4d4f16e7ccb606d4879ca431fcf4", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-phar": "*", + "phar-io/version": "^2.0", + "php": "^5.6 || ^7.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "classmap": [ + 
"src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Arne Blankerts", + "email": "arne@blankerts.de", + "role": "Developer" + }, + { + "name": "Sebastian Heuer", + "email": "sebastian@phpeople.de", + "role": "Developer" + }, + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "Developer" + } + ], + "description": "Component for reading phar.io manifest information from a PHP Archive (PHAR)", + "time": "2018-07-08T19:23:20+00:00" + }, + { + "name": "phar-io/version", + "version": "2.0.1", + "source": { + "type": "git", + "url": "https://github.com/phar-io/version.git", + "reference": "45a2ec53a73c70ce41d55cedef9063630abaf1b6" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phar-io/version/zipball/45a2ec53a73c70ce41d55cedef9063630abaf1b6", + "reference": "45a2ec53a73c70ce41d55cedef9063630abaf1b6", + "shasum": "" + }, + "require": { + "php": "^5.6 || ^7.0" + }, + "type": "library", + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Arne Blankerts", + "email": "arne@blankerts.de", + "role": "Developer" + }, + { + "name": "Sebastian Heuer", + "email": "sebastian@phpeople.de", + "role": "Developer" + }, + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "Developer" + } + ], + "description": "Library for handling version information and constraints", + "time": "2018-07-08T19:19:57+00:00" + }, + { + "name": "php-cs-fixer/diff", + "version": "v1.3.0", + "source": { + "type": "git", + "url": "https://github.com/PHP-CS-Fixer/diff.git", + "reference": "78bb099e9c16361126c86ce82ec4405ebab8e756" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/PHP-CS-Fixer/diff/zipball/78bb099e9c16361126c86ce82ec4405ebab8e756", + "reference": 
"78bb099e9c16361126c86ce82ec4405ebab8e756", + "shasum": "" + }, + "require": { + "php": "^5.6 || ^7.0" + }, + "require-dev": { + "phpunit/phpunit": "^5.7.23 || ^6.4.3", + "symfony/process": "^3.3" + }, + "type": "library", + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Kore Nordmann", + "email": "mail@kore-nordmann.de" + }, + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + }, + { + "name": "SpacePossum" + } + ], + "description": "sebastian/diff v2 backport support for PHP5.6", + "homepage": "https://github.com/PHP-CS-Fixer", + "keywords": [ + "diff" + ], + "time": "2018-02-15T16:58:55+00:00" + }, + { + "name": "phpbench/container", + "version": "1.2", + "source": { + "type": "git", + "url": "https://github.com/phpbench/container.git", + "reference": "c0e3cbf1cd8f867c70b029cb6d1b0b39fe6d409d" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phpbench/container/zipball/c0e3cbf1cd8f867c70b029cb6d1b0b39fe6d409d", + "reference": "c0e3cbf1cd8f867c70b029cb6d1b0b39fe6d409d", + "shasum": "" + }, + "require": { + "psr/container": "^1.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.36" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "autoload": { + "psr-4": { + "PhpBench\\DependencyInjection\\": "lib/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Daniel Leech", + "email": "daniel@dantleech.com" + } + ], + "description": "Simple, configurable, service container.", + "time": "2018-02-12T08:08:59+00:00" + }, + { + "name": "phpbench/dom", + "version": "0.2.0", + "source": { + "type": "git", + "url": "https://github.com/phpbench/dom.git", + "reference": "b135378dd0004c05ba5446aeddaf0b83339c1c4c" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/phpbench/dom/zipball/b135378dd0004c05ba5446aeddaf0b83339c1c4c", + "reference": "b135378dd0004c05ba5446aeddaf0b83339c1c4c", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "php": "^5.4|^7.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.6" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "autoload": { + "psr-4": { + "PhpBench\\Dom\\": "lib/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Daniel Leech", + "email": "daniel@dantleech.com" + } + ], + "description": "DOM wrapper to simplify working with the PHP DOM implementation", + "time": "2016-02-27T12:15:56+00:00" + }, + { + "name": "phpbench/phpbench", + "version": "0.16.10", + "source": { + "type": "git", + "url": "https://github.com/phpbench/phpbench.git", + "reference": "00c18b1ab87dbda66e8972c8602a14dd08c69914" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phpbench/phpbench/zipball/00c18b1ab87dbda66e8972c8602a14dd08c69914", + "reference": "00c18b1ab87dbda66e8972c8602a14dd08c69914", + "shasum": "" + }, + "require": { + "beberlei/assert": "^2.4 || ^3.0", + "doctrine/annotations": "^1.2.7", + "ext-dom": "*", + "ext-json": "*", + "ext-pcre": "*", + "ext-reflection": "*", + "ext-spl": "*", + "lstrojny/functional-php": "1.0 || ^1.2.3", + "php": "^7.1", + "phpbench/container": "~1.2", + "phpbench/dom": "~0.2.0", + "seld/jsonlint": "^1.1", + "symfony/console": "^3.2 || ^4.0", + "symfony/debug": "^2.4 || ^3.0 || ^4.0", + "symfony/filesystem": "^2.4 || ^3.0 || ^4.0", + "symfony/finder": "^2.4 || ^3.0 || ^4.0", + "symfony/options-resolver": "^2.6 || ^3.0 || ^4.0", + "symfony/process": "^2.1 || ^3.0 || ^4.0", + "webmozart/path-util": "^2.3" + }, + "require-dev": { + "doctrine/dbal": "^2.4", + "friendsofphp/php-cs-fixer": "^2.13.1", + "padraic/phar-updater": "^1.0", + "phpstan/phpstan": "^0.10.7", + "phpunit/phpunit": "^6.5 || ^7.0" + 
}, + "suggest": { + "ext-curl": "For (web) reports extension", + "ext-xdebug": "For Xdebug profiling extension." + }, + "bin": [ + "bin/phpbench" + ], + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "autoload": { + "psr-4": { + "PhpBench\\": "lib/", + "PhpBench\\Extensions\\Dbal\\": "extensions/dbal/lib/", + "PhpBench\\Extensions\\XDebug\\": "extensions/xdebug/lib/", + "PhpBench\\Extensions\\Reports\\": "extensions/reports/lib/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Daniel Leech", + "email": "daniel@dantleech.com" + } + ], + "description": "PHP Benchmarking Framework", + "time": "2019-09-01T08:08:02+00:00" + }, + { + "name": "phpdocumentor/reflection-common", + "version": "2.0.0", + "source": { + "type": "git", + "url": "https://github.com/phpDocumentor/ReflectionCommon.git", + "reference": "63a995caa1ca9e5590304cd845c15ad6d482a62a" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phpDocumentor/ReflectionCommon/zipball/63a995caa1ca9e5590304cd845c15ad6d482a62a", + "reference": "63a995caa1ca9e5590304cd845c15ad6d482a62a", + "shasum": "" + }, + "require": { + "php": ">=7.1" + }, + "require-dev": { + "phpunit/phpunit": "~6" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.x-dev" + } + }, + "autoload": { + "psr-4": { + "phpDocumentor\\Reflection\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Jaap van Otterdijk", + "email": "opensource@ijaap.nl" + } + ], + "description": "Common reflection classes used by phpdocumentor to reflect the code structure", + "homepage": "http://www.phpdoc.org", + "keywords": [ + "FQSEN", + "phpDocumentor", + "phpdoc", + "reflection", + "static analysis" + ], + "time": "2018-08-07T13:53:10+00:00" + }, + { + "name": "phpdocumentor/reflection-docblock", + "version": "5.1.0", + 
"source": { + "type": "git", + "url": "https://github.com/phpDocumentor/ReflectionDocBlock.git", + "reference": "cd72d394ca794d3466a3b2fc09d5a6c1dc86b47e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phpDocumentor/ReflectionDocBlock/zipball/cd72d394ca794d3466a3b2fc09d5a6c1dc86b47e", + "reference": "cd72d394ca794d3466a3b2fc09d5a6c1dc86b47e", + "shasum": "" + }, + "require": { + "ext-filter": "^7.1", + "php": "^7.2", + "phpdocumentor/reflection-common": "^2.0", + "phpdocumentor/type-resolver": "^1.0", + "webmozart/assert": "^1" + }, + "require-dev": { + "doctrine/instantiator": "^1", + "mockery/mockery": "^1" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.x-dev" + } + }, + "autoload": { + "psr-4": { + "phpDocumentor\\Reflection\\": "src" } }, "notification-url": "https://packagist.org/downloads/", @@ -148,44 +1403,96 @@ "authors": [ { "name": "Mike van Riel", - "email": "mike.vanriel@naenius.com" + "email": "me@mikevanriel.com" + }, + { + "name": "Jaap van Otterdijk", + "email": "account@ijaap.nl" } ], - "time": "2015-02-03 12:10:50" + "description": "With this component, a library can provide support for annotations via DocBlocks or otherwise retrieve information that is embedded in a DocBlock.", + "time": "2020-02-22T12:28:44+00:00" }, { - "name": "phpspec/prophecy", - "version": "v1.6.0", + "name": "phpdocumentor/type-resolver", + "version": "1.1.0", "source": { "type": "git", - "url": "https://github.com/phpspec/prophecy.git", - "reference": "3c91bdf81797d725b14cb62906f9a4ce44235972" + "url": "https://github.com/phpDocumentor/TypeResolver.git", + "reference": "7462d5f123dfc080dfdf26897032a6513644fc95" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/phpspec/prophecy/zipball/3c91bdf81797d725b14cb62906f9a4ce44235972", - "reference": "3c91bdf81797d725b14cb62906f9a4ce44235972", + "url": 
"https://api.github.com/repos/phpDocumentor/TypeResolver/zipball/7462d5f123dfc080dfdf26897032a6513644fc95", + "reference": "7462d5f123dfc080dfdf26897032a6513644fc95", + "shasum": "" + }, + "require": { + "php": "^7.2", + "phpdocumentor/reflection-common": "^2.0" + }, + "require-dev": { + "ext-tokenizer": "^7.2", + "mockery/mockery": "~1" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.x-dev" + } + }, + "autoload": { + "psr-4": { + "phpDocumentor\\Reflection\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Mike van Riel", + "email": "me@mikevanriel.com" + } + ], + "description": "A PSR-5 based resolver of Class names, Types and Structural Element Names", + "time": "2020-02-18T18:59:58+00:00" + }, + { + "name": "phpspec/prophecy", + "version": "v1.10.2", + "source": { + "type": "git", + "url": "https://github.com/phpspec/prophecy.git", + "reference": "b4400efc9d206e83138e2bb97ed7f5b14b831cd9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phpspec/prophecy/zipball/b4400efc9d206e83138e2bb97ed7f5b14b831cd9", + "reference": "b4400efc9d206e83138e2bb97ed7f5b14b831cd9", "shasum": "" }, "require": { "doctrine/instantiator": "^1.0.2", "php": "^5.3|^7.0", - "phpdocumentor/reflection-docblock": "~2.0", - "sebastian/comparator": "~1.1", - "sebastian/recursion-context": "~1.0" + "phpdocumentor/reflection-docblock": "^2.0|^3.0.2|^4.0|^5.0", + "sebastian/comparator": "^1.2.3|^2.0|^3.0|^4.0", + "sebastian/recursion-context": "^1.0|^2.0|^3.0|^4.0" }, "require-dev": { - "phpspec/phpspec": "~2.0" + "phpspec/phpspec": "^2.5 || ^3.2", + "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.5 || ^7.1" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.5.x-dev" + "dev-master": "1.10.x-dev" } }, "autoload": { - "psr-0": { - "Prophecy\\": "src/" + "psr-4": { + "Prophecy\\": "src/Prophecy" } }, "notification-url": 
"https://packagist.org/downloads/", @@ -213,44 +1520,237 @@ "spy", "stub" ], - "time": "2016-02-15 07:46:21" + "time": "2020-01-20T15:57:02+00:00" }, { - "name": "phpunit/php-code-coverage", - "version": "3.3.1", + "name": "phpstan/phpdoc-parser", + "version": "0.3.5", "source": { "type": "git", - "url": "https://github.com/sebastianbergmann/php-code-coverage.git", - "reference": "2431befdd451fac43fbcde94d1a92fb3b8b68f86" + "url": "https://github.com/phpstan/phpdoc-parser.git", + "reference": "8c4ef2aefd9788238897b678a985e1d5c8df6db4" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/2431befdd451fac43fbcde94d1a92fb3b8b68f86", - "reference": "2431befdd451fac43fbcde94d1a92fb3b8b68f86", + "url": "https://api.github.com/repos/phpstan/phpdoc-parser/zipball/8c4ef2aefd9788238897b678a985e1d5c8df6db4", + "reference": "8c4ef2aefd9788238897b678a985e1d5c8df6db4", "shasum": "" }, "require": { - "php": "^5.6 || ^7.0", - "phpunit/php-file-iterator": "~1.3", - "phpunit/php-text-template": "~1.2", - "phpunit/php-token-stream": "^1.4.2", - "sebastian/code-unit-reverse-lookup": "~1.0", - "sebastian/environment": "^1.3.2", - "sebastian/version": "~1.0|~2.0" + "php": "~7.1" }, "require-dev": { - "ext-xdebug": ">=2.1.4", - "phpunit/phpunit": "~5" - }, - "suggest": { - "ext-dom": "*", - "ext-xdebug": ">=2.4.0", - "ext-xmlwriter": "*" + "consistence/coding-standard": "^3.5", + "jakub-onderka/php-parallel-lint": "^0.9.2", + "phing/phing": "^2.16.0", + "phpstan/phpstan": "^0.10", + "phpunit/phpunit": "^6.3", + "slevomat/coding-standard": "^4.7.2", + "squizlabs/php_codesniffer": "^3.3.2", + "symfony/process": "^3.4 || ^4.0" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "3.3.x-dev" + "dev-master": "0.3-dev" + } + }, + "autoload": { + "psr-4": { + "PHPStan\\PhpDocParser\\": [ + "src/" + ] + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "PHPDoc parser 
with support for nullable, intersection and generic types", + "time": "2019-06-07T19:13:52+00:00" + }, + { + "name": "phpstan/phpstan", + "version": "0.12.13", + "source": { + "type": "git", + "url": "https://github.com/phpstan/phpstan.git", + "reference": "d74fb5ce1ab9f24a7128db90e99dec82440975c3" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phpstan/phpstan/zipball/d74fb5ce1ab9f24a7128db90e99dec82440975c3", + "reference": "d74fb5ce1ab9f24a7128db90e99dec82440975c3", + "shasum": "" + }, + "require": { + "php": "^7.1" + }, + "bin": [ + "phpstan", + "phpstan.phar" + ], + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "0.12-dev" + } + }, + "autoload": { + "files": [ + "bootstrap.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "PHPStan - PHP Static Analysis Tool", + "time": "2020-03-02T13:08:55+00:00" + }, + { + "name": "phpstan/phpstan-phpunit", + "version": "0.12.6", + "source": { + "type": "git", + "url": "https://github.com/phpstan/phpstan-phpunit.git", + "reference": "26394996368b6d033d012547d3197f4e07e23021" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phpstan/phpstan-phpunit/zipball/26394996368b6d033d012547d3197f4e07e23021", + "reference": "26394996368b6d033d012547d3197f4e07e23021", + "shasum": "" + }, + "require": { + "php": "~7.1", + "phpstan/phpstan": "^0.12.4" + }, + "conflict": { + "phpunit/phpunit": "<7.0" + }, + "require-dev": { + "consistence/coding-standard": "^3.5", + "dealerdirect/phpcodesniffer-composer-installer": "^0.4.4", + "ergebnis/composer-normalize": "^2.0.2", + "jakub-onderka/php-parallel-lint": "^1.0", + "phing/phing": "^2.16.0", + "phpstan/phpstan-strict-rules": "^0.12", + "phpunit/phpunit": "^7.0", + "satooshi/php-coveralls": "^1.0", + "slevomat/coding-standard": "^4.7.2" + }, + "type": "phpstan-extension", + "extra": { + "branch-alias": { + "dev-master": "0.12-dev" + }, + "phpstan": { + 
"includes": [ + "extension.neon", + "rules.neon" + ] + } + }, + "autoload": { + "psr-4": { + "PHPStan\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "PHPUnit extensions and rules for PHPStan", + "time": "2020-01-10T12:07:21+00:00" + }, + { + "name": "phpstan/phpstan-strict-rules", + "version": "0.12.2", + "source": { + "type": "git", + "url": "https://github.com/phpstan/phpstan-strict-rules.git", + "reference": "a670a59aff7cf96f75d21b974860ada10e25b2ee" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/phpstan/phpstan-strict-rules/zipball/a670a59aff7cf96f75d21b974860ada10e25b2ee", + "reference": "a670a59aff7cf96f75d21b974860ada10e25b2ee", + "shasum": "" + }, + "require": { + "php": "~7.1", + "phpstan/phpstan": "^0.12.6" + }, + "require-dev": { + "consistence/coding-standard": "^3.0.1", + "dealerdirect/phpcodesniffer-composer-installer": "^0.4.4", + "ergebnis/composer-normalize": "^2.0.2", + "jakub-onderka/php-parallel-lint": "^1.0", + "phing/phing": "^2.16.0", + "phpstan/phpstan-phpunit": "^0.12", + "phpunit/phpunit": "^7.0", + "slevomat/coding-standard": "^4.5.2" + }, + "type": "phpstan-extension", + "extra": { + "branch-alias": { + "dev-master": "0.12-dev" + }, + "phpstan": { + "includes": [ + "rules.neon" + ] + } + }, + "autoload": { + "psr-4": { + "PHPStan\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "Extra strict and opinionated rules for PHPStan", + "time": "2020-01-20T13:08:52+00:00" + }, + { + "name": "phpunit/php-code-coverage", + "version": "7.0.10", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/php-code-coverage.git", + "reference": "f1884187926fbb755a9aaf0b3836ad3165b478bf" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/php-code-coverage/zipball/f1884187926fbb755a9aaf0b3836ad3165b478bf", + "reference": 
"f1884187926fbb755a9aaf0b3836ad3165b478bf", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-xmlwriter": "*", + "php": "^7.2", + "phpunit/php-file-iterator": "^2.0.2", + "phpunit/php-text-template": "^1.2.1", + "phpunit/php-token-stream": "^3.1.1", + "sebastian/code-unit-reverse-lookup": "^1.0.1", + "sebastian/environment": "^4.2.2", + "sebastian/version": "^2.0.1", + "theseer/tokenizer": "^1.1.3" + }, + "require-dev": { + "phpunit/phpunit": "^8.2.2" + }, + "suggest": { + "ext-xdebug": "^2.7.2" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "7.0-dev" } }, "autoload": { @@ -265,7 +1765,7 @@ "authors": [ { "name": "Sebastian Bergmann", - "email": "sb@sebastian-bergmann.de", + "email": "sebastian@phpunit.de", "role": "lead" } ], @@ -276,29 +1776,32 @@ "testing", "xunit" ], - "time": "2016-04-08 08:14:53" + "time": "2019-11-20T13:55:58+00:00" }, { "name": "phpunit/php-file-iterator", - "version": "1.4.1", + "version": "2.0.2", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/php-file-iterator.git", - "reference": "6150bf2c35d3fc379e50c7602b75caceaa39dbf0" + "reference": "050bedf145a257b1ff02746c31894800e5122946" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-file-iterator/zipball/6150bf2c35d3fc379e50c7602b75caceaa39dbf0", - "reference": "6150bf2c35d3fc379e50c7602b75caceaa39dbf0", + "url": "https://api.github.com/repos/sebastianbergmann/php-file-iterator/zipball/050bedf145a257b1ff02746c31894800e5122946", + "reference": "050bedf145a257b1ff02746c31894800e5122946", "shasum": "" }, "require": { - "php": ">=5.3.3" + "php": "^7.1" + }, + "require-dev": { + "phpunit/phpunit": "^7.1" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.4.x-dev" + "dev-master": "2.0.x-dev" } }, "autoload": { @@ -313,7 +1816,7 @@ "authors": [ { "name": "Sebastian Bergmann", - "email": "sb@sebastian-bergmann.de", + "email": "sebastian@phpunit.de", "role": "lead" } ], @@ -323,7 
+1826,7 @@ "filesystem", "iterator" ], - "time": "2015-06-21 13:08:43" + "time": "2018-09-13T20:33:42+00:00" }, { "name": "phpunit/php-text-template", @@ -364,26 +1867,34 @@ "keywords": [ "template" ], - "time": "2015-06-21 13:50:34" + "time": "2015-06-21T13:50:34+00:00" }, { "name": "phpunit/php-timer", - "version": "1.0.7", + "version": "2.1.2", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/php-timer.git", - "reference": "3e82f4e9fc92665fafd9157568e4dcb01d014e5b" + "reference": "1038454804406b0b5f5f520358e78c1c2f71501e" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-timer/zipball/3e82f4e9fc92665fafd9157568e4dcb01d014e5b", - "reference": "3e82f4e9fc92665fafd9157568e4dcb01d014e5b", + "url": "https://api.github.com/repos/sebastianbergmann/php-timer/zipball/1038454804406b0b5f5f520358e78c1c2f71501e", + "reference": "1038454804406b0b5f5f520358e78c1c2f71501e", "shasum": "" }, "require": { - "php": ">=5.3.3" + "php": "^7.1" + }, + "require-dev": { + "phpunit/phpunit": "^7.0" }, "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.1-dev" + } + }, "autoload": { "classmap": [ "src/" @@ -396,7 +1907,7 @@ "authors": [ { "name": "Sebastian Bergmann", - "email": "sb@sebastian-bergmann.de", + "email": "sebastian@phpunit.de", "role": "lead" } ], @@ -405,33 +1916,33 @@ "keywords": [ "timer" ], - "time": "2015-06-21 08:01:12" + "time": "2019-06-07T04:22:29+00:00" }, { "name": "phpunit/php-token-stream", - "version": "1.4.8", + "version": "3.1.1", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/php-token-stream.git", - "reference": "3144ae21711fb6cac0b1ab4cbe63b75ce3d4e8da" + "reference": "995192df77f63a59e47f025390d2d1fdf8f425ff" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/php-token-stream/zipball/3144ae21711fb6cac0b1ab4cbe63b75ce3d4e8da", - "reference": "3144ae21711fb6cac0b1ab4cbe63b75ce3d4e8da", + "url": 
"https://api.github.com/repos/sebastianbergmann/php-token-stream/zipball/995192df77f63a59e47f025390d2d1fdf8f425ff", + "reference": "995192df77f63a59e47f025390d2d1fdf8f425ff", "shasum": "" }, "require": { "ext-tokenizer": "*", - "php": ">=5.3.3" + "php": "^7.1" }, "require-dev": { - "phpunit/phpunit": "~4.2" + "phpunit/phpunit": "^7.0" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.4-dev" + "dev-master": "3.1-dev" } }, "autoload": { @@ -454,48 +1965,56 @@ "keywords": [ "tokenizer" ], - "time": "2015-09-15 10:49:45" + "time": "2019-09-17T06:23:10+00:00" }, { "name": "phpunit/phpunit", - "version": "5.3.2", + "version": "8.5.2", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/phpunit.git", - "reference": "2c6da3536035617bae3fe3db37283c9e0eb63ab3" + "reference": "018b6ac3c8ab20916db85fa91bf6465acb64d1e0" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/2c6da3536035617bae3fe3db37283c9e0eb63ab3", - "reference": "2c6da3536035617bae3fe3db37283c9e0eb63ab3", + "url": "https://api.github.com/repos/sebastianbergmann/phpunit/zipball/018b6ac3c8ab20916db85fa91bf6465acb64d1e0", + "reference": "018b6ac3c8ab20916db85fa91bf6465acb64d1e0", "shasum": "" }, "require": { + "doctrine/instantiator": "^1.2.0", "ext-dom": "*", "ext-json": "*", - "ext-pcre": "*", - "ext-reflection": "*", - "ext-spl": "*", - "myclabs/deep-copy": "~1.3", - "php": "^5.6 || ^7.0", - "phpspec/prophecy": "^1.3.1", - "phpunit/php-code-coverage": "^3.3.0", - "phpunit/php-file-iterator": "~1.4", - "phpunit/php-text-template": "~1.2", - "phpunit/php-timer": "^1.0.6", - "phpunit/phpunit-mock-objects": "^3.1", - "sebastian/comparator": "~1.1", - "sebastian/diff": "~1.2", - "sebastian/environment": "~1.3", - "sebastian/exporter": "~1.2", - "sebastian/global-state": "~1.0", - "sebastian/object-enumerator": "~1.0", - "sebastian/resource-operations": "~1.0", - "sebastian/version": "~1.0|~2.0", - "symfony/yaml": "~2.1|~3.0" + 
"ext-libxml": "*", + "ext-mbstring": "*", + "ext-xml": "*", + "ext-xmlwriter": "*", + "myclabs/deep-copy": "^1.9.1", + "phar-io/manifest": "^1.0.3", + "phar-io/version": "^2.0.1", + "php": "^7.2", + "phpspec/prophecy": "^1.8.1", + "phpunit/php-code-coverage": "^7.0.7", + "phpunit/php-file-iterator": "^2.0.2", + "phpunit/php-text-template": "^1.2.1", + "phpunit/php-timer": "^2.1.2", + "sebastian/comparator": "^3.0.2", + "sebastian/diff": "^3.0.2", + "sebastian/environment": "^4.2.2", + "sebastian/exporter": "^3.1.1", + "sebastian/global-state": "^3.0.0", + "sebastian/object-enumerator": "^3.0.3", + "sebastian/resource-operations": "^2.0.1", + "sebastian/type": "^1.1.3", + "sebastian/version": "^2.0.1" + }, + "require-dev": { + "ext-pdo": "*" }, "suggest": { - "phpunit/php-invoker": "~1.1" + "ext-soap": "*", + "ext-xdebug": "*", + "phpunit/php-invoker": "^2.0.0" }, "bin": [ "phpunit" @@ -503,7 +2022,7 @@ "type": "library", "extra": { "branch-alias": { - "dev-master": "5.3.x-dev" + "dev-master": "8.5-dev" } }, "autoload": { @@ -529,83 +2048,217 @@ "testing", "xunit" ], - "time": "2016-04-12 16:20:08" + "time": "2020-01-08T08:49:49+00:00" }, { - "name": "phpunit/phpunit-mock-objects", - "version": "3.1.3", + "name": "psr/cache", + "version": "1.0.1", "source": { "type": "git", - "url": "https://github.com/sebastianbergmann/phpunit-mock-objects.git", - "reference": "151c96874bff6fe61a25039df60e776613a61489" + "url": "https://github.com/php-fig/cache.git", + "reference": "d11b50ad223250cf17b86e38383413f5a6764bf8" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/phpunit-mock-objects/zipball/151c96874bff6fe61a25039df60e776613a61489", - "reference": "151c96874bff6fe61a25039df60e776613a61489", + "url": "https://api.github.com/repos/php-fig/cache/zipball/d11b50ad223250cf17b86e38383413f5a6764bf8", + "reference": "d11b50ad223250cf17b86e38383413f5a6764bf8", "shasum": "" }, "require": { - "doctrine/instantiator": "^1.0.2", - "php": ">=5.6", - 
"phpunit/php-text-template": "~1.2", - "sebastian/exporter": "~1.2" - }, - "require-dev": { - "phpunit/phpunit": "~5" - }, - "suggest": { - "ext-soap": "*" + "php": ">=5.3.0" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "3.1.x-dev" + "dev-master": "1.0.x-dev" } }, "autoload": { - "classmap": [ - "src/" - ] + "psr-4": { + "Psr\\Cache\\": "src/" + } }, "notification-url": "https://packagist.org/downloads/", "license": [ - "BSD-3-Clause" + "MIT" ], "authors": [ { - "name": "Sebastian Bergmann", - "email": "sb@sebastian-bergmann.de", - "role": "lead" + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" } ], - "description": "Mock Object library for PHPUnit", - "homepage": "https://github.com/sebastianbergmann/phpunit-mock-objects/", + "description": "Common interface for caching libraries", "keywords": [ - "mock", - "xunit" + "cache", + "psr", + "psr-6" ], - "time": "2016-04-20 14:39:26" + "time": "2016-08-06T20:24:11+00:00" }, { - "name": "sebastian/code-unit-reverse-lookup", + "name": "psr/container", "version": "1.0.0", "source": { "type": "git", - "url": "https://github.com/sebastianbergmann/code-unit-reverse-lookup.git", - "reference": "c36f5e7cfce482fde5bf8d10d41a53591e0198fe" + "url": "https://github.com/php-fig/container.git", + "reference": "b7ce3b176482dbbc1245ebf52b181af44c2cf55f" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/code-unit-reverse-lookup/zipball/c36f5e7cfce482fde5bf8d10d41a53591e0198fe", - "reference": "c36f5e7cfce482fde5bf8d10d41a53591e0198fe", + "url": "https://api.github.com/repos/php-fig/container/zipball/b7ce3b176482dbbc1245ebf52b181af44c2cf55f", + "reference": "b7ce3b176482dbbc1245ebf52b181af44c2cf55f", "shasum": "" }, "require": { - "php": ">=5.6" + "php": ">=5.3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Container\\": "src/" + } + }, + "notification-url": 
"https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" + } + ], + "description": "Common Container Interface (PHP FIG PSR-11)", + "homepage": "https://github.com/php-fig/container", + "keywords": [ + "PSR-11", + "container", + "container-interface", + "container-interop", + "psr" + ], + "time": "2017-02-14T16:28:37+00:00" + }, + { + "name": "psr/log", + "version": "1.1.2", + "source": { + "type": "git", + "url": "https://github.com/php-fig/log.git", + "reference": "446d54b4cb6bf489fc9d75f55843658e6f25d801" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/log/zipball/446d54b4cb6bf489fc9d75f55843658e6f25d801", + "reference": "446d54b4cb6bf489fc9d75f55843658e6f25d801", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.1.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\Log\\": "Psr/Log/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" + } + ], + "description": "Common interface for logging libraries", + "homepage": "https://github.com/php-fig/log", + "keywords": [ + "log", + "psr", + "psr-3" + ], + "time": "2019-11-01T11:05:21+00:00" + }, + { + "name": "psr/simple-cache", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/php-fig/simple-cache.git", + "reference": "408d5eafb83c57f6365a3ca330ff23aa4a5fa39b" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/simple-cache/zipball/408d5eafb83c57f6365a3ca330ff23aa4a5fa39b", + "reference": "408d5eafb83c57f6365a3ca330ff23aa4a5fa39b", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\SimpleCache\\": "src/" 
+ } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "PHP-FIG", + "homepage": "http://www.php-fig.org/" + } + ], + "description": "Common interfaces for simple caching", + "keywords": [ + "cache", + "caching", + "psr", + "psr-16", + "simple-cache" + ], + "time": "2017-10-23T01:57:42+00:00" + }, + { + "name": "sebastian/code-unit-reverse-lookup", + "version": "1.0.1", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/code-unit-reverse-lookup.git", + "reference": "4419fcdb5eabb9caa61a27c7a1db532a6b55dd18" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/code-unit-reverse-lookup/zipball/4419fcdb5eabb9caa61a27c7a1db532a6b55dd18", + "reference": "4419fcdb5eabb9caa61a27c7a1db532a6b55dd18", + "shasum": "" + }, + "require": { + "php": "^5.6 || ^7.0" }, "require-dev": { - "phpunit/phpunit": "~5" + "phpunit/phpunit": "^5.7 || ^6.0" }, "type": "library", "extra": { @@ -630,34 +2283,34 @@ ], "description": "Looks up which function or method a line of code belongs to", "homepage": "https://github.com/sebastianbergmann/code-unit-reverse-lookup/", - "time": "2016-02-13 06:45:14" + "time": "2017-03-04T06:30:41+00:00" }, { "name": "sebastian/comparator", - "version": "1.2.0", + "version": "3.0.2", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/comparator.git", - "reference": "937efb279bd37a375bcadf584dec0726f84dbf22" + "reference": "5de4fc177adf9bce8df98d8d141a7559d7ccf6da" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/comparator/zipball/937efb279bd37a375bcadf584dec0726f84dbf22", - "reference": "937efb279bd37a375bcadf584dec0726f84dbf22", + "url": "https://api.github.com/repos/sebastianbergmann/comparator/zipball/5de4fc177adf9bce8df98d8d141a7559d7ccf6da", + "reference": "5de4fc177adf9bce8df98d8d141a7559d7ccf6da", "shasum": "" }, "require": { - "php": ">=5.3.3", - "sebastian/diff": 
"~1.2", - "sebastian/exporter": "~1.2" + "php": "^7.1", + "sebastian/diff": "^3.0", + "sebastian/exporter": "^3.1" }, "require-dev": { - "phpunit/phpunit": "~4.4" + "phpunit/phpunit": "^7.1" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.2.x-dev" + "dev-master": "3.0-dev" } }, "autoload": { @@ -688,38 +2341,39 @@ } ], "description": "Provides the functionality to compare PHP values for equality", - "homepage": "http://www.github.com/sebastianbergmann/comparator", + "homepage": "https://github.com/sebastianbergmann/comparator", "keywords": [ "comparator", "compare", "equality" ], - "time": "2015-07-26 15:48:44" + "time": "2018-07-12T15:12:46+00:00" }, { "name": "sebastian/diff", - "version": "1.4.1", + "version": "3.0.2", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/diff.git", - "reference": "13edfd8706462032c2f52b4b862974dd46b71c9e" + "reference": "720fcc7e9b5cf384ea68d9d930d480907a0c1a29" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/diff/zipball/13edfd8706462032c2f52b4b862974dd46b71c9e", - "reference": "13edfd8706462032c2f52b4b862974dd46b71c9e", + "url": "https://api.github.com/repos/sebastianbergmann/diff/zipball/720fcc7e9b5cf384ea68d9d930d480907a0c1a29", + "reference": "720fcc7e9b5cf384ea68d9d930d480907a0c1a29", "shasum": "" }, "require": { - "php": ">=5.3.3" + "php": "^7.1" }, "require-dev": { - "phpunit/phpunit": "~4.8" + "phpunit/phpunit": "^7.5 || ^8.0", + "symfony/process": "^2 || ^3.3 || ^4" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.4-dev" + "dev-master": "3.0-dev" } }, "autoload": { @@ -744,34 +2398,40 @@ "description": "Diff implementation", "homepage": "https://github.com/sebastianbergmann/diff", "keywords": [ - "diff" + "diff", + "udiff", + "unidiff", + "unified diff" ], - "time": "2015-12-08 07:14:41" + "time": "2019-02-04T06:01:07+00:00" }, { "name": "sebastian/environment", - "version": "1.3.6", + "version": "4.2.3", "source": { 
"type": "git", "url": "https://github.com/sebastianbergmann/environment.git", - "reference": "2292b116f43c272ff4328083096114f84ea46a56" + "reference": "464c90d7bdf5ad4e8a6aea15c091fec0603d4368" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/2292b116f43c272ff4328083096114f84ea46a56", - "reference": "2292b116f43c272ff4328083096114f84ea46a56", + "url": "https://api.github.com/repos/sebastianbergmann/environment/zipball/464c90d7bdf5ad4e8a6aea15c091fec0603d4368", + "reference": "464c90d7bdf5ad4e8a6aea15c091fec0603d4368", "shasum": "" }, "require": { - "php": ">=5.3.3" + "php": "^7.1" }, "require-dev": { - "phpunit/phpunit": "~4.4" + "phpunit/phpunit": "^7.5" + }, + "suggest": { + "ext-posix": "*" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.3.x-dev" + "dev-master": "4.2-dev" } }, "autoload": { @@ -796,33 +2456,34 @@ "environment", "hhvm" ], - "time": "2016-05-04 07:59:13" + "time": "2019-11-20T08:46:58+00:00" }, { "name": "sebastian/exporter", - "version": "1.2.1", + "version": "3.1.2", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/exporter.git", - "reference": "7ae5513327cb536431847bcc0c10edba2701064e" + "reference": "68609e1261d215ea5b21b7987539cbfbe156ec3e" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/exporter/zipball/7ae5513327cb536431847bcc0c10edba2701064e", - "reference": "7ae5513327cb536431847bcc0c10edba2701064e", + "url": "https://api.github.com/repos/sebastianbergmann/exporter/zipball/68609e1261d215ea5b21b7987539cbfbe156ec3e", + "reference": "68609e1261d215ea5b21b7987539cbfbe156ec3e", "shasum": "" }, "require": { - "php": ">=5.3.3", - "sebastian/recursion-context": "~1.0" + "php": "^7.0", + "sebastian/recursion-context": "^3.0" }, "require-dev": { - "phpunit/phpunit": "~4.4" + "ext-mbstring": "*", + "phpunit/phpunit": "^6.0" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.2.x-dev" + 
"dev-master": "3.1.x-dev" } }, "autoload": { @@ -835,6 +2496,10 @@ "BSD-3-Clause" ], "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + }, { "name": "Jeff Welch", "email": "whatthejeff@gmail.com" @@ -843,17 +2508,13 @@ "name": "Volker Dusch", "email": "github@wallbash.com" }, - { - "name": "Bernhard Schussek", - "email": "bschussek@2bepublished.at" - }, - { - "name": "Sebastian Bergmann", - "email": "sebastian@phpunit.de" - }, { "name": "Adam Harvey", "email": "aharvey@php.net" + }, + { + "name": "Bernhard Schussek", + "email": "bschussek@gmail.com" } ], "description": "Provides the functionality to export PHP variables for visualization", @@ -862,27 +2523,30 @@ "export", "exporter" ], - "time": "2015-06-21 07:55:53" + "time": "2019-09-14T09:02:43+00:00" }, { "name": "sebastian/global-state", - "version": "1.1.1", + "version": "3.0.0", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/global-state.git", - "reference": "bc37d50fea7d017d3d340f230811c9f1d7280af4" + "reference": "edf8a461cf1d4005f19fb0b6b8b95a9f7fa0adc4" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/global-state/zipball/bc37d50fea7d017d3d340f230811c9f1d7280af4", - "reference": "bc37d50fea7d017d3d340f230811c9f1d7280af4", + "url": "https://api.github.com/repos/sebastianbergmann/global-state/zipball/edf8a461cf1d4005f19fb0b6b8b95a9f7fa0adc4", + "reference": "edf8a461cf1d4005f19fb0b6b8b95a9f7fa0adc4", "shasum": "" }, "require": { - "php": ">=5.3.3" + "php": "^7.2", + "sebastian/object-reflector": "^1.1.1", + "sebastian/recursion-context": "^3.0" }, "require-dev": { - "phpunit/phpunit": "~4.2" + "ext-dom": "*", + "phpunit/phpunit": "^8.0" }, "suggest": { "ext-uopz": "*" @@ -890,7 +2554,7 @@ "type": "library", "extra": { "branch-alias": { - "dev-master": "1.0-dev" + "dev-master": "3.0-dev" } }, "autoload": { @@ -913,33 +2577,34 @@ "keywords": [ "global state" ], - "time": "2015-10-12 03:26:01" + "time": 
"2019-02-01T05:30:01+00:00" }, { "name": "sebastian/object-enumerator", - "version": "1.0.0", + "version": "3.0.3", "source": { "type": "git", "url": "https://github.com/sebastianbergmann/object-enumerator.git", - "reference": "d4ca2fb70344987502567bc50081c03e6192fb26" + "reference": "7cfd9e65d11ffb5af41198476395774d4c8a84c5" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/object-enumerator/zipball/d4ca2fb70344987502567bc50081c03e6192fb26", - "reference": "d4ca2fb70344987502567bc50081c03e6192fb26", + "url": "https://api.github.com/repos/sebastianbergmann/object-enumerator/zipball/7cfd9e65d11ffb5af41198476395774d4c8a84c5", + "reference": "7cfd9e65d11ffb5af41198476395774d4c8a84c5", "shasum": "" }, "require": { - "php": ">=5.6", - "sebastian/recursion-context": "~1.0" + "php": "^7.0", + "sebastian/object-reflector": "^1.1.1", + "sebastian/recursion-context": "^3.0" }, "require-dev": { - "phpunit/phpunit": "~5" + "phpunit/phpunit": "^6.0" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.0.x-dev" + "dev-master": "3.0.x-dev" } }, "autoload": { @@ -959,32 +2624,77 @@ ], "description": "Traverses array structures and object graphs to enumerate all referenced objects", "homepage": "https://github.com/sebastianbergmann/object-enumerator/", - "time": "2016-01-28 13:25:10" + "time": "2017-08-03T12:35:26+00:00" }, { - "name": "sebastian/recursion-context", - "version": "1.0.2", + "name": "sebastian/object-reflector", + "version": "1.1.1", "source": { "type": "git", - "url": "https://github.com/sebastianbergmann/recursion-context.git", - "reference": "913401df809e99e4f47b27cdd781f4a258d58791" + "url": "https://github.com/sebastianbergmann/object-reflector.git", + "reference": "773f97c67f28de00d397be301821b06708fca0be" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/recursion-context/zipball/913401df809e99e4f47b27cdd781f4a258d58791", - "reference": 
"913401df809e99e4f47b27cdd781f4a258d58791", + "url": "https://api.github.com/repos/sebastianbergmann/object-reflector/zipball/773f97c67f28de00d397be301821b06708fca0be", + "reference": "773f97c67f28de00d397be301821b06708fca0be", "shasum": "" }, "require": { - "php": ">=5.3.3" + "php": "^7.0" }, "require-dev": { - "phpunit/phpunit": "~4.4" + "phpunit/phpunit": "^6.0" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.0.x-dev" + "dev-master": "1.1-dev" + } + }, + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de" + } + ], + "description": "Allows reflection of object attributes, including inherited and non-public ones", + "homepage": "https://github.com/sebastianbergmann/object-reflector/", + "time": "2017-03-29T09:07:27+00:00" + }, + { + "name": "sebastian/recursion-context", + "version": "3.0.0", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/recursion-context.git", + "reference": "5b0cd723502bac3b006cbf3dbf7a1e3fcefe4fa8" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/recursion-context/zipball/5b0cd723502bac3b006cbf3dbf7a1e3fcefe4fa8", + "reference": "5b0cd723502bac3b006cbf3dbf7a1e3fcefe4fa8", + "shasum": "" + }, + "require": { + "php": "^7.0" + }, + "require-dev": { + "phpunit/phpunit": "^6.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.0.x-dev" } }, "autoload": { @@ -1012,29 +2722,29 @@ ], "description": "Provides functionality to recursively process PHP variables", "homepage": "http://www.github.com/sebastianbergmann/recursion-context", - "time": "2015-11-11 19:50:13" + "time": "2017-03-03T06:23:57+00:00" }, { "name": "sebastian/resource-operations", - "version": "1.0.0", + "version": "2.0.1", "source": { "type": "git", "url": 
"https://github.com/sebastianbergmann/resource-operations.git", - "reference": "ce990bb21759f94aeafd30209e8cfcdfa8bc3f52" + "reference": "4d7a795d35b889bf80a0cc04e08d77cedfa917a9" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/resource-operations/zipball/ce990bb21759f94aeafd30209e8cfcdfa8bc3f52", - "reference": "ce990bb21759f94aeafd30209e8cfcdfa8bc3f52", + "url": "https://api.github.com/repos/sebastianbergmann/resource-operations/zipball/4d7a795d35b889bf80a0cc04e08d77cedfa917a9", + "reference": "4d7a795d35b889bf80a0cc04e08d77cedfa917a9", "shasum": "" }, "require": { - "php": ">=5.6.0" + "php": "^7.1" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "1.0.x-dev" + "dev-master": "2.0-dev" } }, "autoload": { @@ -1054,20 +2764,66 @@ ], "description": "Provides a list of PHP built-in functions that operate on resources", "homepage": "https://www.github.com/sebastianbergmann/resource-operations", - "time": "2015-07-28 20:34:47" + "time": "2018-10-04T04:07:39+00:00" }, { - "name": "sebastian/version", - "version": "2.0.0", + "name": "sebastian/type", + "version": "1.1.3", "source": { "type": "git", - "url": "https://github.com/sebastianbergmann/version.git", - "reference": "c829badbd8fdf16a0bad8aa7fa7971c029f1b9c5" + "url": "https://github.com/sebastianbergmann/type.git", + "reference": "3aaaa15fa71d27650d62a948be022fe3b48541a3" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/sebastianbergmann/version/zipball/c829badbd8fdf16a0bad8aa7fa7971c029f1b9c5", - "reference": "c829badbd8fdf16a0bad8aa7fa7971c029f1b9c5", + "url": "https://api.github.com/repos/sebastianbergmann/type/zipball/3aaaa15fa71d27650d62a948be022fe3b48541a3", + "reference": "3aaaa15fa71d27650d62a948be022fe3b48541a3", + "shasum": "" + }, + "require": { + "php": "^7.2" + }, + "require-dev": { + "phpunit/phpunit": "^8.2" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.1-dev" + } + }, + "autoload": { + 
"classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Sebastian Bergmann", + "email": "sebastian@phpunit.de", + "role": "lead" + } + ], + "description": "Collection of value objects that represent the types of the PHP type system", + "homepage": "https://github.com/sebastianbergmann/type", + "time": "2019-07-02T08:10:15+00:00" + }, + { + "name": "sebastian/version", + "version": "2.0.1", + "source": { + "type": "git", + "url": "https://github.com/sebastianbergmann/version.git", + "reference": "99732be0ddb3361e16ad77b68ba41efc8e979019" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/sebastianbergmann/version/zipball/99732be0ddb3361e16ad77b68ba41efc8e979019", + "reference": "99732be0ddb3361e16ad77b68ba41efc8e979019", "shasum": "" }, "require": { @@ -1097,29 +2853,1772 @@ ], "description": "Library that helps with managing the version number of Git-hosted PHP projects", "homepage": "https://github.com/sebastianbergmann/version", - "time": "2016-02-04 12:56:52" + "time": "2016-10-03T07:35:21+00:00" }, { - "name": "symfony/yaml", - "version": "v3.0.5", + "name": "seld/jsonlint", + "version": "1.7.2", "source": { "type": "git", - "url": "https://github.com/symfony/yaml.git", - "reference": "0047c8366744a16de7516622c5b7355336afae96" + "url": "https://github.com/Seldaek/jsonlint.git", + "reference": "e2e5d290e4d2a4f0eb449f510071392e00e10d19" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/symfony/yaml/zipball/0047c8366744a16de7516622c5b7355336afae96", - "reference": "0047c8366744a16de7516622c5b7355336afae96", + "url": "https://api.github.com/repos/Seldaek/jsonlint/zipball/e2e5d290e4d2a4f0eb449f510071392e00e10d19", + "reference": "e2e5d290e4d2a4f0eb449f510071392e00e10d19", "shasum": "" }, "require": { - "php": ">=5.5.9" + "php": "^5.3 || ^7.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.0" + }, + "bin": [ 
+ "bin/jsonlint" + ], + "type": "library", + "autoload": { + "psr-4": { + "Seld\\JsonLint\\": "src/Seld/JsonLint/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Jordi Boggiano", + "email": "j.boggiano@seld.be", + "homepage": "http://seld.be" + } + ], + "description": "JSON Linter", + "keywords": [ + "json", + "linter", + "parser", + "validator" + ], + "time": "2019-10-24T14:27:39+00:00" + }, + { + "name": "slevomat/coding-standard", + "version": "5.0.4", + "source": { + "type": "git", + "url": "https://github.com/slevomat/coding-standard.git", + "reference": "287ac3347c47918c0bf5e10335e36197ea10894c" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/slevomat/coding-standard/zipball/287ac3347c47918c0bf5e10335e36197ea10894c", + "reference": "287ac3347c47918c0bf5e10335e36197ea10894c", + "shasum": "" + }, + "require": { + "php": "^7.1", + "phpstan/phpdoc-parser": "^0.3.1", + "squizlabs/php_codesniffer": "^3.4.1" + }, + "require-dev": { + "jakub-onderka/php-parallel-lint": "1.0.0", + "phing/phing": "2.16.1", + "phpstan/phpstan": "0.11.4", + "phpstan/phpstan-phpunit": "0.11", + "phpstan/phpstan-strict-rules": "0.11", + "phpunit/phpunit": "8.0.5" + }, + "type": "phpcodesniffer-standard", + "autoload": { + "psr-4": { + "SlevomatCodingStandard\\": "SlevomatCodingStandard" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "Slevomat Coding Standard for PHP_CodeSniffer complements Consistence Coding Standard by providing sniffs with additional checks.", + "time": "2019-03-22T19:10:53+00:00" + }, + { + "name": "squizlabs/php_codesniffer", + "version": "3.5.4", + "source": { + "type": "git", + "url": "https://github.com/squizlabs/PHP_CodeSniffer.git", + "reference": "dceec07328401de6211037abbb18bda423677e26" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/squizlabs/PHP_CodeSniffer/zipball/dceec07328401de6211037abbb18bda423677e26", + "reference": "dceec07328401de6211037abbb18bda423677e26", + "shasum": "" + }, + "require": { + "ext-simplexml": "*", + "ext-tokenizer": "*", + "ext-xmlwriter": "*", + "php": ">=5.4.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.0 || ^5.0 || ^6.0 || ^7.0" + }, + "bin": [ + "bin/phpcs", + "bin/phpcbf" + ], + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.x-dev" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Greg Sherwood", + "role": "lead" + } + ], + "description": "PHP_CodeSniffer tokenizes PHP, JavaScript and CSS files and detects violations of a defined set of coding standards.", + "homepage": "https://github.com/squizlabs/PHP_CodeSniffer", + "keywords": [ + "phpcs", + "standards" + ], + "time": "2020-01-30T22:20:29+00:00" + }, + { + "name": "symfony/cache", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/cache.git", + "reference": "28511cbd8c760a19f4b4b70961d2cd957733b3d9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/cache/zipball/28511cbd8c760a19f4b4b70961d2cd957733b3d9", + "reference": "28511cbd8c760a19f4b4b70961d2cd957733b3d9", + "shasum": "" + }, + "require": { + "php": "^7.1.3", + "psr/cache": "~1.0", + "psr/log": "~1.0", + "symfony/cache-contracts": "^1.1.7|^2", + "symfony/service-contracts": "^1.1|^2", + "symfony/var-exporter": "^4.2|^5.0" + }, + "conflict": { + "doctrine/dbal": "<2.5", + "symfony/dependency-injection": "<3.4", + "symfony/http-kernel": "<4.4", + "symfony/var-dumper": "<4.4" + }, + "provide": { + "psr/cache-implementation": "1.0", + "psr/simple-cache-implementation": "1.0", + "symfony/cache-implementation": "1.0" + }, + "require-dev": { + "cache/integration-tests": "dev-master", + "doctrine/cache": "~1.6", + "doctrine/dbal": "~2.5", + 
"predis/predis": "~1.1", + "psr/simple-cache": "^1.0", + "symfony/config": "^4.2|^5.0", + "symfony/dependency-injection": "^3.4|^4.1|^5.0", + "symfony/var-dumper": "^4.4|^5.0" }, "type": "library", "extra": { "branch-alias": { - "dev-master": "3.0-dev" + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\Cache\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony Cache component with PSR-6, PSR-16, and tags", + "homepage": "https://symfony.com", + "keywords": [ + "caching", + "psr6" + ], + "time": "2020-02-20T16:31:44+00:00" + }, + { + "name": "symfony/cache-contracts", + "version": "v2.0.1", + "source": { + "type": "git", + "url": "https://github.com/symfony/cache-contracts.git", + "reference": "23ed8bfc1a4115feca942cb5f1aacdf3dcdf3c16" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/cache-contracts/zipball/23ed8bfc1a4115feca942cb5f1aacdf3dcdf3c16", + "reference": "23ed8bfc1a4115feca942cb5f1aacdf3dcdf3c16", + "shasum": "" + }, + "require": { + "php": "^7.2.5", + "psr/cache": "^1.0" + }, + "suggest": { + "symfony/cache-implementation": "" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.0-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Contracts\\Cache\\": "" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Generic abstractions related to caching", + "homepage": "https://symfony.com", + "keywords": [ + "abstractions", + "contracts", + 
"decoupling", + "interfaces", + "interoperability", + "standards" + ], + "time": "2019-11-18T17:27:11+00:00" + }, + { + "name": "symfony/config", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/config.git", + "reference": "cbfef5ae91ccd3b06621c18d58cd355c68c87ae9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/config/zipball/cbfef5ae91ccd3b06621c18d58cd355c68c87ae9", + "reference": "cbfef5ae91ccd3b06621c18d58cd355c68c87ae9", + "shasum": "" + }, + "require": { + "php": "^7.1.3", + "symfony/filesystem": "^3.4|^4.0|^5.0", + "symfony/polyfill-ctype": "~1.8" + }, + "conflict": { + "symfony/finder": "<3.4" + }, + "require-dev": { + "symfony/event-dispatcher": "^3.4|^4.0|^5.0", + "symfony/finder": "^3.4|^4.0|^5.0", + "symfony/messenger": "^4.1|^5.0", + "symfony/service-contracts": "^1.1|^2", + "symfony/yaml": "^3.4|^4.0|^5.0" + }, + "suggest": { + "symfony/yaml": "To use the yaml reference dumper" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\Config\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony Config Component", + "homepage": "https://symfony.com", + "time": "2020-02-04T09:32:40+00:00" + }, + { + "name": "symfony/console", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/console.git", + "reference": "4fa15ae7be74e53f6ec8c83ed403b97e23b665e9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/console/zipball/4fa15ae7be74e53f6ec8c83ed403b97e23b665e9", + "reference": "4fa15ae7be74e53f6ec8c83ed403b97e23b665e9", + "shasum": "" + 
}, + "require": { + "php": "^7.1.3", + "symfony/polyfill-mbstring": "~1.0", + "symfony/polyfill-php73": "^1.8", + "symfony/service-contracts": "^1.1|^2" + }, + "conflict": { + "symfony/dependency-injection": "<3.4", + "symfony/event-dispatcher": "<4.3|>=5", + "symfony/lock": "<4.4", + "symfony/process": "<3.3" + }, + "provide": { + "psr/log-implementation": "1.0" + }, + "require-dev": { + "psr/log": "~1.0", + "symfony/config": "^3.4|^4.0|^5.0", + "symfony/dependency-injection": "^3.4|^4.0|^5.0", + "symfony/event-dispatcher": "^4.3", + "symfony/lock": "^4.4|^5.0", + "symfony/process": "^3.4|^4.0|^5.0", + "symfony/var-dumper": "^4.3|^5.0" + }, + "suggest": { + "psr/log": "For using the console logger", + "symfony/event-dispatcher": "", + "symfony/lock": "", + "symfony/process": "" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\Console\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony Console Component", + "homepage": "https://symfony.com", + "time": "2020-02-24T13:10:00+00:00" + }, + { + "name": "symfony/debug", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/debug.git", + "reference": "a980d87a659648980d89193fd8b7a7ca89d97d21" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/debug/zipball/a980d87a659648980d89193fd8b7a7ca89d97d21", + "reference": "a980d87a659648980d89193fd8b7a7ca89d97d21", + "shasum": "" + }, + "require": { + "php": "^7.1.3", + "psr/log": "~1.0" + }, + "conflict": { + "symfony/http-kernel": "<3.4" + }, + "require-dev": { + "symfony/http-kernel": "^3.4|^4.0|^5.0" + }, + "type": 
"library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\Debug\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony Debug Component", + "homepage": "https://symfony.com", + "time": "2020-02-23T14:41:43+00:00" + }, + { + "name": "symfony/dependency-injection", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/dependency-injection.git", + "reference": "ebb2e882e8c9e2eb990aa61ddcd389848466e342" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/dependency-injection/zipball/ebb2e882e8c9e2eb990aa61ddcd389848466e342", + "reference": "ebb2e882e8c9e2eb990aa61ddcd389848466e342", + "shasum": "" + }, + "require": { + "php": "^7.1.3", + "psr/container": "^1.0", + "symfony/service-contracts": "^1.1.6|^2" + }, + "conflict": { + "symfony/config": "<4.3|>=5.0", + "symfony/finder": "<3.4", + "symfony/proxy-manager-bridge": "<3.4", + "symfony/yaml": "<3.4" + }, + "provide": { + "psr/container-implementation": "1.0", + "symfony/service-implementation": "1.0" + }, + "require-dev": { + "symfony/config": "^4.3", + "symfony/expression-language": "^3.4|^4.0|^5.0", + "symfony/yaml": "^3.4|^4.0|^5.0" + }, + "suggest": { + "symfony/config": "", + "symfony/expression-language": "For using expressions in service container configuration", + "symfony/finder": "For using double-star glob patterns or when GLOB_BRACE portability is required", + "symfony/proxy-manager-bridge": "Generate service proxies to lazy load them", + "symfony/yaml": "" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { 
+ "Symfony\\Component\\DependencyInjection\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony DependencyInjection Component", + "homepage": "https://symfony.com", + "time": "2020-02-29T09:50:10+00:00" + }, + { + "name": "symfony/error-handler", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/error-handler.git", + "reference": "89aa4b9ac6f1f35171b8621b24f60477312085be" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/error-handler/zipball/89aa4b9ac6f1f35171b8621b24f60477312085be", + "reference": "89aa4b9ac6f1f35171b8621b24f60477312085be", + "shasum": "" + }, + "require": { + "php": "^7.1.3", + "psr/log": "~1.0", + "symfony/debug": "^4.4.5", + "symfony/var-dumper": "^4.4|^5.0" + }, + "require-dev": { + "symfony/http-kernel": "^4.4|^5.0", + "symfony/serializer": "^4.4|^5.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\ErrorHandler\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony ErrorHandler Component", + "homepage": "https://symfony.com", + "time": "2020-02-26T11:45:31+00:00" + }, + { + "name": "symfony/event-dispatcher", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/event-dispatcher.git", + "reference": "4ad8e149799d3128621a3a1f70e92b9897a8930d" + }, + 
"dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/event-dispatcher/zipball/4ad8e149799d3128621a3a1f70e92b9897a8930d", + "reference": "4ad8e149799d3128621a3a1f70e92b9897a8930d", + "shasum": "" + }, + "require": { + "php": "^7.1.3", + "symfony/event-dispatcher-contracts": "^1.1" + }, + "conflict": { + "symfony/dependency-injection": "<3.4" + }, + "provide": { + "psr/event-dispatcher-implementation": "1.0", + "symfony/event-dispatcher-implementation": "1.1" + }, + "require-dev": { + "psr/log": "~1.0", + "symfony/config": "^3.4|^4.0|^5.0", + "symfony/dependency-injection": "^3.4|^4.0|^5.0", + "symfony/expression-language": "^3.4|^4.0|^5.0", + "symfony/http-foundation": "^3.4|^4.0|^5.0", + "symfony/service-contracts": "^1.1|^2", + "symfony/stopwatch": "^3.4|^4.0|^5.0" + }, + "suggest": { + "symfony/dependency-injection": "", + "symfony/http-kernel": "" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\EventDispatcher\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony EventDispatcher Component", + "homepage": "https://symfony.com", + "time": "2020-02-04T09:32:40+00:00" + }, + { + "name": "symfony/event-dispatcher-contracts", + "version": "v1.1.7", + "source": { + "type": "git", + "url": "https://github.com/symfony/event-dispatcher-contracts.git", + "reference": "c43ab685673fb6c8d84220c77897b1d6cdbe1d18" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/event-dispatcher-contracts/zipball/c43ab685673fb6c8d84220c77897b1d6cdbe1d18", + "reference": "c43ab685673fb6c8d84220c77897b1d6cdbe1d18", + "shasum": "" + }, + "require": { + 
"php": "^7.1.3" + }, + "suggest": { + "psr/event-dispatcher": "", + "symfony/event-dispatcher-implementation": "" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.1-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Contracts\\EventDispatcher\\": "" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Generic abstractions related to dispatching event", + "homepage": "https://symfony.com", + "keywords": [ + "abstractions", + "contracts", + "decoupling", + "interfaces", + "interoperability", + "standards" + ], + "time": "2019-09-17T09:54:03+00:00" + }, + { + "name": "symfony/filesystem", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/filesystem.git", + "reference": "266c9540b475f26122b61ef8b23dd9198f5d1cfd" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/filesystem/zipball/266c9540b475f26122b61ef8b23dd9198f5d1cfd", + "reference": "266c9540b475f26122b61ef8b23dd9198f5d1cfd", + "shasum": "" + }, + "require": { + "php": "^7.1.3", + "symfony/polyfill-ctype": "~1.8" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\Filesystem\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony Filesystem Component", + "homepage": "https://symfony.com", + "time": "2020-01-21T08:20:44+00:00" + }, + { + "name": "symfony/finder", + "version": "v4.4.5", + "source": { + 
"type": "git", + "url": "https://github.com/symfony/finder.git", + "reference": "ea69c129aed9fdeca781d4b77eb20b62cf5d5357" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/finder/zipball/ea69c129aed9fdeca781d4b77eb20b62cf5d5357", + "reference": "ea69c129aed9fdeca781d4b77eb20b62cf5d5357", + "shasum": "" + }, + "require": { + "php": "^7.1.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\Finder\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony Finder Component", + "homepage": "https://symfony.com", + "time": "2020-02-14T07:42:58+00:00" + }, + { + "name": "symfony/http-foundation", + "version": "v5.0.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/http-foundation.git", + "reference": "6f9c2ba72f4295d7ce6cf9f79dbb18036291d335" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/http-foundation/zipball/6f9c2ba72f4295d7ce6cf9f79dbb18036291d335", + "reference": "6f9c2ba72f4295d7ce6cf9f79dbb18036291d335", + "shasum": "" + }, + "require": { + "php": "^7.2.5", + "symfony/mime": "^4.4|^5.0", + "symfony/polyfill-mbstring": "~1.1" + }, + "require-dev": { + "predis/predis": "~1.0", + "symfony/expression-language": "^4.4|^5.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.0-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\HttpFoundation\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": 
"fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony HttpFoundation Component", + "homepage": "https://symfony.com", + "time": "2020-02-14T07:43:07+00:00" + }, + { + "name": "symfony/http-kernel", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/http-kernel.git", + "reference": "8c8734486dada83a6041ab744709bdc1651a8462" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/http-kernel/zipball/8c8734486dada83a6041ab744709bdc1651a8462", + "reference": "8c8734486dada83a6041ab744709bdc1651a8462", + "shasum": "" + }, + "require": { + "php": "^7.1.3", + "psr/log": "~1.0", + "symfony/error-handler": "^4.4", + "symfony/event-dispatcher": "^4.4", + "symfony/http-foundation": "^4.4|^5.0", + "symfony/polyfill-ctype": "^1.8", + "symfony/polyfill-php73": "^1.9" + }, + "conflict": { + "symfony/browser-kit": "<4.3", + "symfony/config": "<3.4", + "symfony/console": ">=5", + "symfony/dependency-injection": "<4.3", + "symfony/translation": "<4.2", + "twig/twig": "<1.34|<2.4,>=2" + }, + "provide": { + "psr/log-implementation": "1.0" + }, + "require-dev": { + "psr/cache": "~1.0", + "symfony/browser-kit": "^4.3|^5.0", + "symfony/config": "^3.4|^4.0|^5.0", + "symfony/console": "^3.4|^4.0", + "symfony/css-selector": "^3.4|^4.0|^5.0", + "symfony/dependency-injection": "^4.3|^5.0", + "symfony/dom-crawler": "^3.4|^4.0|^5.0", + "symfony/expression-language": "^3.4|^4.0|^5.0", + "symfony/finder": "^3.4|^4.0|^5.0", + "symfony/process": "^3.4|^4.0|^5.0", + "symfony/routing": "^3.4|^4.0|^5.0", + "symfony/stopwatch": "^3.4|^4.0|^5.0", + "symfony/templating": "^3.4|^4.0|^5.0", + "symfony/translation": "^4.2|^5.0", + "symfony/translation-contracts": "^1.1|^2", + "twig/twig": "^1.34|^2.4|^3.0" + }, + "suggest": { + "symfony/browser-kit": "", + "symfony/config": "", + "symfony/console": "", + "symfony/dependency-injection": "" + }, + "type": 
"library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\HttpKernel\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony HttpKernel Component", + "homepage": "https://symfony.com", + "time": "2020-02-29T10:31:38+00:00" + }, + { + "name": "symfony/mime", + "version": "v5.0.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/mime.git", + "reference": "9b3e5b5e58c56bbd76628c952d2b78556d305f3c" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/mime/zipball/9b3e5b5e58c56bbd76628c952d2b78556d305f3c", + "reference": "9b3e5b5e58c56bbd76628c952d2b78556d305f3c", + "shasum": "" + }, + "require": { + "php": "^7.2.5", + "symfony/polyfill-intl-idn": "^1.10", + "symfony/polyfill-mbstring": "^1.0" + }, + "conflict": { + "symfony/mailer": "<4.4" + }, + "require-dev": { + "egulias/email-validator": "^2.1.10", + "symfony/dependency-injection": "^4.4|^5.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.0-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\Mime\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "A library to manipulate MIME messages", + "homepage": "https://symfony.com", + "keywords": [ + "mime", + "mime-type" + ], + "time": "2020-02-04T09:41:09+00:00" + }, + { + "name": "symfony/options-resolver", + "version": "v4.4.5", 
+ "source": { + "type": "git", + "url": "https://github.com/symfony/options-resolver.git", + "reference": "9a02d6662660fe7bfadad63b5f0b0718d4c8b6b0" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/options-resolver/zipball/9a02d6662660fe7bfadad63b5f0b0718d4c8b6b0", + "reference": "9a02d6662660fe7bfadad63b5f0b0718d4c8b6b0", + "shasum": "" + }, + "require": { + "php": "^7.1.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\OptionsResolver\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony OptionsResolver Component", + "homepage": "https://symfony.com", + "keywords": [ + "config", + "configuration", + "options" + ], + "time": "2020-01-04T13:00:46+00:00" + }, + { + "name": "symfony/polyfill-ctype", + "version": "v1.14.0", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-ctype.git", + "reference": "fbdeaec0df06cf3d51c93de80c7eb76e271f5a38" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-ctype/zipball/fbdeaec0df06cf3d51c93de80c7eb76e271f5a38", + "reference": "fbdeaec0df06cf3d51c93de80c7eb76e271f5a38", + "shasum": "" + }, + "require": { + "php": ">=5.3.3" + }, + "suggest": { + "ext-ctype": "For best performance" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.14-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Ctype\\": "" + }, + "files": [ + "bootstrap.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Gert de Pagter", + "email": "BackEndTea@gmail.com" + 
}, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill for ctype functions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "ctype", + "polyfill", + "portable" + ], + "time": "2020-01-13T11:15:53+00:00" + }, + { + "name": "symfony/polyfill-intl-idn", + "version": "v1.14.0", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-intl-idn.git", + "reference": "6842f1a39cf7d580655688069a03dd7cd83d244a" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-intl-idn/zipball/6842f1a39cf7d580655688069a03dd7cd83d244a", + "reference": "6842f1a39cf7d580655688069a03dd7cd83d244a", + "shasum": "" + }, + "require": { + "php": ">=5.3.3", + "symfony/polyfill-mbstring": "^1.3", + "symfony/polyfill-php72": "^1.10" + }, + "suggest": { + "ext-intl": "For best performance" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.14-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Intl\\Idn\\": "" + }, + "files": [ + "bootstrap.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Laurent Bassin", + "email": "laurent@bassin.info" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill for intl's idn_to_ascii and idn_to_utf8 functions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "idn", + "intl", + "polyfill", + "portable", + "shim" + ], + "time": "2020-01-17T12:01:36+00:00" + }, + { + "name": "symfony/polyfill-mbstring", + "version": "v1.14.0", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-mbstring.git", + "reference": "34094cfa9abe1f0f14f48f490772db7a775559f2" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/symfony/polyfill-mbstring/zipball/34094cfa9abe1f0f14f48f490772db7a775559f2", + "reference": "34094cfa9abe1f0f14f48f490772db7a775559f2", + "shasum": "" + }, + "require": { + "php": ">=5.3.3" + }, + "suggest": { + "ext-mbstring": "For best performance" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.14-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Mbstring\\": "" + }, + "files": [ + "bootstrap.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill for the Mbstring extension", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "mbstring", + "polyfill", + "portable", + "shim" + ], + "time": "2020-01-13T11:15:53+00:00" + }, + { + "name": "symfony/polyfill-php70", + "version": "v1.14.0", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-php70.git", + "reference": "419c4940024c30ccc033650373a1fe13890d3255" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-php70/zipball/419c4940024c30ccc033650373a1fe13890d3255", + "reference": "419c4940024c30ccc033650373a1fe13890d3255", + "shasum": "" + }, + "require": { + "paragonie/random_compat": "~1.0|~2.0|~9.99", + "php": ">=5.3.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.14-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Php70\\": "" + }, + "files": [ + "bootstrap.php" + ], + "classmap": [ + "Resources/stubs" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + 
], + "description": "Symfony polyfill backporting some PHP 7.0+ features to lower PHP versions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "polyfill", + "portable", + "shim" + ], + "time": "2020-01-13T11:15:53+00:00" + }, + { + "name": "symfony/polyfill-php72", + "version": "v1.14.0", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-php72.git", + "reference": "46ecacf4751dd0dc81e4f6bf01dbf9da1dc1dadf" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-php72/zipball/46ecacf4751dd0dc81e4f6bf01dbf9da1dc1dadf", + "reference": "46ecacf4751dd0dc81e4f6bf01dbf9da1dc1dadf", + "shasum": "" + }, + "require": { + "php": ">=5.3.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.14-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Php72\\": "" + }, + "files": [ + "bootstrap.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill backporting some PHP 7.2+ features to lower PHP versions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "polyfill", + "portable", + "shim" + ], + "time": "2020-01-13T11:15:53+00:00" + }, + { + "name": "symfony/polyfill-php73", + "version": "v1.14.0", + "source": { + "type": "git", + "url": "https://github.com/symfony/polyfill-php73.git", + "reference": "5e66a0fa1070bf46bec4bea7962d285108edd675" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/polyfill-php73/zipball/5e66a0fa1070bf46bec4bea7962d285108edd675", + "reference": "5e66a0fa1070bf46bec4bea7962d285108edd675", + "shasum": "" + }, + "require": { + "php": ">=5.3.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.14-dev" + } 
+ }, + "autoload": { + "psr-4": { + "Symfony\\Polyfill\\Php73\\": "" + }, + "files": [ + "bootstrap.php" + ], + "classmap": [ + "Resources/stubs" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony polyfill backporting some PHP 7.3+ features to lower PHP versions", + "homepage": "https://symfony.com", + "keywords": [ + "compatibility", + "polyfill", + "portable", + "shim" + ], + "time": "2020-01-13T11:15:53+00:00" + }, + { + "name": "symfony/process", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/process.git", + "reference": "bf9166bac906c9e69fb7a11d94875e7ced97bcd7" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/process/zipball/bf9166bac906c9e69fb7a11d94875e7ced97bcd7", + "reference": "bf9166bac906c9e69fb7a11d94875e7ced97bcd7", + "shasum": "" + }, + "require": { + "php": "^7.1.3" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\Process\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony Process Component", + "homepage": "https://symfony.com", + "time": "2020-02-07T20:06:44+00:00" + }, + { + "name": "symfony/service-contracts", + "version": "v2.0.1", + "source": { + "type": "git", + "url": "https://github.com/symfony/service-contracts.git", + "reference": "144c5e51266b281231e947b51223ba14acf1a749" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/symfony/service-contracts/zipball/144c5e51266b281231e947b51223ba14acf1a749", + "reference": "144c5e51266b281231e947b51223ba14acf1a749", + "shasum": "" + }, + "require": { + "php": "^7.2.5", + "psr/container": "^1.0" + }, + "suggest": { + "symfony/service-implementation": "" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.0-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Contracts\\Service\\": "" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Generic abstractions related to writing services", + "homepage": "https://symfony.com", + "keywords": [ + "abstractions", + "contracts", + "decoupling", + "interfaces", + "interoperability", + "standards" + ], + "time": "2019-11-18T17:27:11+00:00" + }, + { + "name": "symfony/stopwatch", + "version": "v5.0.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/stopwatch.git", + "reference": "5d9add8034135b9a5f7b101d1e42c797e7f053e4" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/stopwatch/zipball/5d9add8034135b9a5f7b101d1e42c797e7f053e4", + "reference": "5d9add8034135b9a5f7b101d1e42c797e7f053e4", + "shasum": "" + }, + "require": { + "php": "^7.2.5", + "symfony/service-contracts": "^1.0|^2" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.0-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\Stopwatch\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": 
"Symfony Stopwatch Component", + "homepage": "https://symfony.com", + "time": "2020-01-04T14:08:26+00:00" + }, + { + "name": "symfony/var-dumper", + "version": "v5.0.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/var-dumper.git", + "reference": "3a37aeb1132d1035536d3d6aa9cb06c2ff9355e9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/var-dumper/zipball/3a37aeb1132d1035536d3d6aa9cb06c2ff9355e9", + "reference": "3a37aeb1132d1035536d3d6aa9cb06c2ff9355e9", + "shasum": "" + }, + "require": { + "php": "^7.2.5", + "symfony/polyfill-mbstring": "~1.0" + }, + "conflict": { + "phpunit/phpunit": "<5.4.3", + "symfony/console": "<4.4" + }, + "require-dev": { + "ext-iconv": "*", + "symfony/console": "^4.4|^5.0", + "symfony/process": "^4.4|^5.0", + "twig/twig": "^2.4|^3.0" + }, + "suggest": { + "ext-iconv": "To convert non-UTF-8 strings to UTF-8 (or symfony/polyfill-iconv in case ext-iconv cannot be used).", + "ext-intl": "To show region name in time zone dump", + "symfony/console": "To use the ServerDumpCommand and/or the bin/var-dump-server script" + }, + "bin": [ + "Resources/bin/var-dump-server" + ], + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.0-dev" + } + }, + "autoload": { + "files": [ + "Resources/functions/dump.php" + ], + "psr-4": { + "Symfony\\Component\\VarDumper\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "Symfony mechanism for exploring and dumping PHP variables", + "homepage": "https://symfony.com", + "keywords": [ + "debug", + "dump" + ], + "time": "2020-02-26T22:30:10+00:00" + }, + { + "name": "symfony/var-exporter", + "version": "v5.0.5", + "source": { + "type": "git", + "url": 
"https://github.com/symfony/var-exporter.git", + "reference": "30779a25c736b4290449eaedefe4196c1d060378" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/var-exporter/zipball/30779a25c736b4290449eaedefe4196c1d060378", + "reference": "30779a25c736b4290449eaedefe4196c1d060378", + "shasum": "" + }, + "require": { + "php": "^7.2.5" + }, + "require-dev": { + "symfony/var-dumper": "^4.4|^5.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "5.0-dev" + } + }, + "autoload": { + "psr-4": { + "Symfony\\Component\\VarExporter\\": "" + }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Nicolas Grekas", + "email": "p@tchwork.com" + }, + { + "name": "Symfony Community", + "homepage": "https://symfony.com/contributors" + } + ], + "description": "A blend of var_export() + serialize() to turn any serializable data structure to plain PHP code", + "homepage": "https://symfony.com", + "keywords": [ + "clone", + "construct", + "export", + "hydrate", + "instantiate", + "serialize" + ], + "time": "2020-02-04T09:47:34+00:00" + }, + { + "name": "symfony/yaml", + "version": "v4.4.5", + "source": { + "type": "git", + "url": "https://github.com/symfony/yaml.git", + "reference": "94d005c176db2080e98825d98e01e8b311a97a88" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/symfony/yaml/zipball/94d005c176db2080e98825d98e01e8b311a97a88", + "reference": "94d005c176db2080e98825d98e01e8b311a97a88", + "shasum": "" + }, + "require": { + "php": "^7.1.3", + "symfony/polyfill-ctype": "~1.8" + }, + "conflict": { + "symfony/console": "<3.4" + }, + "require-dev": { + "symfony/console": "^3.4|^4.0|^5.0" + }, + "suggest": { + "symfony/console": "For validating YAML files using the lint command" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "4.4-dev" } }, "autoload": { @@ -1146,7 
+4645,300 @@ ], "description": "Symfony Yaml Component", "homepage": "https://symfony.com", - "time": "2016-03-04 07:55:57" + "time": "2020-02-03T10:46:43+00:00" + }, + { + "name": "symplify/coding-standard", + "version": "v6.1.0", + "source": { + "type": "git", + "url": "https://github.com/Symplify/CodingStandard.git", + "reference": "d692701e2c74edd8c0cc7c35f47b8421b8b4885c" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Symplify/CodingStandard/zipball/d692701e2c74edd8c0cc7c35f47b8421b8b4885c", + "reference": "d692701e2c74edd8c0cc7c35f47b8421b8b4885c", + "shasum": "" + }, + "require": { + "friendsofphp/php-cs-fixer": "^2.15", + "nette/finder": "^2.4", + "nette/utils": "^2.5|^3.0", + "php": "^7.1", + "phpstan/phpdoc-parser": "^0.3.4", + "squizlabs/php_codesniffer": "^3.4", + "symplify/package-builder": "^6.1" + }, + "require-dev": { + "nette/application": "^3.0", + "phpunit/phpunit": "^7.5|^8.0", + "symplify/easy-coding-standard-tester": "^6.1", + "symplify/package-builder": "^6.1" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "6.1-dev" + } + }, + "autoload": { + "psr-4": { + "Symplify\\CodingStandard\\": "src", + "Symplify\\CodingStandard\\TokenRunner\\": "packages/TokenRunner/src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "Set of Symplify rules for PHP_CodeSniffer and PHP CS Fixer.", + "time": "2019-09-18T08:01:34+00:00" + }, + { + "name": "symplify/easy-coding-standard", + "version": "v6.1.0", + "source": { + "type": "git", + "url": "https://github.com/Symplify/EasyCodingStandard.git", + "reference": "94b8cf03af132d007d8a33c8dad5655cff6a74e8" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Symplify/EasyCodingStandard/zipball/94b8cf03af132d007d8a33c8dad5655cff6a74e8", + "reference": "94b8cf03af132d007d8a33c8dad5655cff6a74e8", + "shasum": "" + }, + "require": { + "composer/xdebug-handler": "^1.3", + 
"friendsofphp/php-cs-fixer": "^2.15", + "jean85/pretty-package-versions": "^1.2", + "nette/robot-loader": "^3.1.0", + "nette/utils": "^2.5|^3.0", + "ocramius/package-versions": "^1.3", + "php": "^7.1", + "psr/simple-cache": "^1.0", + "slevomat/coding-standard": "^5.0.1", + "squizlabs/php_codesniffer": "^3.4", + "symfony/cache": "^3.4|^4.3", + "symfony/config": "^3.4|^4.3", + "symfony/console": "^3.4|^4.3", + "symfony/dependency-injection": "^3.4.10|^4.2", + "symfony/finder": "^3.4|^4.3", + "symfony/http-kernel": "^3.4|^4.3", + "symfony/yaml": "^3.4|^4.3", + "symplify/coding-standard": "^6.1", + "symplify/package-builder": "^6.1" + }, + "require-dev": { + "phpunit/phpunit": "^7.5|^8.0", + "symplify/easy-coding-standard-tester": "^6.1" + }, + "bin": [ + "bin/ecs" + ], + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "6.1-dev" + } + }, + "autoload": { + "psr-4": { + "Symplify\\EasyCodingStandard\\": "src", + "Symplify\\EasyCodingStandard\\ChangedFilesDetector\\": "packages/ChangedFilesDetector/src", + "Symplify\\EasyCodingStandard\\Configuration\\": "packages/Configuration/src", + "Symplify\\EasyCodingStandard\\FixerRunner\\": "packages/FixerRunner/src", + "Symplify\\EasyCodingStandard\\SniffRunner\\": "packages/SniffRunner/src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "Use Coding Standard with 0-knowledge of PHP-CS-Fixer and PHP_CodeSniffer.", + "time": "2019-09-14T22:46:23+00:00" + }, + { + "name": "symplify/package-builder", + "version": "v6.1.0", + "source": { + "type": "git", + "url": "https://github.com/Symplify/PackageBuilder.git", + "reference": "fbdfe363a27070cfdfbc47d5f59e711ed08bb060" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Symplify/PackageBuilder/zipball/fbdfe363a27070cfdfbc47d5f59e711ed08bb060", + "reference": "fbdfe363a27070cfdfbc47d5f59e711ed08bb060", + "shasum": "" + }, + "require": { + "nette/finder": "^2.4", + "nette/utils": 
"^2.5|^3.0", + "php": "^7.1", + "symfony/config": "^3.4|^4.3", + "symfony/console": "^3.4|^4.3", + "symfony/debug": "^3.4|^4.3", + "symfony/dependency-injection": "^3.4.10|^4.2", + "symfony/finder": "^3.4|^4.3", + "symfony/http-kernel": "^3.4|^4.3", + "symfony/yaml": "^3.4|^4.3" + }, + "require-dev": { + "phpunit/phpunit": "^7.5|^8.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "6.1-dev" + } + }, + "autoload": { + "psr-4": { + "Symplify\\PackageBuilder\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "description": "Dependency Injection, Console and Kernel toolkit for Symplify packages.", + "time": "2019-09-17T20:48:03+00:00" + }, + { + "name": "theseer/tokenizer", + "version": "1.1.3", + "source": { + "type": "git", + "url": "https://github.com/theseer/tokenizer.git", + "reference": "11336f6f84e16a720dae9d8e6ed5019efa85a0f9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/theseer/tokenizer/zipball/11336f6f84e16a720dae9d8e6ed5019efa85a0f9", + "reference": "11336f6f84e16a720dae9d8e6ed5019efa85a0f9", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-tokenizer": "*", + "ext-xmlwriter": "*", + "php": "^7.0" + }, + "type": "library", + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Arne Blankerts", + "email": "arne@blankerts.de", + "role": "Developer" + } + ], + "description": "A small library for converting tokenized PHP source code into XML and potentially other formats", + "time": "2019-06-13T22:48:21+00:00" + }, + { + "name": "webmozart/assert", + "version": "1.7.0", + "source": { + "type": "git", + "url": "https://github.com/webmozart/assert.git", + "reference": "aed98a490f9a8f78468232db345ab9cf606cf598" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/webmozart/assert/zipball/aed98a490f9a8f78468232db345ab9cf606cf598", + "reference": "aed98a490f9a8f78468232db345ab9cf606cf598", + "shasum": "" + }, + "require": { + "php": "^5.3.3 || ^7.0", + "symfony/polyfill-ctype": "^1.8" + }, + "conflict": { + "vimeo/psalm": "<3.6.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.8.36 || ^7.5.13" + }, + "type": "library", + "autoload": { + "psr-4": { + "Webmozart\\Assert\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Bernhard Schussek", + "email": "bschussek@gmail.com" + } + ], + "description": "Assertions to validate method input/output with nice error messages.", + "keywords": [ + "assert", + "check", + "validate" + ], + "time": "2020-02-14T12:15:55+00:00" + }, + { + "name": "webmozart/path-util", + "version": "2.3.0", + "source": { + "type": "git", + "url": "https://github.com/webmozart/path-util.git", + "reference": "d939f7edc24c9a1bb9c0dee5cb05d8e859490725" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/webmozart/path-util/zipball/d939f7edc24c9a1bb9c0dee5cb05d8e859490725", + "reference": "d939f7edc24c9a1bb9c0dee5cb05d8e859490725", + "shasum": "" + }, + "require": { + "php": ">=5.3.3", + "webmozart/assert": "~1.0" + }, + "require-dev": { + "phpunit/phpunit": "^4.6", + "sebastian/version": "^1.0.1" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.3-dev" + } + }, + "autoload": { + "psr-4": { + "Webmozart\\PathUtil\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Bernhard Schussek", + "email": "bschussek@gmail.com" + } + ], + "description": "A robust cross-platform utility for normalizing, comparing and modifying file paths.", + "time": "2015-12-17T08:42:14+00:00" } ], "aliases": [], @@ -1155,7 +4947,7 @@ "prefer-stable": false, "prefer-lowest": false, "platform": { - 
"php": ">=7.0.0" + "php": "^7.2" }, "platform-dev": [] } diff --git a/docs/assets/php-ml-logo.png b/docs/assets/php-ml-logo.png new file mode 100644 index 0000000..5a3766a Binary files /dev/null and b/docs/assets/php-ml-logo.png differ diff --git a/docs/index.md b/docs/index.md index d3f65b7..bff00f2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,15 +1,26 @@ -# PHP Machine Learning library +# PHP-ML - Machine Learning library for PHP -[![Build Status](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/build.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/build-status/develop) -[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=develop)](http://php-ml.readthedocs.org/en/develop/?badge=develop) +[![Minimum PHP Version](https://img.shields.io/badge/php-%3E%3D%207.2-8892BF.svg)](https://php.net/) +[![Latest Stable Version](https://img.shields.io/packagist/v/php-ai/php-ml.svg)](https://packagist.org/packages/php-ai/php-ml) +[![Build Status](https://travis-ci.org/php-ai/php-ml.svg?branch=master)](https://travis-ci.org/php-ai/php-ml) +[![Documentation Status](https://readthedocs.org/projects/php-ml/badge/?version=master)](http://php-ml.readthedocs.org/) [![Total Downloads](https://poser.pugx.org/php-ai/php-ml/downloads.svg)](https://packagist.org/packages/php-ai/php-ml) [![License](https://poser.pugx.org/php-ai/php-ml/license.svg)](https://packagist.org/packages/php-ai/php-ml) -[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=develop)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=develop) +[![Coverage Status](https://coveralls.io/repos/github/php-ai/php-ml/badge.svg?branch=master)](https://coveralls.io/github/php-ai/php-ml?branch=master) +[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/php-ai/php-ml/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/php-ai/php-ml/?branch=master) -Fresh approach to Machine Learning in PHP. 
Note that at the moment PHP is not the best choice for machine learning but maybe this will change ... +

+ +

+ +Fresh approach to Machine Learning in PHP. Algorithms, Cross Validation, Neural Network, Preprocessing, Feature Extraction and much more in one library. + +PHP-ML requires PHP >= 7.2. Simple example of classification: ```php +require_once __DIR__ . '/vendor/autoload.php'; + use Phpml\Classification\KNearestNeighbors; $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; @@ -18,58 +29,84 @@ $labels = ['a', 'a', 'a', 'b', 'b', 'b']; $classifier = new KNearestNeighbors(); $classifier->train($samples, $labels); -$classifier->predict([3, 2]); +$classifier->predict([3, 2]); // return 'b' ``` +## Awards + + + + ## Documentation To find out how to use PHP-ML follow [Documentation](http://php-ml.readthedocs.org/). ## Installation -Currently this library is in the process of developing, but You can install it with Composer: +This library is still in beta. However, it can be installed with Composer: ``` composer require php-ai/php-ml ``` +## Examples + +Example scripts are available in a separate repository [php-ai/php-ml-examples](https://github.com/php-ai/php-ml-examples). 
+ ## Features +* Association rule Learning + * [Apriori](machine-learning/association/apriori.md) * Classification - * [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/) - * [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/) + * [SVC](machine-learning/classification/svc.md) + * [k-Nearest Neighbors](machine-learning/classification/k-nearest-neighbors.md) + * [Naive Bayes](machine-learning/classification/naive-bayes.md) * Regression - * [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/) + * [Least Squares](machine-learning/regression/least-squares.md) + * [SVR](machine-learning/regression/svr.md) * Clustering - * [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means) - * [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan) + * [k-Means](machine-learning/clustering/k-means.md) + * [DBSCAN](machine-learning/clustering/dbscan.md) +* Metric + * [Accuracy](machine-learning/metric/accuracy.md) + * [Confusion Matrix](machine-learning/metric/confusion-matrix.md) + * [Classification Report](machine-learning/metric/classification-report.md) +* Workflow + * [Pipeline](machine-learning/workflow/pipeline) +* Neural Network + * [Multilayer Perceptron Classifier](machine-learning/neural-network/multilayer-perceptron-classifier.md) * Cross Validation - * [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split) + * [Random Split](machine-learning/cross-validation/random-split.md) + * [Stratified Random Split](machine-learning/cross-validation/stratified-random-split.md) +* Feature Selection + * [Variance Threshold](machine-learning/feature-selection/variance-threshold.md) + * [SelectKBest](machine-learning/feature-selection/selectkbest.md) +* Preprocessing + * 
[Normalization](machine-learning/preprocessing/normalization.md) + * [Imputation missing values](machine-learning/preprocessing/imputation-missing-values.md) + * LabelEncoder +* Feature Extraction + * [Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer.md) + * [Tf-idf Transformer](machine-learning/feature-extraction/tf-idf-transformer.md) * Datasets - * [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset) + * [Array](machine-learning/datasets/array-dataset.md) + * [CSV](machine-learning/datasets/csv-dataset.md) + * [Files](machine-learning/datasets/files-dataset.md) + * [SVM](machine-learning/datasets/svm-dataset.md) + * [MNIST](machine-learning/datasets/mnist-dataset.md) * Ready to use: - * [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/) + * [Iris](machine-learning/datasets/demo/iris.md) + * [Wine](machine-learning/datasets/demo/wine.md) + * [Glass](machine-learning/datasets/demo/glass.md) +* Models management + * [Persistency](machine-learning/model-manager/persistency.md) * Math - * [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/) - * [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/) - + * [Distance](math/distance.md) + * [Matrix](math/matrix.md) + * [Set](math/set.md) + * [Statistic](math/statistic.md) -## Contribute - -- Issue Tracker: github.com/php-ai/php-ml/issues -- Source Code: github.com/php-ai/php-ml - -After installation, you can launch the test suite in project root directory (you will need to install dev requirements with Composer) - -``` -bin/phpunit -``` - -## License - -PHP-ML is released under the MIT Licence. See the bundled LICENSE file for details. 
- -## Author +## Arkadiusz Kondas (@ArkadiuszKondas) diff --git a/docs/machine-learning/association/apriori.md b/docs/machine-learning/association/apriori.md new file mode 100644 index 0000000..779ef28 --- /dev/null +++ b/docs/machine-learning/association/apriori.md @@ -0,0 +1,60 @@ +# Apriori Associator + +Association rule learning based on [Apriori algorithm](https://en.wikipedia.org/wiki/Apriori_algorithm) for frequent item set mining. + +### Constructor Parameters + +* $support - minimum threshold of [support](https://en.wikipedia.org/wiki/Association_rule_learning#Support), i.e. the ratio of samples which contain both X and Y for a rule "if X then Y" +* $confidence - minimum threshold of [confidence](https://en.wikipedia.org/wiki/Association_rule_learning#Confidence), i.e. the ratio of samples containing both X and Y to those containing X + +``` +use Phpml\Association\Apriori; + +$associator = new Apriori($support = 0.5, $confidence = 0.5); +``` + +### Train + +To train an associator, simply provide train samples and labels (as `array`). Example: + +``` +$samples = [['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta'], ['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta']]; +$labels = []; + +use Phpml\Association\Apriori; + +$associator = new Apriori($support = 0.5, $confidence = 0.5); +$associator->train($samples, $labels); +``` + +You can train the associator using multiple data sets, predictions will be based on all the training data. + +### Predict + +To predict sample label use the `predict` method. You can provide one sample or array of samples: + +``` +$associator->predict(['alpha','theta']); +// return [['beta']] + +$associator->predict([['alpha','epsilon'],['beta','theta']]); +// return [[['beta']], [['alpha']]] +``` + +### Associating + +To get generated association rules, simply use the `getRules` method.
+ +``` +$associator->getRules(); +// return [['antecedent' => ['alpha', 'theta'], 'consequent' => ['beta'], 'support' => 1.0, 'confidence' => 1.0], ... ] +``` + +### Frequent item sets + +To generate k-length frequent item sets, simply use the `apriori` method. + +``` +$associator->apriori(); +// return [ 1 => [['alpha'], ['beta'], ['theta'], ['epsilon']], 2 => [...], ...] +``` diff --git a/docs/machine-learning/classification/k-nearest-neighbors.md b/docs/machine-learning/classification/k-nearest-neighbors.md index 3d5aa27..a4ba53b 100644 --- a/docs/machine-learning/classification/k-nearest-neighbors.md +++ b/docs/machine-learning/classification/k-nearest-neighbors.md @@ -2,19 +2,19 @@ Classifier implementing the k-nearest neighbors algorithm. -### Constructor Parameters +## Constructor Parameters * $k - number of nearest neighbors to scan (default: 3) -* $distanceMetric - Distance object, default Euclidean (see [distance documentation](math/distance/)) +* $distanceMetric - Distance object, default Euclidean (see [distance documentation](../../math/distance.md)) ``` $classifier = new KNearestNeighbors($k=4); $classifier = new KNearestNeighbors($k=3, new Minkowski($lambda=4)); ``` -### Train +## Train -To train a classifier simply provide train samples and labels (as `array`). Example: +To train a classifier, simply provide train samples and labels (as `array`). Example: ``` $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; @@ -24,9 +24,11 @@ $classifier = new KNearestNeighbors(); $classifier->train($samples, $labels); ``` -### Predict +You can train the classifier using multiple data sets, predictions will be based on all the training data. -To predict sample label use `predict` method. You can provide one sample or array of samples: +## Predict + +To predict sample label use the `predict` method. 
You can provide one sample or array of samples: ``` $classifier->predict([3, 2]); diff --git a/docs/machine-learning/classification/naive-bayes.md b/docs/machine-learning/classification/naive-bayes.md index e990321..57fcdcf 100644 --- a/docs/machine-learning/classification/naive-bayes.md +++ b/docs/machine-learning/classification/naive-bayes.md @@ -4,7 +4,7 @@ Classifier based on applying Bayes' theorem with strong (naive) independence ass ### Train -To train a classifier simply provide train samples and labels (as `array`). Example: +To train a classifier, simply provide train samples and labels (as `array`). Example: ``` $samples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; @@ -14,14 +14,16 @@ $classifier = new NaiveBayes(); $classifier->train($samples, $labels); ``` +You can train the classifier using multiple data sets, predictions will be based on all the training data. + ### Predict -To predict sample label use `predict` method. You can provide one sample or array of samples: +To predict sample label use the `predict` method. You can provide one sample or array of samples: ``` $classifier->predict([3, 1, 1]); // return 'a' -$classifier->predict([[3, 1, 1], [1, 4, 1]); +$classifier->predict([[3, 1, 1], [1, 4, 1]]); // return ['a', 'b'] ``` diff --git a/docs/machine-learning/classification/svc.md b/docs/machine-learning/classification/svc.md new file mode 100644 index 0000000..3d87b62 --- /dev/null +++ b/docs/machine-learning/classification/svc.md @@ -0,0 +1,88 @@ +# Support Vector Classification + +Classifier implementing Support Vector Machine based on libsvm. + +### Constructor Parameters + +* $kernel (int) - kernel type to be used in the algorithm (default Kernel::RBF) +* $cost (float) - parameter C of C-SVC (default 1.0) +* $degree (int) - degree of the Kernel::POLYNOMIAL function (default 3) +* $gamma (float) - kernel coefficient for ‘Kernel::RBF’, ‘Kernel::POLYNOMIAL’ and ‘Kernel::SIGMOID’. If gamma is ‘null’ then 1/features will be used instead. 
+* $coef0 (float) - independent term in kernel function. It is only significant in ‘Kernel::POLYNOMIAL’ and ‘Kernel::SIGMOID’ (default 0.0) +* $tolerance (float) - tolerance of termination criterion (default 0.001) +* $cacheSize (int) - cache memory size in MB (default 100) +* $shrinking (bool) - whether to use the shrinking heuristics (default true) +* $probabilityEstimates (bool) - whether to enable probability estimates (default false) + +``` +$classifier = new SVC(Kernel::LINEAR, $cost = 1000); +$classifier = new SVC(Kernel::RBF, $cost = 1000, $degree = 3, $gamma = 6); +``` + +### Train + +To train a classifier, simply provide train samples and labels (as `array`). Example: + +``` +use Phpml\Classification\SVC; +use Phpml\SupportVectorMachine\Kernel; + +$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; +$labels = ['a', 'a', 'a', 'b', 'b', 'b']; + +$classifier = new SVC(Kernel::LINEAR, $cost = 1000); +$classifier->train($samples, $labels); +``` + +You can train the classifier using multiple data sets, predictions will be based on all the training data. + +### Predict + +To predict sample label use the `predict` method. You can provide one sample or array of samples: + +``` +$classifier->predict([3, 2]); +// return 'b' + +$classifier->predict([[3, 2], [1, 5]]); +// return ['b', 'a'] +``` + +### Probability estimation + +To predict probabilities you must build a classifier with `$probabilityEstimates` set to true. 
Example: + +``` +use Phpml\Classification\SVC; +use Phpml\SupportVectorMachine\Kernel; + +$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; +$labels = ['a', 'a', 'a', 'b', 'b', 'b']; + +$classifier = new SVC( + Kernel::LINEAR, // $kernel + 1.0, // $cost + 3, // $degree + null, // $gamma + 0.0, // $coef0 + 0.001, // $tolerance + 100, // $cacheSize + true, // $shrinking + true // $probabilityEstimates, set to true +); + +$classifier->train($samples, $labels); +``` + +Then use the `predictProbability` method instead of `predict`: + +``` +$classifier->predictProbability([3, 2]); +// return ['a' => 0.349833, 'b' => 0.650167] + +$classifier->predictProbability([[3, 2], [1, 5]]); +// return [ +// ['a' => 0.349833, 'b' => 0.650167], +// ['a' => 0.922664, 'b' => 0.0773364], +// ] +``` diff --git a/docs/machine-learning/clustering/dbscan.md b/docs/machine-learning/clustering/dbscan.md index 45dd631..ce01198 100644 --- a/docs/machine-learning/clustering/dbscan.md +++ b/docs/machine-learning/clustering/dbscan.md @@ -7,7 +7,7 @@ It is a density-based clustering algorithm: given a set of points in some space, * $epsilon - epsilon, maximum distance between two samples for them to be considered as in the same neighborhood * $minSamples - number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself) -* $distanceMetric - Distance object, default Euclidean (see [distance documentation](math/distance/)) +* $distanceMetric - Distance object, default Euclidean (see [distance documentation](../../math/distance.md)) ``` $dbscan = new DBSCAN($epsilon = 2, $minSamples = 3); @@ -16,12 +16,12 @@ $dbscan = new DBSCAN($epsilon = 2, $minSamples = 3, new Minkowski($lambda=4)); ### Clustering -To divide the samples into clusters simply use `cluster` method. It's return the `array` of clusters with samples inside. +To divide the samples into clusters, simply use the `cluster` method. 
It returns the `array` of clusters with samples inside. ``` $samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]]; $dbscan = new DBSCAN($epsilon = 2, $minSamples = 3); $dbscan->cluster($samples); -// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]] +// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]] ``` diff --git a/docs/machine-learning/clustering/k-means.md b/docs/machine-learning/clustering/k-means.md index 296feb1..132c2dc 100644 --- a/docs/machine-learning/clustering/k-means.md +++ b/docs/machine-learning/clustering/k-means.md @@ -1,6 +1,6 @@ # K-means clustering -The K-Means algorithm clusters data by trying to separate samples in n groups of equal variance, minimizing a criterion known as the inertia or within-cluster sum-of-squares. +The K-Means algorithm clusters data by trying to separate samples in n groups of equal variance, minimizing a criterion known as the inertia or within-cluster sum-of-squares. This algorithm requires the number of clusters to be specified. ### Constructor Parameters @@ -15,14 +15,16 @@ $kmeans = new KMeans(4, KMeans::INIT_RANDOM); ### Clustering -To divide the samples into clusters simply use `cluster` method. It's return the `array` of clusters with samples inside. +To divide the samples into clusters, simply use the `cluster` method. It returns the `array` of clusters with samples inside. ``` $samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]]; +Or if you need to keep your identifiers along with your samples you can use array keys as labels.
+$samples = [ 'Label1' => [1, 1], 'Label2' => [8, 7], 'Label3' => [1, 2]]; $kmeans = new KMeans(2); $kmeans->cluster($samples); -// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]] +// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]] or [0=>['Label1' => [1, 1], 'Label3' => [1, 2], ...], 1=>['Label2' => [8, 7], ...]] ``` ### Initialization methods @@ -30,8 +32,8 @@ $kmeans->cluster($samples); #### kmeans++ (default) K-means++ method selects initial cluster centers for k-mean clustering in a smart way to speed up convergence. -It use the DASV seeding method consists of finding good initial centroids for the clusters. +It uses the DASV seeding method, which consists of finding good initial centroids for the clusters. #### random -Random initialization method chooses completely random centroid. It get the space boundaries to avoid placing clusters centroid too far from samples data. +Random initialization method chooses completely random centroid. It gets the space boundaries to avoid placing cluster centroids too far from samples data. diff --git a/docs/machine-learning/cross-validation/random-split.md b/docs/machine-learning/cross-validation/random-split.md index 464f0db..a5bf402 100644 --- a/docs/machine-learning/cross-validation/random-split.md +++ b/docs/machine-learning/cross-validation/random-split.md @@ -1,20 +1,20 @@ -# RandomSplit +# Random Split -One of the simplest methods from Cross-validation is implemented as `RandomSpilt` class. Samples are split to two groups: train group and test group. You can adjust number of samples in each group. +One of the simplest methods from Cross-validation is implemented as `RandomSplit` class. Samples are split to two groups: train group and test group. You can adjust the number of samples in each group.
### Constructor Parameters * $dataset - object that implements `Dataset` interface * $testSize - a fraction of test split (float, from 0 to 1, default: 0.3) -* $seed - seed for random generator (for tests) - +* $seed - seed for random generator (e.g. for tests) + ``` $randomSplit = new RandomSplit($dataset, 0.2); ``` ### Samples and labels groups -To get samples or labels from test and train group you can use getters: +To get samples or labels from test and train group, you can use getters: ``` $dataset = new RandomSplit($dataset, 0.3, 1234); diff --git a/docs/machine-learning/cross-validation/stratified-random-split.md b/docs/machine-learning/cross-validation/stratified-random-split.md new file mode 100644 index 0000000..1a6caa1 --- /dev/null +++ b/docs/machine-learning/cross-validation/stratified-random-split.md @@ -0,0 +1,44 @@ +# Stratified Random Split + +Analogously to `RandomSplit` class, samples are split to two groups: train group and test group. +Distribution of samples takes into account their targets and tries to divide them equally. +You can adjust the number of samples in each group. + +### Constructor Parameters + +* $dataset - object that implements `Dataset` interface +* $testSize - a fraction of test split (float, from 0 to 1, default: 0.3) +* $seed - seed for random generator (e.g.
for tests) + +``` +$split = new StratifiedRandomSplit($dataset, 0.2); +``` + +### Samples and labels groups + +To get samples or labels from test and train group, you can use getters: + +``` +$dataset = new StratifiedRandomSplit($dataset, 0.3, 1234); + +// train group +$dataset->getTrainSamples(); +$dataset->getTrainLabels(); + +// test group +$dataset->getTestSamples(); +$dataset->getTestLabels(); +``` + +### Example + +``` +$dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4], [5], [6], [7], [8]], + $targets = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'] +); + +$split = new StratifiedRandomSplit($dataset, 0.5); +``` + +Split will have equal amounts of each target. Two of the target `a` and two of `b`. diff --git a/docs/machine-learning/datasets/array-dataset.md b/docs/machine-learning/datasets/array-dataset.md index 5081ed8..87bae48 100644 --- a/docs/machine-learning/datasets/array-dataset.md +++ b/docs/machine-learning/datasets/array-dataset.md @@ -2,20 +2,40 @@ Helper class that holds data as PHP `array` type. Implements the `Dataset` interface which is used heavily in other classes. 
-### Constructors Parameters +### Constructor Parameters * $samples - (array) of samples * $labels - (array) of labels ``` +use Phpml\Dataset\ArrayDataset; + $dataset = new ArrayDataset([[1, 1], [2, 1], [3, 2], [4, 1]], ['a', 'a', 'b', 'b']); ``` ### Samples and labels -To get samples or labels you can use getters: +To get samples or labels, you can use getters: ``` $dataset->getSamples(); -$dataset->getLabels(); +$dataset->getTargets(); +``` + +### Remove columns + +You can remove columns by their index numbers, for example: + +``` +use Phpml\Dataset\ArrayDataset; + +$dataset = new ArrayDataset( + [[1,2,3,4], [2,3,4,5], [3,4,5,6], [4,5,6,7]], + ['a', 'a', 'b', 'b'] +); + +$dataset->removeColumns([0,2]); + +// now from each sample column 0 and 2 are removed +// [[2,4], [3,5], [4,6], [5,7]] ``` diff --git a/docs/machine-learning/datasets/csv-dataset.md b/docs/machine-learning/datasets/csv-dataset.md index 0ea6319..557b7fc 100644 --- a/docs/machine-learning/datasets/csv-dataset.md +++ b/docs/machine-learning/datasets/csv-dataset.md @@ -2,14 +2,14 @@ Helper class that loads data from CSV file. It extends the `ArrayDataset`. -### Constructors Parameters +### Constructor Parameters * $filepath - (string) path to `.csv` file * $features - (int) number of columns that are features (starts from first column), last column must be a label -* $headingRow - (bool) define is file have a heading row (if `true` then first row will be ignored) +* $headingRow - (bool) define if the file has a heading row (if `true` then first row will be ignored) ``` $dataset = new CsvDataset('dataset.csv', 2, true); ``` -See [ArrayDataset](machine-learning/datasets/array-dataset/) for more information. +See [ArrayDataset](array-dataset.md) for more information. 
diff --git a/docs/machine-learning/datasets/demo/glass.md b/docs/machine-learning/datasets/demo/glass.md new file mode 100644 index 0000000..5ba1665 --- /dev/null +++ b/docs/machine-learning/datasets/demo/glass.md @@ -0,0 +1,42 @@ +# Glass Dataset + +From USA Forensic Science Service; 6 types of glass; defined in terms of their oxide content (i.e. Na, Fe, K, etc) + +### Specification + +| Classes | 6 | +| Samples total | 214 | +| Features per sample | 9 | + +Samples per class: + * 70 float processed building windows + * 17 float processed vehicle windows + * 76 non-float processed building windows + * 13 containers + * 9 tableware + * 29 headlamps + +### Load + +To load the Glass dataset, simply use: + +``` +use Phpml\Dataset\Demo\GlassDataset; + +$dataset = new GlassDataset(); +``` + +### Several samples example + +``` +RI: refractive index,Na: Sodium,Mg: Magnesium,Al: Aluminum,Si: Silicon,K: Potassium,Ca: Calcium,Ba: Barium,Fe: Iron,type of glass +1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,building_windows_float_processed +1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,building_windows_float_processed +1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,building_windows_float_processed +1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,building_windows_float_processed +1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,building_windows_float_processed +1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.00,0.26,building_windows_float_processed +1.51743,13.30,3.60,1.14,73.09,0.58,8.17,0.00,0.00,building_windows_float_processed +1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.00,0.00,building_windows_float_processed +1.51918,14.04,3.58,1.37,72.08,0.56,8.30,0.00,0.00,building_windows_float_processed +``` diff --git a/docs/machine-learning/datasets/demo/iris.md b/docs/machine-learning/datasets/demo/iris.md index 5972f1b..4b602cf 100644 --- a/docs/machine-learning/datasets/demo/iris.md +++ b/docs/machine-learning/datasets/demo/iris.md @@ -14,7 +14,9 @@ Most popular and widely
available dataset of iris flower measurement and class n To load Iris dataset simple use: ``` -$dataset = new Iris(); +use Phpml\Dataset\Demo\IrisDataset; + +$dataset = new IrisDataset(); ``` ### Several samples example diff --git a/docs/machine-learning/datasets/demo/wine.md b/docs/machine-learning/datasets/demo/wine.md new file mode 100644 index 0000000..76a157b --- /dev/null +++ b/docs/machine-learning/datasets/demo/wine.md @@ -0,0 +1,35 @@ +# Wine Dataset + +These data are the results of a chemical analysis of wines grown in the same region in Italy but derived from three different cultivars. The analysis determined the quantities of 13 constituents found in each of the three types of wines. + +### Specification + +| Classes | 3 | +| Samples per class | class 1 59; class 2 71; class 3 48 | +| Samples total | 178 | +| Features per sample | 13 | + +### Load + +To load the Wine dataset, simply use: + +``` +use Phpml\Dataset\Demo\WineDataset; + +$dataset = new WineDataset(); +``` + +### Several samples example + +``` +alcohol,malic acid,ash,alcalinity of ash,magnesium,total phenols,flavanoids,nonflavanoid phenols,proanthocyanins,color intensity,hue,OD280/OD315 of diluted wines,proline,class +14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065,1 +13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050,1 +13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185,1 +14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480,1 +13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735,1 +14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450,1 +14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290,1 +14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295,1 +14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045,1 +``` diff --git a/docs/machine-learning/datasets/files-dataset.md b/docs/machine-learning/datasets/files-dataset.md new file mode 100644 index 0000000..6d55b3f --- /dev/null +++
b/docs/machine-learning/datasets/files-dataset.md @@ -0,0 +1,57 @@ +# FilesDataset + +Helper class that loads dataset from files. Uses folder names as targets. It extends the `ArrayDataset`. + +### Constructor Parameters + +* $rootPath - (string) path to root folder that contains files dataset + +``` +use Phpml\Dataset\FilesDataset; + +$dataset = new FilesDataset('path/to/data'); +``` + +See [ArrayDataset](array-dataset.md) for more information. + +### Example + +Files structure: + +``` +data + business + 001.txt + 002.txt + ... + entertainment + 001.txt + 002.txt + ... + politics + 001.txt + 002.txt + ... + sport + 001.txt + 002.txt + ... + tech + 001.txt + 002.txt + ... +``` + +Load files data with `FilesDataset`: + +``` +use Phpml\Dataset\FilesDataset; + +$dataset = new FilesDataset('path/to/data'); + +$dataset->getSamples()[0][0] // content from file path/to/data/business/001.txt +$dataset->getTargets()[0] // business + +$dataset->getSamples()[40][0] // content from file path/to/data/tech/001.txt +$dataset->getTargets()[40] // tech +``` diff --git a/docs/machine-learning/datasets/mnist-dataset.md b/docs/machine-learning/datasets/mnist-dataset.md new file mode 100644 index 0000000..5c7a76e --- /dev/null +++ b/docs/machine-learning/datasets/mnist-dataset.md @@ -0,0 +1,26 @@ +# MnistDataset + +Helper class that loads data from MNIST dataset: [http://yann.lecun.com/exdb/mnist/](http://yann.lecun.com/exdb/mnist/) + +> The MNIST database of handwritten digits, available from this page, has a training set of 60,000 examples, and a test set of 10,000 examples. It is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image. + It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting.
+ +### Constructor Parameters + +* $imagePath - (string) path to image file +* $labelPath - (string) path to label file + +``` +use Phpml\Dataset\MnistDataset; + +$trainDataset = new MnistDataset('train-images-idx3-ubyte', 'train-labels-idx1-ubyte'); +``` + +### Samples and labels + +To get samples or labels, you can use getters: + +``` +$trainDataset->getSamples(); +$trainDataset->getTargets(); +``` diff --git a/docs/machine-learning/datasets/svm-dataset.md b/docs/machine-learning/datasets/svm-dataset.md new file mode 100644 index 0000000..93a8cfb --- /dev/null +++ b/docs/machine-learning/datasets/svm-dataset.md @@ -0,0 +1,13 @@ +# SvmDataset + +Helper class that loads data from SVM-Light format file. It extends the `ArrayDataset`. + +### Constructor Parameters + +* $filepath - (string) path to the file + +``` +$dataset = new SvmDataset('dataset.svm'); +``` + +See [ArrayDataset](array-dataset.md) for more information. diff --git a/docs/machine-learning/feature-extraction/tf-idf-transformer.md b/docs/machine-learning/feature-extraction/tf-idf-transformer.md new file mode 100644 index 0000000..4ac2e5d --- /dev/null +++ b/docs/machine-learning/feature-extraction/tf-idf-transformer.md @@ -0,0 +1,42 @@ +# Tf-idf Transformer + +Tf–idf, short for term frequency–inverse document frequency, is a numerical statistic that is intended to reflect how important a word is to a document in a collection or corpus. + +### Constructor Parameters + +* $samples (array) - samples used to fit the tf-idf model + +``` +use Phpml\FeatureExtraction\TfIdfTransformer; + +$samples = [ + [1, 2, 4], + [0, 2, 1] +]; + +$transformer = new TfIdfTransformer($samples); +``` + +### Transformation + +To transform a collection of text samples, use the `transform` method.
Example: + +``` +use Phpml\FeatureExtraction\TfIdfTransformer; + +$samples = [ + [0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 0, 5 => 0], + [0 => 1, 1 => 1, 2 => 0, 3 => 0, 4 => 2, 5 => 3], +]; + +$transformer = new TfIdfTransformer($samples); +$transformer->transform($samples); + +/* +$samples = [ + [0 => 0, 1 => 0, 2 => 0.602, 3 => 0.301, 4 => 0, 5 => 0], + [0 => 0, 1 => 0, 2 => 0, 3 => 0, 4 => 0.602, 5 => 0.903], +]; +*/ + +``` diff --git a/docs/machine-learning/feature-extraction/token-count-vectorizer.md b/docs/machine-learning/feature-extraction/token-count-vectorizer.md new file mode 100644 index 0000000..7d9405e --- /dev/null +++ b/docs/machine-learning/feature-extraction/token-count-vectorizer.md @@ -0,0 +1,90 @@ +# Token Count Vectorizer + +Transform a collection of text samples to a vector of token counts. + +### Constructor Parameters + +* $tokenizer (Tokenizer) - tokenizer object (see below) +* $minDF (float) - ignore tokens that have a samples frequency strictly lower than the given threshold. This value is also called cut-off in the literature. (default 0) + +``` +use Phpml\FeatureExtraction\TokenCountVectorizer; +use Phpml\Tokenization\WhitespaceTokenizer; + +$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer()); +``` + +### Transformation + +To transform a collection of text samples, use the `transform` method. Example: + +``` +$samples = [ + 'Lorem ipsum dolor sit amet dolor', + 'Mauris placerat ipsum dolor', + 'Mauris diam eros fringilla diam', +]; + +$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer()); + +// Build the dictionary. +$vectorizer->fit($samples); + +// Transform the provided text samples into a vectorized list. +$vectorizer->transform($samples); +// return $samples = [ +// [0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 1], +// [5 => 1, 6 => 1, 1 => 1, 2 => 1], +// [5 => 1, 7 => 2, 8 => 1, 9 => 1], +//]; + +``` + +### Vocabulary + +You can extract vocabulary using the `getVocabulary()` method. 
Example: + +``` +$vectorizer->getVocabulary(); +// return $vocabulary = ['Lorem', 'ipsum', 'dolor', 'sit', 'amet', 'Mauris', 'placerat', 'diam', 'eros', 'fringilla']; +``` + +### Tokenizers + +* WhitespaceTokenizer - select tokens by whitespace. +* WordTokenizer - select tokens of 2 or more alphanumeric characters (punctuation is completely ignored and always treated as a token separator). +* NGramTokenizer - continuous sequence of characters of the specified length. They are useful for querying languages that don’t use spaces or that have long compound words, like German. + +**NGramTokenizer** + +The NGramTokenizer tokenizer accepts the following parameters: + +`$minGram` - minimum length of characters in a gram. Defaults to 1. +`$maxGram` - maximum length of characters in a gram. Defaults to 2. + +```php +use Phpml\Tokenization\NGramTokenizer; + +$tokenizer = new NGramTokenizer(1, 2); + +$tokenizer->tokenize('Quick Fox'); + +// returns ['Q', 'u', 'i', 'c', 'k', 'Qu', 'ui', 'ic', 'ck', 'F', 'o', 'x', 'Fo', 'ox'] +``` + +**NGramWordTokenizer** + +The NGramWordTokenizer tokenizer accepts the following parameters: + +`$minGram` - minimum length of characters in a gram. Defaults to 1. +`$maxGram` - maximum length of characters in a gram. Defaults to 2. + +```php +use Phpml\Tokenization\NGramWordTokenizer; + +$tokenizer = new NGramWordTokenizer(1, 2); + +$tokenizer->tokenize('very quick fox'); + +// returns ['very', 'quick', 'fox', 'very quick', 'quick fox'] +``` diff --git a/docs/machine-learning/feature-selection/selectkbest.md b/docs/machine-learning/feature-selection/selectkbest.md new file mode 100644 index 0000000..71d1ff9 --- /dev/null +++ b/docs/machine-learning/feature-selection/selectkbest.md @@ -0,0 +1,96 @@ +# SelectKBest + +`SelectKBest` - select features according to the k highest scores. 
+ +## Constructor Parameters + +* $k (int) - number of top features to select, rest will be removed (default: 10) +* $scoringFunction (ScoringFunction) - function that takes samples and targets and returns an array with scores (default: ANOVAFValue) + +```php +use Phpml\FeatureSelection\SelectKBest; + +$transformer = new SelectKBest(2); +``` + +## Example of use + +As an example we can perform feature selection on Iris dataset to retrieve only the two best features as follows: + +```php +use Phpml\FeatureSelection\SelectKBest; +use Phpml\Dataset\Demo\IrisDataset; + +$dataset = new IrisDataset(); +$selector = new SelectKBest(2); +$selector->fit($samples = $dataset->getSamples(), $dataset->getTargets()); +$selector->transform($samples); + +/* +$samples[0] = [1.4, 0.2]; +*/ +``` + +## Scores + +You can get an array with the calculated score for each feature. +A higher value means that a given feature is better suited for learning. +Of course, the rating depends on the scoring function used. + +``` +use Phpml\FeatureSelection\SelectKBest; +use Phpml\Dataset\Demo\IrisDataset; + +$dataset = new IrisDataset(); +$selector = new SelectKBest(2); +$selector->fit($samples = $dataset->getSamples(), $dataset->getTargets()); +$selector->scores(); + +/* +..array(4) { + [0]=> + float(119.26450218451) + [1]=> + float(47.364461402997) + [2]=> + float(1179.0343277002) + [3]=> + float(959.32440572573) +} +*/ +``` + +## Scoring function + +Available scoring functions: + +For classification: + - **ANOVAFValue** + The one-way ANOVA tests the null hypothesis that 2 or more groups have the same population mean. + The test is applied to samples from two or more groups, possibly with differing sizes. + +For regression: + - **UnivariateLinearRegression** + Quick linear model for testing the effect of a single regressor, sequentially for many regressors. + This is done in 2 steps: + - 1. 
The cross correlation between each regressor and the target is computed, that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *std(y)). + - 2. It is converted to an F score + +## Pipeline + +`SelectKBest` implements `Transformer` interface so it can be used as part of pipeline: + +```php +use Phpml\FeatureSelection\SelectKBest; +use Phpml\Classification\SVC; +use Phpml\FeatureExtraction\TfIdfTransformer; +use Phpml\Pipeline; + +$transformers = [ + new TfIdfTransformer(), + new SelectKBest(3) +]; +$estimator = new SVC(); + +$pipeline = new Pipeline($transformers, $estimator); +``` diff --git a/docs/machine-learning/feature-selection/variance-threshold.md b/docs/machine-learning/feature-selection/variance-threshold.md new file mode 100644 index 0000000..4021895 --- /dev/null +++ b/docs/machine-learning/feature-selection/variance-threshold.md @@ -0,0 +1,60 @@ +# Variance Threshold + +`VarianceThreshold` is a simple baseline approach to feature selection. +It removes all features whose variance doesn’t meet some threshold. +By default, it removes all zero-variance features, i.e. features that have the same value in all samples. + +## Constructor Parameters + +* $threshold (float) - features with a variance lower than this threshold will be removed (default 0.0) + +```php +use Phpml\FeatureSelection\VarianceThreshold; + +$transformer = new VarianceThreshold(0.15); +``` + +## Example of use + +As an example, suppose that we have a dataset with boolean features and +we want to remove all features that are either one or zero (on or off) +in more than 80% of the samples. 
+Boolean features are Bernoulli random variables, and the variance of such +variables is given by +``` +Var[X] = p(1 - p) +``` +so we can select using the threshold .8 * (1 - .8): + +```php +use Phpml\FeatureSelection\VarianceThreshold; + +$samples = [[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 1, 0], [0, 1, 1]]; +$transformer = new VarianceThreshold(0.8 * (1 - 0.8)); + +$transformer->fit($samples); +$transformer->transform($samples); + +/* +$samples = [[0, 1], [1, 0], [0, 0], [1, 1], [1, 0], [1, 1]]; +*/ +``` + +## Pipeline + +`VarianceThreshold` implements `Transformer` interface so it can be used as part of pipeline: + +```php +use Phpml\FeatureSelection\VarianceThreshold; +use Phpml\Classification\SVC; +use Phpml\FeatureExtraction\TfIdfTransformer; +use Phpml\Pipeline; + +$transformers = [ + new TfIdfTransformer(), + new VarianceThreshold(0.1) +]; +$estimator = new SVC(); + +$pipeline = new Pipeline($transformers, $estimator); +``` diff --git a/docs/machine-learning/metric/accuracy.md b/docs/machine-learning/metric/accuracy.md index 5045973..efdab23 100644 --- a/docs/machine-learning/metric/accuracy.md +++ b/docs/machine-learning/metric/accuracy.md @@ -1,10 +1,10 @@ # Accuracy -Class for calculate classifier accuracy. +Class for calculating classifier accuracy. ### Score -To calculate classifier accuracy score use `score` static method. Parameters: +To calculate classifier accuracy score, use the `score` static method. Parameters: * $actualLabels - (array) true sample labels * $predictedLabels - (array) predicted labels (e.x. from test group) diff --git a/docs/machine-learning/metric/classification-report.md b/docs/machine-learning/metric/classification-report.md new file mode 100644 index 0000000..f5591a8 --- /dev/null +++ b/docs/machine-learning/metric/classification-report.md @@ -0,0 +1,66 @@ +# Classification Report + +Class for calculating main classifier metrics: precision, recall, F1 score and support. 
+ +### Report + +To generate a report, you must provide the following parameters: + +* $actualLabels - (array) true sample labels +* $predictedLabels - (array) predicted labels (e.g. from test group) + +``` +use Phpml\Metric\ClassificationReport; + +$actualLabels = ['cat', 'ant', 'bird', 'bird', 'bird']; +$predictedLabels = ['cat', 'cat', 'bird', 'bird', 'ant']; + +$report = new ClassificationReport($actualLabels, $predictedLabels); +``` + +Optionally you can provide the following parameter: + +* $average - (int) averaging method for multi-class classification + * `ClassificationReport::MICRO_AVERAGE` = 1 + * `ClassificationReport::MACRO_AVERAGE` = 2 (default) + * `ClassificationReport::WEIGHTED_AVERAGE` = 3 + +### Metrics + +After creating the report you can draw its individual metrics: + +* precision (`getPrecision()`) - fraction of retrieved instances that are relevant +* recall (`getRecall()`) - fraction of relevant instances that are retrieved +* F1 score (`getF1score()`) - measure of a test's accuracy +* support (`getSupport()`) - count of tested samples + +``` +$precision = $report->getPrecision(); +// $precision = ['cat' => 0.5, 'ant' => 0.0, 'bird' => 1.0]; +``` + +### Example + +``` +use Phpml\Metric\ClassificationReport; + +$actualLabels = ['cat', 'ant', 'bird', 'bird', 'bird']; +$predictedLabels = ['cat', 'cat', 'bird', 'bird', 'ant']; + +$report = new ClassificationReport($actualLabels, $predictedLabels); + +$report->getPrecision(); +// ['cat' => 0.5, 'ant' => 0.0, 'bird' => 1.0] + +$report->getRecall(); +// ['cat' => 1.0, 'ant' => 0.0, 'bird' => 0.67] + +$report->getF1score(); +// ['cat' => 0.67, 'ant' => 0.0, 'bird' => 0.80] + +$report->getSupport(); +// ['cat' => 1, 'ant' => 1, 'bird' => 3] + +$report->getAverage(); +// ['precision' => 0.5, 'recall' => 0.56, 'f1score' => 0.49] +``` diff --git a/docs/machine-learning/metric/confusion-matrix.md b/docs/machine-learning/metric/confusion-matrix.md new file mode 100644 index 0000000..4ff08c9 --- /dev/null
+++ b/docs/machine-learning/metric/confusion-matrix.md @@ -0,0 +1,44 @@ +# Confusion Matrix + +Class for computing a confusion matrix to evaluate the accuracy of a classification. + +### Example (all targets) + +Compute ConfusionMatrix for all targets. + +``` +use Phpml\Metric\ConfusionMatrix; + +$actualTargets = [2, 0, 2, 2, 0, 1]; +$predictedTargets = [0, 0, 2, 2, 0, 2]; + +$confusionMatrix = ConfusionMatrix::compute($actualTargets, $predictedTargets); + +/* +$confusionMatrix = [ + [2, 0, 0], + [0, 0, 1], + [1, 0, 2], +]; +*/ +``` + +### Example (chosen targets) + +Compute ConfusionMatrix for chosen targets. + +``` +use Phpml\Metric\ConfusionMatrix; + +$actualTargets = ['cat', 'ant', 'cat', 'cat', 'ant', 'bird']; +$predictedTargets = ['ant', 'ant', 'cat', 'cat', 'ant', 'cat']; + +$confusionMatrix = ConfusionMatrix::compute($actualTargets, $predictedTargets, ['ant', 'bird']); + +/* +$confusionMatrix = [ + [2, 0], + [0, 0], +]; +*/ +``` diff --git a/docs/machine-learning/model-manager/persistency.md b/docs/machine-learning/model-manager/persistency.md new file mode 100644 index 0000000..626ae42 --- /dev/null +++ b/docs/machine-learning/model-manager/persistency.md @@ -0,0 +1,24 @@ +# Persistency + +You can save trained models for future use. Persistency across requests is achieved by saving and restoring serialized estimators into files.
+ +### Example + +``` +use Phpml\Classification\KNearestNeighbors; +use Phpml\ModelManager; + +$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; +$labels = ['a', 'a', 'a', 'b', 'b', 'b']; + +$classifier = new KNearestNeighbors(); +$classifier->train($samples, $labels); + +$filepath = '/path/to/store/the/model'; +$modelManager = new ModelManager(); +$modelManager->saveToFile($classifier, $filepath); + +$restoredClassifier = $modelManager->restoreFromFile($filepath); +$restoredClassifier->predict([3, 2]); +// return 'b' +``` diff --git a/docs/machine-learning/neural-network/multilayer-perceptron-classifier.md b/docs/machine-learning/neural-network/multilayer-perceptron-classifier.md new file mode 100644 index 0000000..976d475 --- /dev/null +++ b/docs/machine-learning/neural-network/multilayer-perceptron-classifier.md @@ -0,0 +1,88 @@ +# MLPClassifier + +A multilayer perceptron (MLP) is a feedforward artificial neural network model that maps sets of input data onto a set of appropriate outputs. + +## Constructor Parameters + +* $inputLayerFeatures (int) - the number of input layer features +* $hiddenLayers (array) - array with the hidden layers configuration, each value represents the number of neurons in the corresponding layer +* $classes (array) - array with the different training set classes (array keys are ignored) +* $iterations (int) - number of training iterations +* $learningRate (float) - the learning rate +* $activationFunction (ActivationFunction) - neuron activation function + +``` +use Phpml\Classification\MLPClassifier; +$mlp = new MLPClassifier(4, [2], ['a', 'b', 'c']); + +// 4 nodes in input layer, 2 nodes in first hidden layer and 3 possible labels. + +``` + +An Activation Function may also be passed in with each individual hidden layer.
Example: + +``` +use Phpml\NeuralNetwork\ActivationFunction\PReLU; +use Phpml\NeuralNetwork\ActivationFunction\Sigmoid; +$mlp = new MLPClassifier(4, [[2, new PReLU], [2, new Sigmoid]], ['a', 'b', 'c']); +``` + +Instead of configuring each hidden layer as an array, they may also be configured with Layer objects. Example: + +``` +use Phpml\NeuralNetwork\Layer; +use Phpml\NeuralNetwork\Node\Neuron; +$layer1 = new Layer(2, Neuron::class, new PReLU); +$layer2 = new Layer(2, Neuron::class, new Sigmoid); +$mlp = new MLPClassifier(4, [$layer1, $layer2], ['a', 'b', 'c']); +``` + +## Train + +To train a MLP, simply provide train samples and labels (as array). Example: + +``` +$mlp->train( + $samples = [[1, 0, 0, 0], [0, 1, 1, 0], [1, 1, 1, 1], [0, 0, 0, 0]], + $targets = ['a', 'a', 'b', 'c'] +); +``` + +Use partialTrain method to train in batches. Example: + +``` +$mlp->partialTrain( + $samples = [[1, 0, 0, 0], [0, 1, 1, 0]], + $targets = ['a', 'a'] +); +$mlp->partialTrain( + $samples = [[1, 1, 1, 1], [0, 0, 0, 0]], + $targets = ['b', 'c'] +); + +``` + +You can update the learning rate between partialTrain runs: + +``` +$mlp->setLearningRate(0.1); +``` + +## Predict + +To predict sample label use the `predict` method. You can provide one sample or array of samples: + +``` +$mlp->predict([[1, 1, 1, 1], [0, 0, 0, 0]]); +// return ['b', 'c']; + +``` + +## Activation Functions + +* BinaryStep +* Gaussian +* HyperbolicTangent +* Parametric Rectified Linear Unit +* Sigmoid (default) +* Thresholded Rectified Linear Unit diff --git a/docs/machine-learning/preprocessing/imputation-missing-values.md b/docs/machine-learning/preprocessing/imputation-missing-values.md new file mode 100644 index 0000000..302d89d --- /dev/null +++ b/docs/machine-learning/preprocessing/imputation-missing-values.md @@ -0,0 +1,67 @@ +# Imputation missing values + +For various reasons, many real world datasets contain missing values, often encoded as blanks, NaNs or other placeholders. 
+To solve this problem you can use the `Imputer` class. + +## Constructor Parameters + +* $missingValue (mixed) - this value will be replaced (default null) +* $strategy (Strategy) - imputation strategy (ready to use: MeanStrategy, MedianStrategy, MostFrequentStrategy) +* $axis (int) - axis for strategy, Imputer::AXIS_COLUMN or Imputer::AXIS_ROW +* $samples (array) - array of samples to train + +``` +$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN); +$imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_ROW); +``` + +## Strategy + +* MeanStrategy - replace missing values using the mean along the axis +* MedianStrategy - replace missing values using the median along the axis +* MostFrequentStrategy - replace missing values using the most frequent value along the axis + +## Example of use + +``` +use Phpml\Preprocessing\Imputer; +use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy; + +$data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], +]; + +$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN); +$imputer->fit($data); +$imputer->transform($data); + +/* +$data = [ + [1, 5.33, 3, 4], + [4, 3, 2, 1], + [4.33, 6, 7, 8], + [8, 7, 4, 5], +]; +*/ + +``` + +You can also use the `$samples` constructor parameter instead of the `fit` method: + +``` +use Phpml\Preprocessing\Imputer; +use Phpml\Preprocessing\Imputer\Strategy\MeanStrategy; + +$data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], +]; + +$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN, $data); +$imputer->transform($data); +``` diff --git a/docs/machine-learning/preprocessing/normalization.md b/docs/machine-learning/preprocessing/normalization.md new file mode 100644 index 0000000..61b1a8d --- /dev/null +++ b/docs/machine-learning/preprocessing/normalization.md @@ -0,0 +1,59 @@ +# Normalization + +Normalization is the process of scaling individual samples to have unit norm.
+ +## L2 norm + +[http://mathworld.wolfram.com/L2-Norm.html](http://mathworld.wolfram.com/L2-Norm.html) + +Example: + +``` +use Phpml\Preprocessing\Normalizer; + +$samples = [ + [1, -1, 2], + [2, 0, 0], + [0, 1, -1], +]; + +$normalizer = new Normalizer(); +$normalizer->preprocess($samples); + +/* +$samples = [ + [0.4, -0.4, 0.81], + [1.0, 0.0, 0.0], + [0.0, 0.7, -0.7], +]; +*/ + +``` + +## L1 norm + +[http://mathworld.wolfram.com/L1-Norm.html](http://mathworld.wolfram.com/L1-Norm.html) + +Example: + +``` +use Phpml\Preprocessing\Normalizer; + +$samples = [ + [1, -1, 2], + [2, 0, 0], + [0, 1, -1], +]; + +$normalizer = new Normalizer(Normalizer::NORM_L1); +$normalizer->preprocess($samples); + +/* +$samples = [ + [0.25, -0.25, 0.5], + [1.0, 0.0, 0.0], + [0.0, 0.5, -0.5], +]; +*/ + +``` diff --git a/docs/machine-learning/regression/least-squares.md b/docs/machine-learning/regression/least-squares.md index 4a00bcd..5505f13 100644 --- a/docs/machine-learning/regression/least-squares.md +++ b/docs/machine-learning/regression/least-squares.md @@ -1,10 +1,10 @@ # LeastSquares Linear Regression -Linear model that use least squares method to approximate solution. +Linear model that uses least squares method to approximate solution. ### Train -To train a model simply provide train samples and targets values (as `array`). Example: +To train a model, simply provide train samples and targets values (as `array`). Example: ``` $samples = [[60], [61], [62], [63], [65]]; @@ -14,9 +14,11 @@ $regression = new LeastSquares(); $regression->train($samples, $targets); ``` +You can train the model using multiple data sets, predictions will be based on all the training data. + ### Predict -To predict sample target value use `predict` method with sample to check (as `array`). Example: +To predict sample target value, use the `predict` method with sample to check (as `array`). 
Example: ``` $regression->predict([64]); @@ -25,8 +27,8 @@ $regression->predict([64]); ### Multiple Linear Regression -The term multiple attached to linear regression means that there are two or more sample parameters used to predict target. -For example you can use: mileage and production year to predict price of a car. +The term multiple attached to linear regression means that there are two or more sample parameters used to predict target. +For example you can use: mileage and production year to predict the price of a car. ``` $samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]]; @@ -40,7 +42,7 @@ $regression->predict([60000, 1996]) ### Intercept and Coefficients -After you train your model you can get the intercept and coefficients array. +After you train your model, you can get the intercept and coefficients array. ``` $regression->getIntercept(); diff --git a/docs/machine-learning/regression/svr.md b/docs/machine-learning/regression/svr.md new file mode 100644 index 0000000..14f9e6a --- /dev/null +++ b/docs/machine-learning/regression/svr.md @@ -0,0 +1,46 @@ +# Support Vector Regression + +Class implementing Epsilon-Support Vector Regression based on libsvm. + +### Constructor Parameters + +* $kernel (int) - kernel type to be used in the algorithm (default Kernel::RBF) +* $degree (int) - degree of the Kernel::POLYNOMIAL function (default 3) +* $epsilon (float) - epsilon in loss function of epsilon-SVR (default 0.1) +* $cost (float) - parameter C of C-SVC (default 1.0) +* $gamma (float) - kernel coefficient for ‘Kernel::RBF’, ‘Kernel::POLYNOMIAL’ and ‘Kernel::SIGMOID’. If gamma is ‘null’ then 1/features will be used instead. +* $coef0 (float) - independent term in kernel function. 
It is only significant in ‘Kernel::POLYNOMIAL’ and ‘Kernel::SIGMOID’ (default 0.0) +* $tolerance (float) - tolerance of termination criterion (default 0.001) +* $cacheSize (int) - cache memory size in MB (default 100) +* $shrinking (bool) - whether to use the shrinking heuristics (default true) + +``` +$regression = new SVR(Kernel::LINEAR); +$regression = new SVR(Kernel::LINEAR, $degree = 3, $epsilon=10.0); +``` + +### Train + +To train a model, simply provide train samples and targets values (as `array`). Example: + +``` +use Phpml\Regression\SVR; +use Phpml\SupportVectorMachine\Kernel; + +$samples = [[60], [61], [62], [63], [65]]; +$targets = [3.1, 3.6, 3.8, 4, 4.1]; + +$regression = new SVR(Kernel::LINEAR); +$regression->train($samples, $targets); +``` + +You can train the model using multiple data sets, predictions will be based on all the training data. + +### Predict + +To predict sample target value, use the `predict` method. You can provide one sample or array of samples: + +``` +$regression->predict([64]); +// return 4.03 +``` diff --git a/docs/machine-learning/workflow/pipeline.md b/docs/machine-learning/workflow/pipeline.md new file mode 100644 index 0000000..b89b88e --- /dev/null +++ b/docs/machine-learning/workflow/pipeline.md @@ -0,0 +1,65 @@ +# Pipeline + +In machine learning, it is common to run a sequence of algorithms to process and learn from a dataset. For example: + + * Split each document’s text into tokens. + * Convert each document’s words into a numerical feature vector ([Token Count Vectorizer](../feature-extraction/token-count-vectorizer.md)). + * Learn a prediction model using the feature vectors and labels. + +PHP-ML represents such a workflow as a Pipeline, which consists of a sequence of transformers and an estimator.
+ +### Constructor Parameters + +* $transformers (array|Transformer[]) - sequence of objects that implements the Transformer interface +* $estimator (Estimator) - estimator that can train and predict + +``` +use Phpml\Classification\SVC; +use Phpml\FeatureExtraction\TfIdfTransformer; +use Phpml\Pipeline; + +$transformers = [ + new TfIdfTransformer(), +]; +$estimator = new SVC(); + +$pipeline = new Pipeline($transformers, $estimator); +``` + +### Example + +First, our pipeline replaces the missing value, then normalizes samples and finally trains the SVC estimator. +Thus prepared pipeline repeats each transformation step for predicted sample. + +``` +use Phpml\Classification\SVC; +use Phpml\Pipeline; +use Phpml\Preprocessing\Imputer; +use Phpml\Preprocessing\Normalizer; +use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy; + +$transformers = [ + new Imputer(null, new MostFrequentStrategy()), + new Normalizer(), +]; +$estimator = new SVC(); + +$samples = [ + [1, -1, 2], + [2, 0, null], + [null, 1, -1], +]; + +$targets = [ + 4, + 1, + 4, +]; + +$pipeline = new Pipeline($transformers, $estimator); +$pipeline->train($samples, $targets); + +$predicted = $pipeline->predict([[0, 0, 0]]); + +// $predicted == 4 +``` diff --git a/docs/math/distance.md b/docs/math/distance.md index fd491ea..c7c3a98 100644 --- a/docs/math/distance.md +++ b/docs/math/distance.md @@ -4,7 +4,7 @@ Selected algorithms require the use of a function for calculating the distance. ### Euclidean -Class for calculation Euclidean distance. +Class for calculating Euclidean distance. ![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance") @@ -13,7 +13,7 @@ To calculate Euclidean distance: ``` $a = [4, 6]; $b = [2, 5]; - + $euclidean = new Euclidean(); $euclidean->distance($a, $b); // return 2.2360679774998 @@ -21,7 +21,7 @@ $euclidean->distance($a, $b); ### Manhattan -Class for calculation Manhattan distance. 
+Class for calculating Manhattan distance. ![manhattan](https://upload.wikimedia.org/math/4/c/5/4c568bd1d76a6b15e19cb2ac3ad75350.png "Manhattan Distance") @@ -30,7 +30,7 @@ To calculate Manhattan distance: ``` $a = [4, 6]; $b = [2, 5]; - + $manhattan = new Manhattan(); $manhattan->distance($a, $b); // return 3 @@ -38,7 +38,7 @@ $manhattan->distance($a, $b); ### Chebyshev -Class for calculation Chebyshev distance. +Class for calculating Chebyshev distance. ![chebyshev](https://upload.wikimedia.org/math/7/1/2/71200f7dbb43b3bcfbcbdb9e02ab0a0c.png "Chebyshev Distance") @@ -47,7 +47,7 @@ To calculate Chebyshev distance: ``` $a = [4, 6]; $b = [2, 5]; - + $chebyshev = new Chebyshev(); $chebyshev->distance($a, $b); // return 2 @@ -55,7 +55,7 @@ $chebyshev->distance($a, $b); ### Minkowski -Class for calculation Minkowski distance. +Class for calculating Minkowski distance. ![minkowski](https://upload.wikimedia.org/math/a/a/0/aa0c62083c12390cb15ac3217de88e66.png "Minkowski Distance") @@ -64,7 +64,7 @@ To calculate Minkowski distance: ``` $a = [4, 6]; $b = [2, 5]; - + $minkowski = new Minkowski(); $minkowski->distance($a, $b); // return 2.080 @@ -83,7 +83,7 @@ $minkowski->distance($a, $b); ### Custom distance -To apply your own function of distance use `Distance` interface. Example +To apply your own function of distance use the `Distance` interface. 
Example: ``` class CustomDistance implements Distance @@ -94,7 +94,7 @@ class CustomDistance implements Distance * * @return float */ - public function distance(array $a, array $b): float + public function distance(array $a, array $b) : float { $distance = []; $count = count($a); @@ -103,7 +103,7 @@ class CustomDistance implements Distance $distance[] = $a[$i] * $b[$i]; } - return min($distance); + return min($distance); } } ``` diff --git a/docs/math/set.md b/docs/math/set.md new file mode 100644 index 0000000..fa016ed --- /dev/null +++ b/docs/math/set.md @@ -0,0 +1,127 @@ +# Set + +Class that wraps PHP arrays containing primitive types to mathematical sets. + +### Creation + +To create Set use flat arrays containing primitives only: + +``` +use \Phpml\Math\Set; + +$set = new Set([1, 2, 2, 3, 1.1, -1, -10]); +$set->toArray(); +// return [-10, -1, 1, 1.1, 2, 3] + +$set = new Set(['B', '', 'A']); +$set->toArray(); +// return ['', 'A', 'B'] +``` + +Injected array is sorted by SORT_ASC, duplicates are removed and index is rewritten. 
+ +### Union + +Create the union of two Sets: + +``` +use \Phpml\Math\Set; + +$union = Set::union(new Set([1, 3]), new Set([1, 2])); +$union->toArray(); +//return [1, 2, 3] +``` + +### Intersection + +Create the intersection of two Sets: + +``` +use \Phpml\Math\Set; + +$intersection = Set::intersection(new Set(['A', 'C']), new Set(['B', 'C'])); +$intersection->toArray(); +//return ['C'] +``` + +### Complement + +Create the set-theoretic difference of two Sets: + +``` +use \Phpml\Math\Set; + +$difference = Set::difference(new Set(['A', 'B', 'C']), new Set(['A'])); +$difference->toArray(); +//return ['B', 'C'] +``` + +### Adding elements + +``` +use \Phpml\Math\Set; + +$set = new Set([1, 2]); +$set->addAll([3]); +$set->add(4); +$set->toArray(); +//return [1, 2, 3, 4] +``` + +### Removing elements + +``` +use \Phpml\Math\Set; + +$set = new Set([1, 2]); +$set->removeAll([2]); +$set->remove(1); +$set->toArray(); +//return [] +``` + +### Check membership + +``` +use \Phpml\Math\Set; + +$set = new Set([1, 2]); +$set->containsAll([2, 3]); +//return false +$set->contains(1); +//return true +``` + +### Cardinality + +``` +use \Phpml\Math\Set; + +$set = new Set([1, 2]); +$set->cardinality(); +//return 2 +``` + +### Is empty + +``` +use \Phpml\Math\Set; + +$set = new Set(); +$set->isEmpty(); +//return true +``` + +### Working with loops + +``` +use \Phpml\Math\Set; + +$set = new Set(['A', 'B', 'C']); + +foreach($set as $element) { + echo "$element, "; +} + +// echoes A, B, C +``` diff --git a/ecs.yml b/ecs.yml new file mode 100644 index 0000000..a602695 --- /dev/null +++ b/ecs.yml @@ -0,0 +1,69 @@ +imports: + - { resource: 'vendor/symplify/easy-coding-standard/config/set/psr2.yaml' } + - { resource: 'vendor/symplify/easy-coding-standard/config/set/php71.yaml' } + - { resource: 'vendor/symplify/easy-coding-standard/config/set/clean-code.yaml' } + - { resource: 'vendor/symplify/easy-coding-standard/config/set/common.yaml' } + +services: + # spacing + 
PhpCsFixer\Fixer\PhpTag\BlankLineAfterOpeningTagFixer: ~ + PhpCsFixer\Fixer\Whitespace\BlankLineBeforeStatementFixer: ~ + PhpCsFixer\Fixer\CastNotation\CastSpacesFixer: ~ + PhpCsFixer\Fixer\Operator\ConcatSpaceFixer: + spacing: none + PhpCsFixer\Fixer\ClassNotation\MethodSeparationFixer: ~ + PhpCsFixer\Fixer\ClassNotation\NoBlankLinesAfterClassOpeningFixer: ~ + PhpCsFixer\Fixer\Whitespace\NoSpacesAroundOffsetFixer: + positions: ['inside', 'outside'] + PhpCsFixer\Fixer\Operator\BinaryOperatorSpacesFixer: + align_double_arrow: false + align_equals: false + PhpCsFixer\Fixer\PhpUnit\PhpUnitTestCaseStaticMethodCallsFixer: + call_type: 'self' + # phpdoc + PhpCsFixer\Fixer\Phpdoc\PhpdocSeparationFixer: ~ + PhpCsFixer\Fixer\Phpdoc\PhpdocAlignFixer: ~ + + # Symplify + Symplify\CodingStandard\Fixer\Import\ImportNamespacedNameFixer: ~ + Symplify\CodingStandard\Fixer\Php\ClassStringToClassConstantFixer: ~ + Symplify\CodingStandard\Fixer\Property\ArrayPropertyDefaultValueFixer: ~ + Symplify\CodingStandard\Fixer\ArrayNotation\StandaloneLineInMultilineArrayFixer: ~ + +parameters: + skip: + # from strict.neon + PhpCsFixer\Fixer\PhpUnit\PhpUnitStrictFixer: ~ + PhpCsFixer\Fixer\Strict\StrictComparisonFixer: ~ + + # personal preference + PhpCsFixer\Fixer\Operator\NotOperatorWithSuccessorSpaceFixer: ~ + + PhpCsFixer\Fixer\Alias\RandomApiMigrationFixer: + # random_int() breaks code + - 'src/CrossValidation/RandomSplit.php' + SlevomatCodingStandard\Sniffs\Classes\UnusedPrivateElementsSniff: + # magic calls + - 'src/Preprocessing/Normalizer.php' + PhpCsFixer\Fixer\StringNotation\ExplicitStringVariableFixer: + # bugged + - 'src/Classification/DecisionTree/DecisionTreeLeaf.php' + Symplify\CodingStandard\Fixer\Commenting\RemoveUselessDocBlockFixer: + # false positive - already fixed in master + - 'src/Helper/OneVsRest.php' + # bug in fixer + - 'src/Math/LinearAlgebra/LUDecomposition.php' + PhpCsFixer\Fixer\FunctionNotation\VoidReturnFixer: + # covariant return types + - 
'src/Classification/Linear/Perceptron.php' + + # missing typehints + SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingParameterTypeHint: ~ + SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversableParameterTypeHintSpecification: ~ + SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingReturnTypeHint: ~ + SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversableReturnTypeHintSpecification: ~ + SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingPropertyTypeHint: ~ + SlevomatCodingStandard\Sniffs\TypeHints\TypeHintDeclarationSniff.MissingTraversablePropertyTypeHintSpecification: ~ + + # assignment in "while ($var = ...)" are ok + PHP_CodeSniffer\Standards\Generic\Sniffs\CodeAnalysis\AssignmentInConditionSniff.FoundInWhileCondition: diff --git a/humbug.json.dist b/humbug.json.dist deleted file mode 100644 index 2535633..0000000 --- a/humbug.json.dist +++ /dev/null @@ -1,11 +0,0 @@ -{ - "source": { - "directories": [ - "src" - ] - }, - "timeout": 10, - "logs": { - "text": "humbuglog.txt" - } -} diff --git a/mkdocs.yml b/mkdocs.yml index a596d91..451d6e9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,24 +2,53 @@ site_name: PHP-ML - Machine Learning library for PHP pages: - Home: index.md - Machine Learning: + - Association rule learning: + - Apriori: machine-learning/association/apriori.md - Classification: + - SVC: machine-learning/classification/svc.md - KNearestNeighbors: machine-learning/classification/k-nearest-neighbors.md - NaiveBayes: machine-learning/classification/naive-bayes.md - Regression: - LeastSquares: machine-learning/regression/least-squares.md + - SVR: machine-learning/regression/svr.md - Clustering: - KMeans: machine-learning/clustering/k-means.md - DBSCAN: machine-learning/clustering/dbscan.md + - Metric: + - Accuracy: machine-learning/metric/accuracy.md + - Confusion Matrix: machine-learning/metric/confusion-matrix.md + - 
Classification Report: machine-learning/metric/classification-report.md + - Workflow: + - Pipeline: machine-learning/workflow/pipeline.md + - Neural Network: + - Multilayer Perceptron Classifier: machine-learning/neural-network/multilayer-perceptron-classifier.md - Cross Validation: - RandomSplit: machine-learning/cross-validation/random-split.md + - Stratified Random Split: machine-learning/cross-validation/stratified-random-split.md + - Feature Selection: + - VarianceThreshold: machine-learning/feature-selection/variance-threshold.md + - SelectKBest: machine-learning/feature-selection/selectkbest.md + - Preprocessing: + - Normalization: machine-learning/preprocessing/normalization.md + - Imputation missing values: machine-learning/preprocessing/imputation-missing-values.md + - Feature Extraction: + - Token Count Vectorizer: machine-learning/feature-extraction/token-count-vectorizer.md + - Tf-idf Transformer: machine-learning/feature-extraction/tf-idf-transformer.md - Datasets: - Array Dataset: machine-learning/datasets/array-dataset.md - CSV Dataset: machine-learning/datasets/csv-dataset.md + - Files Dataset: machine-learning/datasets/files-dataset.md + - SVM Dataset: machine-learning/datasets/svm-dataset.md + - MNIST Dataset: machine-learning/datasets/mnist-dataset.md - Ready to use datasets: - Iris: machine-learning/datasets/demo/iris.md - - Metric: - - Accuracy: machine-learning/metric/accuracy.md + - Wine: machine-learning/datasets/demo/wine.md + - Glass: machine-learning/datasets/demo/glass.md + - Models management: + - Persistency: machine-learning/model-manager/persistency.md - Math: - Distance: math/distance.md - Matrix: math/matrix.md + - Set: math/set.md + - Statistic: math/statistic.md theme: readthedocs diff --git a/phpbench.json b/phpbench.json new file mode 100644 index 0000000..ea802f7 --- /dev/null +++ b/phpbench.json @@ -0,0 +1,17 @@ +{ + "bootstrap": "tests/Performance/bootstrap.php", + "path": "tests/Performance", + "reports": { + "time": { + 
"extends": "aggregate", + "title": "The Consumation of Time", + "cols": [ "subject", "mode", "mean", "rstdev", "diff"] + }, + "memory": { + "extends": "aggregate", + "title": "The Memory Usage", + "cols": [ "subject", "mem_real", "mem_final", "mem_peak", "diff"], + "diff_col": "mem_peak" + } + } +} \ No newline at end of file diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..7af78fa --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,21 @@ +includes: + - vendor/phpstan/phpstan-strict-rules/rules.neon + - vendor/phpstan/phpstan-phpunit/extension.neon + - vendor/phpstan/phpstan-phpunit/rules.neon + +parameters: + checkGenericClassInNonGenericObjectType: false + checkMissingIterableValueType: false + + ignoreErrors: + - '#Property Phpml\\Clustering\\KMeans\\Cluster\:\:\$points \(iterable\\&SplObjectStorage\) does not accept SplObjectStorage#' + - '#Phpml\\Dataset\\(.*)Dataset::__construct\(\) does not call parent constructor from Phpml\\Dataset\\ArrayDataset#' + - '#Variable property access on .+#' + - '#Variable method call on .+#' + - message: '#ReflectionClass#' + paths: + - src/Classification/Ensemble/AdaBoost.php + - src/Classification/Ensemble/Bagging.php + # probably known value + - '#Method Phpml\\Classification\\DecisionTree::getBestSplit\(\) should return Phpml\\Classification\\DecisionTree\\DecisionTreeLeaf but returns Phpml\\Classification\\DecisionTree\\DecisionTreeLeaf\|null#' + - '#Call to an undefined method Phpml\\Helper\\Optimizer\\Optimizer::getCostValues\(\)#' diff --git a/phpunit.xml b/phpunit.xml index cbf6c18..e0669d3 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -1,16 +1,15 @@ - - - tests/* - - ​ + + tests + @@ -18,4 +17,8 @@ + + + + diff --git a/src/Association/Apriori.php b/src/Association/Apriori.php new file mode 100644 index 0000000..1f73679 --- /dev/null +++ b/src/Association/Apriori.php @@ -0,0 +1,332 @@ +support = $support; + $this->confidence = $confidence; + } + + /** + * Get all association rules which are 
generated for every k-length frequent item set. + * + * @return mixed[][] + */ + public function getRules(): array + { + if (count($this->large) === 0) { + $this->large = $this->apriori(); + } + + if (count($this->rules) > 0) { + return $this->rules; + } + + $this->rules = []; + + $this->generateAllRules(); + + return $this->rules; + } + + /** + * Generates frequent item sets. + * + * @return mixed[][][] + */ + public function apriori(): array + { + $L = []; + + $items = $this->frequent($this->items()); + for ($k = 1; isset($items[0]); ++$k) { + $L[$k] = $items; + $items = $this->frequent($this->candidates($items)); + } + + return $L; + } + + /** + * @param mixed[] $sample + * + * @return mixed[][] + */ + protected function predictSample(array $sample): array + { + $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample): bool { + return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample); + })); + + return array_map(static function ($rule) { + return $rule[self::ARRAY_KEY_CONSEQUENT]; + }, $predicts); + } + + /** + * Generate rules for each k-length frequent item set. + */ + private function generateAllRules(): void + { + for ($k = 2; isset($this->large[$k]); ++$k) { + foreach ($this->large[$k] as $frequent) { + $this->generateRules($frequent); + } + } + } + + /** + * Generate confident rules for frequent item set. + * + * @param mixed[] $frequent + */ + private function generateRules(array $frequent): void + { + foreach ($this->antecedents($frequent) as $antecedent) { + $confidence = $this->confidence($frequent, $antecedent); + if ($this->confidence <= $confidence) { + $consequent = array_values(array_diff($frequent, $antecedent)); + $this->rules[] = [ + self::ARRAY_KEY_ANTECEDENT => $antecedent, + self::ARRAY_KEY_CONSEQUENT => $consequent, + self::ARRAY_KEY_SUPPORT => $this->support($frequent), + self::ARRAY_KEY_CONFIDENCE => $confidence, + ]; + } + } + } + + /** + * Generates the power set for given item set $sample. 
+ * + * @param mixed[] $sample + * + * @return mixed[][] + */ + private function powerSet(array $sample): array + { + $results = [[]]; + foreach ($sample as $item) { + foreach ($results as $combination) { + $results[] = array_merge([$item], $combination); + } + } + + return $results; + } + + /** + * Generates all proper subsets for given set $sample without the empty set. + * + * @param mixed[] $sample + * + * @return mixed[][] + */ + private function antecedents(array $sample): array + { + $cardinality = count($sample); + $antecedents = $this->powerSet($sample); + + return array_filter($antecedents, static function ($antecedent) use ($cardinality): bool { + return (count($antecedent) != $cardinality) && ($antecedent != []); + }); + } + + /** + * Calculates frequent k = 1 item sets. + * + * @return mixed[][] + */ + private function items(): array + { + $items = []; + + foreach ($this->samples as $sample) { + foreach ($sample as $item) { + if (!in_array($item, $items, true)) { + $items[] = $item; + } + } + } + + return array_map(static function ($entry): array { + return [$entry]; + }, $items); + } + + /** + * Returns frequent item sets only. + * + * @param mixed[][] $samples + * + * @return mixed[][] + */ + private function frequent(array $samples): array + { + return array_values(array_filter($samples, function ($entry): bool { + return $this->support($entry) >= $this->support; + })); + } + + /** + * Calculates frequent k item sets, where count($samples) == $k - 1. 
+ * + * @param mixed[][] $samples + * + * @return mixed[][] + */ + private function candidates(array $samples): array + { + $candidates = []; + + foreach ($samples as $p) { + foreach ($samples as $q) { + if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) { + continue; + } + + $candidate = array_values(array_unique(array_merge($p, $q))); + + if ($this->contains($candidates, $candidate)) { + continue; + } + + foreach ($this->samples as $sample) { + if ($this->subset($sample, $candidate)) { + $candidates[] = $candidate; + + continue 2; + } + } + } + } + + return $candidates; + } + + /** + * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain + * $set. + * + * @param mixed[] $set + * @param mixed[] $subset + */ + private function confidence(array $set, array $subset): float + { + return $this->support($set) / $this->support($subset); + } + + /** + * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data + * pool. + * + * @see \Phpml\Association\Apriori::samples + * + * @param mixed[] $sample + */ + private function support(array $sample): float + { + return $this->frequency($sample) / count($this->samples); + } + + /** + * Counts occurrences of $sample as subset in data pool. + * + * @see \Phpml\Association\Apriori::samples + * + * @param mixed[] $sample + */ + private function frequency(array $sample): int + { + return count(array_filter($this->samples, function ($entry) use ($sample): bool { + return $this->subset($entry, $sample); + })); + } + + /** + * Returns true if set is an element of system. 
+ * + * @see \Phpml\Association\Apriori::equals() + * + * @param mixed[][] $system + * @param mixed[] $set + */ + private function contains(array $system, array $set): bool + { + return (bool) array_filter($system, function ($entry) use ($set): bool { + return $this->equals($entry, $set); + }); + } + + /** + * Returns true if subset is a (proper) subset of set by its items string representation. + * + * @param mixed[] $set + * @param mixed[] $subset + */ + private function subset(array $set, array $subset): bool + { + return count(array_diff($subset, array_intersect($subset, $set))) === 0; + } + + /** + * Returns true if string representation of items does not differ. + * + * @param mixed[] $set1 + * @param mixed[] $set2 + */ + private function equals(array $set1, array $set2): bool + { + return array_diff($set1, $set2) == array_diff($set2, $set1); + } +} diff --git a/src/Association/Associator.php b/src/Association/Associator.php new file mode 100644 index 0000000..c339b5e --- /dev/null +++ b/src/Association/Associator.php @@ -0,0 +1,11 @@ +maxDepth = $maxDepth; + } + + public function train(array $samples, array $targets): void + { + $this->samples = array_merge($this->samples, $samples); + $this->targets = array_merge($this->targets, $targets); + + $this->featureCount = count($this->samples[0]); + $this->columnTypes = self::getColumnTypes($this->samples); + $this->labels = array_keys(array_count_values($this->targets)); + $this->tree = $this->getSplitLeaf(range(0, count($this->samples) - 1)); + + // Each time the tree is trained, feature importances are reset so that + // we will have to compute it again depending on the new data + $this->featureImportances = null; + + // If column names are given or computed before, then there is no + // need to init it and accidentally remove the previous given names + if ($this->columnNames === []) { + $this->columnNames = range(0, $this->featureCount - 1); + } elseif (count($this->columnNames) > $this->featureCount) { + 
$this->columnNames = array_slice($this->columnNames, 0, $this->featureCount); + } elseif (count($this->columnNames) < $this->featureCount) { + $this->columnNames = array_merge( + $this->columnNames, + range(count($this->columnNames), $this->featureCount - 1) + ); + } + } + + public static function getColumnTypes(array $samples): array + { + $types = []; + $featureCount = count($samples[0]); + for ($i = 0; $i < $featureCount; ++$i) { + $values = array_column($samples, $i); + $isCategorical = self::isCategoricalColumn($values); + $types[] = $isCategorical ? self::NOMINAL : self::CONTINUOUS; + } + + return $types; + } + + /** + * @param mixed $baseValue + */ + public function getGiniIndex($baseValue, array $colValues, array $targets): float + { + $countMatrix = []; + foreach ($this->labels as $label) { + $countMatrix[$label] = [0, 0]; + } + + foreach ($colValues as $index => $value) { + $label = $targets[$index]; + $rowIndex = $value === $baseValue ? 0 : 1; + ++$countMatrix[$label][$rowIndex]; + } + + $giniParts = [0, 0]; + for ($i = 0; $i <= 1; ++$i) { + $part = 0; + $sum = array_sum(array_column($countMatrix, $i)); + if ($sum > 0) { + foreach ($this->labels as $label) { + $part += ($countMatrix[$label][$i] / (float) $sum) ** 2; + } + } + + $giniParts[$i] = (1 - $part) * $sum; + } + + return array_sum($giniParts) / count($colValues); + } + + /** + * This method is used to set number of columns to be used + * when deciding a split at an internal node of the tree.
+ * If the value is given 0, then all features are used (default behaviour), + * otherwise the given value will be used as a maximum for number of columns + * randomly selected for each split operation. + * + * @return $this + * + * @throws InvalidArgumentException + */ + public function setNumFeatures(int $numFeatures) + { + if ($numFeatures < 0) { + throw new InvalidArgumentException('Selected column count should be greater or equal to zero'); + } + + $this->numUsableFeatures = $numFeatures; + + return $this; + } + + /** + * A string array to represent columns. Useful when HTML output or + * column importances are desired to be inspected. + * + * @return $this + * + * @throws InvalidArgumentException + */ + public function setColumnNames(array $names) + { + if ($this->featureCount !== 0 && count($names) !== $this->featureCount) { + throw new InvalidArgumentException(sprintf('Length of the given array should be equal to feature count %s', $this->featureCount)); + } + + $this->columnNames = $names; + + return $this; + } + + public function getHtml(): string + { + return $this->tree->getHTML($this->columnNames); + } + + /** + * This will return an array including an importance value for + * each column in the given dataset. The importance values are + * normalized and their total makes 1.
+ */ + public function getFeatureImportances(): array + { + if ($this->featureImportances !== null) { + return $this->featureImportances; + } + + $sampleCount = count($this->samples); + $this->featureImportances = []; + foreach ($this->columnNames as $column => $columnName) { + $nodes = $this->getSplitNodesByColumn($column, $this->tree); + + $importance = 0; + foreach ($nodes as $node) { + $importance += $node->getNodeImpurityDecrease($sampleCount); + } + + $this->featureImportances[$columnName] = $importance; + } + + // Normalize & sort the importances + $total = array_sum($this->featureImportances); + if ($total > 0) { + array_walk($this->featureImportances, function (&$importance) use ($total): void { + $importance /= $total; + }); + arsort($this->featureImportances); + } + + return $this->featureImportances; + } + + protected function getSplitLeaf(array $records, int $depth = 0): DecisionTreeLeaf + { + $split = $this->getBestSplit($records); + $split->level = $depth; + if ($this->actualDepth < $depth) { + $this->actualDepth = $depth; + } + + // Traverse all records to see if all records belong to the same class, + // otherwise group the records so that we can classify the leaf + // in case maximum depth is reached + $leftRecords = []; + $rightRecords = []; + $remainingTargets = []; + $prevRecord = null; + $allSame = true; + + foreach ($records as $recordNo) { + // Check if the previous record is the same with the current one + $record = $this->samples[$recordNo]; + if ($prevRecord !== null && $prevRecord != $record) { + $allSame = false; + } + + $prevRecord = $record; + + // According to the split criteron, this record will + // belong to either left or the right side in the next split + if ($split->evaluate($record)) { + $leftRecords[] = $recordNo; + } else { + $rightRecords[] = $recordNo; + } + + // Group remaining targets + $target = $this->targets[$recordNo]; + if (!array_key_exists($target, $remainingTargets)) { + $remainingTargets[$target] = 1; + } else { 
+ ++$remainingTargets[$target]; + } + } + + if ($allSame || $depth >= $this->maxDepth || count($remainingTargets) === 1) { + $split->isTerminal = true; + arsort($remainingTargets); + $split->classValue = (string) key($remainingTargets); + } else { + if (isset($leftRecords[0])) { + $split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1); + } + + if (isset($rightRecords[0])) { + $split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1); + } + } + + return $split; + } + + protected function getBestSplit(array $records): DecisionTreeLeaf + { + $targets = array_intersect_key($this->targets, array_flip($records)); + $samples = (array) array_combine( + $records, + $this->preprocess(array_intersect_key($this->samples, array_flip($records))) + ); + $bestGiniVal = 1; + $bestSplit = null; + $features = $this->getSelectedFeatures(); + foreach ($features as $i) { + $colValues = []; + foreach ($samples as $index => $row) { + $colValues[$index] = $row[$i]; + } + + $counts = array_count_values($colValues); + arsort($counts); + $baseValue = key($counts); + if ($baseValue === null) { + continue; + } + + $gini = $this->getGiniIndex($baseValue, $colValues, $targets); + if ($bestSplit === null || $bestGiniVal > $gini) { + $split = new DecisionTreeLeaf(); + $split->value = $baseValue; + $split->giniIndex = $gini; + $split->columnIndex = $i; + $split->isContinuous = $this->columnTypes[$i] === self::CONTINUOUS; + $split->records = $records; + + // If a numeric column is to be selected, then + // the original numeric value and the selected operator + // will also be saved into the leaf for future access + if ($this->columnTypes[$i] === self::CONTINUOUS) { + $matches = []; + preg_match("/^([<>=]{1,2})\s*(.*)/", (string) $split->value, $matches); + $split->operator = $matches[1]; + $split->numericValue = (float) $matches[2]; + } + + $bestSplit = $split; + $bestGiniVal = $gini; + } + } + + return $bestSplit; + } + + /** + * Returns available features/columns to the tree for 
the decision making + * process.
+ * + * If a number is given with setNumFeatures() method, then a random selection + * of features up to this number is returned.
+ * + * If some features are manually selected by use of setSelectedFeatures(), + * then only these features are returned.
+ * + * If any of above methods were not called beforehand, then all features + * are returned by default. + */ + protected function getSelectedFeatures(): array + { + $allFeatures = range(0, $this->featureCount - 1); + if ($this->numUsableFeatures === 0 && count($this->selectedFeatures) === 0) { + return $allFeatures; + } + + if (count($this->selectedFeatures) > 0) { + return $this->selectedFeatures; + } + + $numFeatures = $this->numUsableFeatures; + if ($numFeatures > $this->featureCount) { + $numFeatures = $this->featureCount; + } + + shuffle($allFeatures); + $selectedFeatures = array_slice($allFeatures, 0, $numFeatures); + sort($selectedFeatures); + + return $selectedFeatures; + } + + protected function preprocess(array $samples): array + { + // Detect and convert continuous data column values into + // discrete values by using the median as a threshold value + $columns = []; + for ($i = 0; $i < $this->featureCount; ++$i) { + $values = array_column($samples, $i); + if ($this->columnTypes[$i] == self::CONTINUOUS) { + $median = Mean::median($values); + foreach ($values as &$value) { + if ($value <= $median) { + $value = "<= ${median}"; + } else { + $value = "> ${median}"; + } + } + } + + $columns[] = $values; + } + + // Below method is a strange yet very simple & efficient method + // to get the transpose of a 2D array + return array_map(null, ...$columns); + } + + protected static function isCategoricalColumn(array $columnValues): bool + { + $count = count($columnValues); + + // There are two main indicators that *may* show whether a + // column is composed of discrete set of values: + // 1- Column may contain string values and non-float values + // 2- Number of unique values in the column is only a small fraction of + // all values in that column (Lower than or equal to %20 of all values) + $numericValues = array_filter($columnValues, 'is_numeric'); + $floatValues = array_filter($columnValues, 'is_float'); + if (count($floatValues) > 0) { + return false; + } + 
+ if (count($numericValues) !== $count) { + return true; + } + + $distinctValues = array_count_values($columnValues); + + return count($distinctValues) <= $count / 5; + } + + /** + * Used to set predefined features to consider while deciding which column to use for a split + */ + protected function setSelectedFeatures(array $selectedFeatures): void + { + $this->selectedFeatures = $selectedFeatures; + } + + /** + * Collects and returns an array of internal nodes that use the given + * column as a split criterion + */ + protected function getSplitNodesByColumn(int $column, DecisionTreeLeaf $node): array + { + if ($node->isTerminal) { + return []; + } + + $nodes = []; + if ($node->columnIndex === $column) { + $nodes[] = $node; + } + + $lNodes = []; + $rNodes = []; + if ($node->leftLeaf !== null) { + $lNodes = $this->getSplitNodesByColumn($column, $node->leftLeaf); + } + + if ($node->rightLeaf !== null) { + $rNodes = $this->getSplitNodesByColumn($column, $node->rightLeaf); + } + + return array_merge($nodes, $lNodes, $rNodes); + } + + /** + * @return mixed + */ + protected function predictSample(array $sample) + { + $node = $this->tree; + do { + if ($node->isTerminal) { + return $node->classValue; + } + + if ($node->evaluate($sample)) { + $node = $node->leftLeaf; + } else { + $node = $node->rightLeaf; + } + } while ($node); + + return $this->labels[0]; + } +} diff --git a/src/Classification/DecisionTree/DecisionTreeLeaf.php b/src/Classification/DecisionTree/DecisionTreeLeaf.php new file mode 100644 index 0000000..04af3d6 --- /dev/null +++ b/src/Classification/DecisionTree/DecisionTreeLeaf.php @@ -0,0 +1,165 @@ +getHTML(); + } + + public function evaluate(array $record): bool + { + $recordField = $record[$this->columnIndex]; + + if ($this->isContinuous) { + return Comparison::compare((string) $recordField, $this->numericValue, $this->operator); + } + + return $recordField == $this->value; + } + + /** + * Returns Mean Decrease Impurity (MDI) in the node. 
+ * For terminal nodes, this value is equal to 0 + */ + public function getNodeImpurityDecrease(int $parentRecordCount): float + { + if ($this->isTerminal) { + return 0.0; + } + + $nodeSampleCount = (float) count($this->records); + $iT = $this->giniIndex; + + if ($this->leftLeaf !== null) { + $pL = count($this->leftLeaf->records) / $nodeSampleCount; + $iT -= $pL * $this->leftLeaf->giniIndex; + } + + if ($this->rightLeaf !== null) { + $pR = count($this->rightLeaf->records) / $nodeSampleCount; + $iT -= $pR * $this->rightLeaf->giniIndex; + } + + return $iT * $nodeSampleCount / $parentRecordCount; + } + + /** + * Returns HTML representation of the node including children nodes + */ + public function getHTML(?array $columnNames = null): string + { + if ($this->isTerminal) { + $value = "${this}->classValue"; + } else { + $value = $this->value; + if ($columnNames !== null) { + $col = $columnNames[$this->columnIndex]; + } else { + $col = "col_$this->columnIndex"; + } + + if ((bool) preg_match('/^[<>=]{1,2}/', (string) $value) === false) { + $value = "=${value}"; + } + + $value = "${col} ${value}
Gini: ".number_format($this->giniIndex, 2); + } + + $str = ""; + + if ($this->leftLeaf !== null || $this->rightLeaf !== null) { + $str .= ''; + if ($this->leftLeaf !== null) { + $str .= ''; + } else { + $str .= ''; + } + + $str .= ''; + if ($this->rightLeaf !== null) { + $str .= ''; + } else { + $str .= ''; + } + + $str .= ''; + } + + $str .= '
${value}
| Yes
'.$this->leftLeaf->getHTML($columnNames).'
 No |
'.$this->rightLeaf->getHTML($columnNames).'
'; + + return $str; + } +} diff --git a/src/Classification/Ensemble/AdaBoost.php b/src/Classification/Ensemble/AdaBoost.php new file mode 100644 index 0000000..fdaeb63 --- /dev/null +++ b/src/Classification/Ensemble/AdaBoost.php @@ -0,0 +1,252 @@ +maxIterations = $maxIterations; + } + + /** + * Sets the base classifier that will be used for boosting (default = DecisionStump) + */ + public function setBaseClassifier(string $baseClassifier = DecisionStump::class, array $classifierOptions = []): void + { + $this->baseClassifier = $baseClassifier; + $this->classifierOptions = $classifierOptions; + } + + /** + * @throws InvalidArgumentException + */ + public function train(array $samples, array $targets): void + { + // Initialize usual variables + $this->labels = array_keys(array_count_values($targets)); + if (count($this->labels) !== 2) { + throw new InvalidArgumentException('AdaBoost is a binary classifier and can classify between two classes only'); + } + + // Set all target values to either -1 or 1 + $this->labels = [ + 1 => $this->labels[0], + -1 => $this->labels[1], + ]; + foreach ($targets as $target) { + $this->targets[] = $target == $this->labels[1] ? 
1 : -1; + } + + $this->samples = array_merge($this->samples, $samples); + $this->featureCount = count($samples[0]); + $this->sampleCount = count($this->samples); + + // Initialize AdaBoost parameters + $this->weights = array_fill(0, $this->sampleCount, 1.0 / $this->sampleCount); + $this->classifiers = []; + $this->alpha = []; + + // Execute the algorithm for a maximum number of iterations + $currIter = 0; + while ($this->maxIterations > $currIter++) { + // Determine the best 'weak' classifier based on current weights + $classifier = $this->getBestClassifier(); + $errorRate = $this->evaluateClassifier($classifier); + + // Update alpha & weight values at each iteration + $alpha = $this->calculateAlpha($errorRate); + $this->updateWeights($classifier, $alpha); + + $this->classifiers[] = $classifier; + $this->alpha[] = $alpha; + } + } + + /** + * @return mixed + */ + public function predictSample(array $sample) + { + $sum = 0; + foreach ($this->alpha as $index => $alpha) { + $h = $this->classifiers[$index]->predict($sample); + $sum += $h * $alpha; + } + + return $this->labels[$sum > 0 ? 1 : -1]; + } + + /** + * Returns the classifier with the lowest error rate with the + * consideration of current sample weights + */ + protected function getBestClassifier(): Classifier + { + $ref = new ReflectionClass($this->baseClassifier); + /** @var Classifier $classifier */ + $classifier = count($this->classifierOptions) === 0 ? 
$ref->newInstance() : $ref->newInstanceArgs($this->classifierOptions); + + if ($classifier instanceof WeightedClassifier) { + $classifier->setSampleWeights($this->weights); + $classifier->train($this->samples, $this->targets); + } else { + [$samples, $targets] = $this->resample(); + $classifier->train($samples, $targets); + } + + return $classifier; + } + + /** + * Resamples the dataset in accordance with the weights and + * returns the new dataset + */ + protected function resample(): array + { + $weights = $this->weights; + $std = StandardDeviation::population($weights); + $mean = Mean::arithmetic($weights); + $min = min($weights); + $minZ = (int) round(($min - $mean) / $std); + + $samples = []; + $targets = []; + foreach ($weights as $index => $weight) { + $z = (int) round(($weight - $mean) / $std) - $minZ + 1; + for ($i = 0; $i < $z; ++$i) { + if (random_int(0, 1) == 0) { + continue; + } + + $samples[] = $this->samples[$index]; + $targets[] = $this->targets[$index]; + } + } + + return [$samples, $targets]; + } + + /** + * Evaluates the classifier and returns the classification error rate + */ + protected function evaluateClassifier(Classifier $classifier): float + { + $total = (float) array_sum($this->weights); + $wrong = 0; + foreach ($this->samples as $index => $sample) { + $predicted = $classifier->predict($sample); + if ($predicted != $this->targets[$index]) { + $wrong += $this->weights[$index]; + } + } + + return $wrong / $total; + } + + /** + * Calculates alpha of a classifier + */ + protected function calculateAlpha(float $errorRate): float + { + if ($errorRate == 0) { + $errorRate = 1e-10; + } + + return 0.5 * log((1 - $errorRate) / $errorRate); + } + + /** + * Updates the sample weights + */ + protected function updateWeights(Classifier $classifier, float $alpha): void + { + $sumOfWeights = array_sum($this->weights); + $weightsT1 = []; + foreach ($this->weights as $index => $weight) { + $desired = $this->targets[$index]; + $output = 
$classifier->predict($this->samples[$index]); + + $weight *= exp(-$alpha * $desired * $output) / $sumOfWeights; + + $weightsT1[] = $weight; + } + + $this->weights = $weightsT1; + } +} diff --git a/src/Classification/Ensemble/Bagging.php b/src/Classification/Ensemble/Bagging.php new file mode 100644 index 0000000..2c9010d --- /dev/null +++ b/src/Classification/Ensemble/Bagging.php @@ -0,0 +1,170 @@ + 20]; + + /** + * @var array + */ + protected $classifiers = []; + + /** + * @var float + */ + protected $subsetRatio = 0.7; + + /** + * Creates an ensemble classifier with given number of base classifiers + * Default number of base classifiers is 50. + * The more number of base classifiers, the better performance but at the cost of procesing time + */ + public function __construct(int $numClassifier = 50) + { + $this->numClassifier = $numClassifier; + } + + /** + * This method determines the ratio of samples used to create the 'bootstrap' subset, + * e.g., random samples drawn from the original dataset with replacement (allow repeats), + * to train each base classifier. + * + * @return $this + * + * @throws InvalidArgumentException + */ + public function setSubsetRatio(float $ratio) + { + if ($ratio < 0.1 || $ratio > 1.0) { + throw new InvalidArgumentException('Subset ratio should be between 0.1 and 1.0'); + } + + $this->subsetRatio = $ratio; + + return $this; + } + + /** + * This method is used to set the base classifier. Default value is + * DecisionTree::class, but any class that implements the Classifier + * can be used.
+ * While giving the parameters of the classifier, the values should be + * given in the order they are in the constructor of the classifier and parameter + * names are neglected. + * + * @return $this + */ + public function setClassifer(string $classifier, array $classifierOptions = []) + { + $this->classifier = $classifier; + $this->classifierOptions = $classifierOptions; + + return $this; + } + + public function train(array $samples, array $targets): void + { + $this->samples = array_merge($this->samples, $samples); + $this->targets = array_merge($this->targets, $targets); + $this->featureCount = count($samples[0]); + $this->numSamples = count($this->samples); + + // Init classifiers and train them with bootstrap samples + $this->classifiers = $this->initClassifiers(); + $index = 0; + foreach ($this->classifiers as $classifier) { + [$samples, $targets] = $this->getRandomSubset($index); + $classifier->train($samples, $targets); + ++$index; + } + } + + protected function getRandomSubset(int $index): array + { + $samples = []; + $targets = []; + srand($index); + $bootstrapSize = $this->subsetRatio * $this->numSamples; + for ($i = 0; $i < $bootstrapSize; ++$i) { + $rand = random_int(0, $this->numSamples - 1); + $samples[] = $this->samples[$rand]; + $targets[] = $this->targets[$rand]; + } + + return [$samples, $targets]; + } + + protected function initClassifiers(): array + { + $classifiers = []; + for ($i = 0; $i < $this->numClassifier; ++$i) { + $ref = new ReflectionClass($this->classifier); + /** @var Classifier $obj */ + $obj = count($this->classifierOptions) === 0 ? 
$ref->newInstance() : $ref->newInstanceArgs($this->classifierOptions); + + $classifiers[] = $this->initSingleClassifier($obj); + } + + return $classifiers; + } + + protected function initSingleClassifier(Classifier $classifier): Classifier + { + return $classifier; + } + + /** + * @return mixed + */ + protected function predictSample(array $sample) + { + $predictions = []; + foreach ($this->classifiers as $classifier) { + /** @var Classifier $classifier */ + $predictions[] = $classifier->predict($sample); + } + + $counts = array_count_values($predictions); + arsort($counts); + reset($counts); + + return key($counts); + } +} diff --git a/src/Classification/Ensemble/RandomForest.php b/src/Classification/Ensemble/RandomForest.php new file mode 100644 index 0000000..71ea8d1 --- /dev/null +++ b/src/Classification/Ensemble/RandomForest.php @@ -0,0 +1,157 @@ +setSubsetRatio(1.0); + } + + /** + * This method is used to determine how many of the original columns (features) + * will be used to construct subsets to train base classifiers.
+ * + * Allowed values: 'sqrt', 'log' or any float number between 0.1 and 1.0
+ * + * Default value for the ratio is 'log' which results in log(numFeatures, 2) + 1 + * features to be taken into consideration while selecting subspace of features + * + * @param mixed $ratio + */ + public function setFeatureSubsetRatio($ratio): self + { + if (!is_string($ratio) && !is_float($ratio)) { + throw new InvalidArgumentException('Feature subset ratio must be a string or a float'); + } + + if (is_float($ratio) && ($ratio < 0.1 || $ratio > 1.0)) { + throw new InvalidArgumentException('When a float is given, feature subset ratio should be between 0.1 and 1.0'); + } + + if (is_string($ratio) && $ratio !== 'sqrt' && $ratio !== 'log') { + throw new InvalidArgumentException("When a string is given, feature subset ratio can only be 'sqrt' or 'log'"); + } + + $this->featureSubsetRatio = $ratio; + + return $this; + } + + /** + * RandomForest algorithm is usable *only* with DecisionTree + * + * @return $this + */ + public function setClassifer(string $classifier, array $classifierOptions = []) + { + if ($classifier !== DecisionTree::class) { + throw new InvalidArgumentException('RandomForest can only use DecisionTree as base classifier'); + } + + parent::setClassifer($classifier, $classifierOptions); + + return $this; + } + + /** + * This will return an array including an importance value for + * each column in the given dataset. 
Importance values for a column + * is the average importance of that column in all trees in the forest + */ + public function getFeatureImportances(): array + { + // Traverse each tree and sum importance of the columns + $sum = []; + foreach ($this->classifiers as $tree) { + /** @var DecisionTree $tree */ + $importances = $tree->getFeatureImportances(); + + foreach ($importances as $column => $importance) { + if (array_key_exists($column, $sum)) { + $sum[$column] += $importance; + } else { + $sum[$column] = $importance; + } + } + } + + // Normalize & sort the importance values + $total = array_sum($sum); + array_walk($sum, function (&$importance) use ($total): void { + $importance /= $total; + }); + arsort($sum); + + return $sum; + } + + /** + * A string array to represent the columns is given. They are useful + * when trying to print some information about the trees such as feature importances + * + * @return $this + */ + public function setColumnNames(array $names) + { + $this->columnNames = $names; + + return $this; + } + + /** + * @return DecisionTree + */ + protected function initSingleClassifier(Classifier $classifier): Classifier + { + if (!$classifier instanceof DecisionTree) { + throw new InvalidArgumentException( + sprintf('Classifier %s expected, got %s', DecisionTree::class, get_class($classifier)) + ); + } + + if (is_float($this->featureSubsetRatio)) { + $featureCount = (int) ($this->featureSubsetRatio * $this->featureCount); + } elseif ($this->featureSubsetRatio === 'sqrt') { + $featureCount = (int) ($this->featureCount ** .5) + 1; + } else { + $featureCount = (int) log($this->featureCount, 2) + 1; + } + + if ($featureCount >= $this->featureCount) { + $featureCount = $this->featureCount; + } + + if ($this->columnNames === null) { + $this->columnNames = range(0, $this->featureCount - 1); + } + + return $classifier + ->setColumnNames($this->columnNames) + ->setNumFeatures($featureCount); + } +} diff --git a/src/Phpml/Classification/KNearestNeighbors.php 
b/src/Classification/KNearestNeighbors.php similarity index 65% rename from src/Phpml/Classification/KNearestNeighbors.php rename to src/Classification/KNearestNeighbors.php index 93991ae..9b78baa 100644 --- a/src/Phpml/Classification/KNearestNeighbors.php +++ b/src/Classification/KNearestNeighbors.php @@ -1,17 +1,18 @@ k = $k; $this->samples = []; - $this->labels = []; + $this->targets = []; $this->distanceMetric = $distanceMetric; } /** - * @param array $sample - * * @return mixed */ protected function predictSample(array $sample) { $distances = $this->kNeighborsDistances($sample); + $predictions = (array) array_combine(array_values($this->targets), array_fill(0, count($this->targets), 0)); - $predictions = array_combine(array_values($this->labels), array_fill(0, count($this->labels), 0)); - - foreach ($distances as $index => $distance) { - ++$predictions[$this->labels[$index]]; + foreach (array_keys($distances) as $index) { + ++$predictions[$this->targets[$index]]; } arsort($predictions); @@ -61,10 +58,6 @@ class KNearestNeighbors implements Classifier } /** - * @param array $sample - * - * @return array - * * @throws \Phpml\Exception\InvalidArgumentException */ private function kNeighborsDistances(array $sample): array diff --git a/src/Classification/Linear/Adaline.php b/src/Classification/Linear/Adaline.php new file mode 100644 index 0000000..e5bc9d9 --- /dev/null +++ b/src/Classification/Linear/Adaline.php @@ -0,0 +1,75 @@ + + * + * Learning rate should be a float value between 0.0(exclusive) and 1.0 (inclusive)
+ * Maximum number of iterations can be an integer value greater than 0
+ * If normalizeInputs is set to true, then every input given to the algorithm will be standardized + * by use of standard deviation and mean calculation + * + * @throws InvalidArgumentException + */ + public function __construct( + float $learningRate = 0.001, + int $maxIterations = 1000, + bool $normalizeInputs = true, + int $trainingType = self::BATCH_TRAINING + ) { + if (!in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING], true)) { + throw new InvalidArgumentException('Adaline can only be trained with batch and online/stochastic gradient descent algorithm'); + } + + $this->trainingType = $trainingType; + + parent::__construct($learningRate, $maxIterations, $normalizeInputs); + } + + /** + * Adapts the weights with respect to given samples and targets + * by use of gradient descent learning rule + */ + protected function runTraining(array $samples, array $targets): void + { + // The cost function is the sum of squares + $callback = function ($weights, $sample, $target): array { + $this->weights = $weights; + + $output = $this->output($sample); + $gradient = $output - $target; + $error = $gradient ** 2; + + return [$error, $gradient]; + }; + + $isBatch = $this->trainingType == self::BATCH_TRAINING; + + parent::runGradientDescent($samples, $targets, $callback, $isBatch); + } +} diff --git a/src/Classification/Linear/DecisionStump.php b/src/Classification/Linear/DecisionStump.php new file mode 100644 index 0000000..258939e --- /dev/null +++ b/src/Classification/Linear/DecisionStump.php @@ -0,0 +1,319 @@ + + * + * If columnIndex is given, then the stump tries to produce a decision node + * on this column, otherwise in cases given the value of -1, the stump itself + * decides which column to take for the decision (Default DecisionTree behaviour) + */ + public function __construct(int $columnIndex = self::AUTO_SELECT) + { + $this->givenColumnIndex = $columnIndex; + } + + public function __toString(): string + { + return "IF ${this}->column 
${this}->operator ${this}->value ". + 'THEN '.$this->binaryLabels[0].' '. + 'ELSE '.$this->binaryLabels[1]; + } + + /** + * While finding best split point for a numerical valued column, + * DecisionStump looks for equally distanced values between minimum and maximum + * values in the column. Given $count value determines how many split + * points to be probed. The more split counts, the better performance but + * worse processing time (Default value is 10.0) + */ + public function setNumericalSplitCount(float $count): void + { + $this->numSplitCount = $count; + } + + /** + * @throws InvalidArgumentException + */ + protected function trainBinary(array $samples, array $targets, array $labels): void + { + $this->binaryLabels = $labels; + $this->featureCount = count($samples[0]); + + // If a column index is given, it should be among the existing columns + if ($this->givenColumnIndex > count($samples[0]) - 1) { + $this->givenColumnIndex = self::AUTO_SELECT; + } + + // Check the size of the weights given. 
+ // If none given, then assign 1 as a weight to each sample + if (count($this->weights) === 0) { + $this->weights = array_fill(0, count($samples), 1); + } else { + $numWeights = count($this->weights); + if ($numWeights !== count($samples)) { + throw new InvalidArgumentException('Number of sample weights does not match with number of samples'); + } + } + + // Determine type of each column as either "continuous" or "nominal" + $this->columnTypes = DecisionTree::getColumnTypes($samples); + + // Try to find the best split in the columns of the dataset + // by calculating error rate for each split point in each column + $columns = range(0, count($samples[0]) - 1); + if ($this->givenColumnIndex !== self::AUTO_SELECT) { + $columns = [$this->givenColumnIndex]; + } + + $bestSplit = [ + 'value' => 0, + 'operator' => '', + 'prob' => [], + 'column' => 0, + 'trainingErrorRate' => 1.0, + ]; + foreach ($columns as $col) { + if ($this->columnTypes[$col] == DecisionTree::CONTINUOUS) { + $split = $this->getBestNumericalSplit($samples, $targets, $col); + } else { + $split = $this->getBestNominalSplit($samples, $targets, $col); + } + + if ($split['trainingErrorRate'] < $bestSplit['trainingErrorRate']) { + $bestSplit = $split; + } + } + + // Assign determined best values to the stump + foreach ($bestSplit as $name => $value) { + $this->{$name} = $value; + } + } + + /** + * Determines best split point for the given column + */ + protected function getBestNumericalSplit(array $samples, array $targets, int $col): array + { + $values = array_column($samples, $col); + // Trying all possible points may be accomplished in two general ways: + // 1- Try all values in the $samples array ($values) + // 2- Artificially split the range of values into several parts and try them + // We choose the second one because it is faster in larger datasets + $minValue = min($values); + $maxValue = max($values); + $stepSize = ($maxValue - $minValue) / $this->numSplitCount; + + $split = []; + + foreach (['<=', 
'>'] as $operator) { + // Before trying all possible split points, let's first try + // the average value for the cut point + $threshold = array_sum($values) / (float) count($values); + [$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values); + if (!isset($split['trainingErrorRate']) || $errorRate < $split['trainingErrorRate']) { + $split = [ + 'value' => $threshold, + 'operator' => $operator, + 'prob' => $prob, + 'column' => $col, + 'trainingErrorRate' => $errorRate, + ]; + } + + // Try other possible points one by one + for ($step = $minValue; $step <= $maxValue; $step += $stepSize) { + $threshold = (float) $step; + [$errorRate, $prob] = $this->calculateErrorRate($targets, $threshold, $operator, $values); + if ($errorRate < $split['trainingErrorRate']) { + $split = [ + 'value' => $threshold, + 'operator' => $operator, + 'prob' => $prob, + 'column' => $col, + 'trainingErrorRate' => $errorRate, + ]; + } + }// for + } + + return $split; + } + + protected function getBestNominalSplit(array $samples, array $targets, int $col): array + { + $values = array_column($samples, $col); + $valueCounts = array_count_values($values); + $distinctVals = array_keys($valueCounts); + + $split = []; + + foreach (['=', '!='] as $operator) { + foreach ($distinctVals as $val) { + [$errorRate, $prob] = $this->calculateErrorRate($targets, $val, $operator, $values); + if (!isset($split['trainingErrorRate']) || $split['trainingErrorRate'] < $errorRate) { + $split = [ + 'value' => $val, + 'operator' => $operator, + 'prob' => $prob, + 'column' => $col, + 'trainingErrorRate' => $errorRate, + ]; + } + } + } + + return $split; + } + + /** + * Calculates the ratio of wrong predictions based on the new threshold + * value given as the parameter + */ + protected function calculateErrorRate(array $targets, float $threshold, string $operator, array $values): array + { + $wrong = 0.0; + $prob = []; + $leftLabel = $this->binaryLabels[0]; + $rightLabel = 
$this->binaryLabels[1]; + + foreach ($values as $index => $value) { + if (Comparison::compare($value, $threshold, $operator)) { + $predicted = $leftLabel; + } else { + $predicted = $rightLabel; + } + + $target = $targets[$index]; + if ((string) $predicted != (string) $targets[$index]) { + $wrong += $this->weights[$index]; + } + + if (!isset($prob[$predicted][$target])) { + $prob[$predicted][$target] = 0; + } + + ++$prob[$predicted][$target]; + } + + // Calculate probabilities: Proportion of labels in each leaf + $dist = array_combine($this->binaryLabels, array_fill(0, 2, 0.0)); + foreach ($prob as $leaf => $counts) { + $leafTotal = (float) array_sum($prob[$leaf]); + foreach ($counts as $label => $count) { + if ((string) $leaf == (string) $label) { + $dist[$leaf] = $count / $leafTotal; + } + } + } + + return [$wrong / (float) array_sum($this->weights), $dist]; + } + + /** + * Returns the probability of the sample of belonging to the given label + * + * Probability of a sample is calculated as the proportion of the label + * within the labels of the training samples in the decision node + * + * @param mixed $label + */ + protected function predictProbability(array $sample, $label): float + { + $predicted = $this->predictSampleBinary($sample); + if ((string) $predicted == (string) $label) { + return $this->prob[$label]; + } + + return 0.0; + } + + /** + * @return mixed + */ + protected function predictSampleBinary(array $sample) + { + if (Comparison::compare($sample[$this->column], $this->value, $this->operator)) { + return $this->binaryLabels[0]; + } + + return $this->binaryLabels[1]; + } + + protected function resetBinary(): void + { + } +} diff --git a/src/Classification/Linear/LogisticRegression.php b/src/Classification/Linear/LogisticRegression.php new file mode 100644 index 0000000..4014fb0 --- /dev/null +++ b/src/Classification/Linear/LogisticRegression.php @@ -0,0 +1,283 @@ + + * - 'log' : log likelihood
+ * - 'sse' : sum of squared errors
+ * + * @var string + */ + protected $costFunction = 'log'; + + /** + * Regularization term: only 'L2' is supported + * + * @var string + */ + protected $penalty = 'L2'; + + /** + * Lambda (λ) parameter of regularization term. If λ is set to 0, then + * regularization term is cancelled. + * + * @var float + */ + protected $lambda = 0.5; + + /** + * Initalize a Logistic Regression classifier with maximum number of iterations + * and learning rule to be applied
+ * + * Maximum number of iterations can be an integer value greater than 0
+ * If normalizeInputs is set to true, then every input given to the algorithm will be standardized + * by use of standard deviation and mean calculation
+ * + * Cost function can be 'log' for log-likelihood and 'sse' for sum of squared errors
+ * + * Penalty (Regularization term) can be 'L2' or empty string to cancel penalty term + * + * @throws InvalidArgumentException + */ + public function __construct( + int $maxIterations = 500, + bool $normalizeInputs = true, + int $trainingType = self::CONJUGATE_GRAD_TRAINING, + string $cost = 'log', + string $penalty = 'L2' + ) { + $trainingTypes = range(self::BATCH_TRAINING, self::CONJUGATE_GRAD_TRAINING); + if (!in_array($trainingType, $trainingTypes, true)) { + throw new InvalidArgumentException( + 'Logistic regression can only be trained with '. + 'batch (gradient descent), online (stochastic gradient descent) '. + 'or conjugate batch (conjugate gradients) algorithms' + ); + } + + if (!in_array($cost, ['log', 'sse'], true)) { + throw new InvalidArgumentException( + "Logistic regression cost function can be one of the following: \n". + "'log' for log-likelihood and 'sse' for sum of squared errors" + ); + } + + if ($penalty !== '' && strtoupper($penalty) !== 'L2') { + throw new InvalidArgumentException('Logistic regression supports only \'L2\' regularization'); + } + + $this->learningRate = 0.001; + + parent::__construct($this->learningRate, $maxIterations, $normalizeInputs); + + $this->trainingType = $trainingType; + $this->costFunction = $cost; + $this->penalty = $penalty; + } + + /** + * Sets the learning rate if gradient descent algorithm is + * selected for training + */ + public function setLearningRate(float $learningRate): void + { + $this->learningRate = $learningRate; + } + + /** + * Lambda (λ) parameter of regularization term. 
If 0 is given, + * then the regularization term is cancelled + */ + public function setLambda(float $lambda): void + { + $this->lambda = $lambda; + } + + /** + * Adapts the weights with respect to given samples and targets + * by use of selected solver + * + * @throws \Exception + */ + protected function runTraining(array $samples, array $targets): void + { + $callback = $this->getCostFunction(); + + switch ($this->trainingType) { + case self::BATCH_TRAINING: + $this->runGradientDescent($samples, $targets, $callback, true); + + return; + + case self::ONLINE_TRAINING: + $this->runGradientDescent($samples, $targets, $callback, false); + + return; + + case self::CONJUGATE_GRAD_TRAINING: + $this->runConjugateGradient($samples, $targets, $callback); + + return; + + default: + // Not reached + throw new Exception(sprintf('Logistic regression has invalid training type: %d.', $this->trainingType)); + } + } + + /** + * Executes Conjugate Gradient method to optimize the weights of the LogReg model + */ + protected function runConjugateGradient(array $samples, array $targets, Closure $gradientFunc): void + { + if ($this->optimizer === null) { + $this->optimizer = (new ConjugateGradient($this->featureCount)) + ->setMaxIterations($this->maxIterations); + } + + $this->weights = $this->optimizer->runOptimization($samples, $targets, $gradientFunc); + $this->costValues = $this->optimizer->getCostValues(); + } + + /** + * Returns the appropriate callback function for the selected cost function + * + * @throws \Exception + */ + protected function getCostFunction(): Closure + { + $penalty = 0; + if ($this->penalty === 'L2') { + $penalty = $this->lambda; + } + + switch ($this->costFunction) { + case 'log': + /* + * Negative of Log-likelihood cost function to be minimized: + * J(x) = ∑( - y . log(h(x)) - (1 - y) . log(1 - h(x))) + * + * If regularization term is given, then it will be added to the cost: + * for L2 : J(x) = J(x) + λ/m . 
w + * + * The gradient of the cost function to be used with gradient descent: + * ∇J(x) = -(y - h(x)) = (h(x) - y) + */ + return function ($weights, $sample, $y) use ($penalty): array { + $this->weights = $weights; + $hX = $this->output($sample); + + // In cases where $hX = 1 or $hX = 0, the log-likelihood + // value will give a NaN, so we fix these values + if ($hX == 1) { + $hX = 1 - 1e-10; + } + + if ($hX == 0) { + $hX = 1e-10; + } + + $y = $y < 0 ? 0 : 1; + + $error = -$y * log($hX) - (1 - $y) * log(1 - $hX); + $gradient = $hX - $y; + + return [$error, $gradient, $penalty]; + }; + case 'sse': + /* + * Sum of squared errors or least squared errors cost function: + * J(x) = ∑ (y - h(x))^2 + * + * If regularization term is given, then it will be added to the cost: + * for L2 : J(x) = J(x) + λ/m . w + * + * The gradient of the cost function: + * ∇J(x) = -(h(x) - y) . h(x) . (1 - h(x)) + */ + return function ($weights, $sample, $y) use ($penalty): array { + $this->weights = $weights; + $hX = $this->output($sample); + + $y = $y < 0 ? 0 : 1; + + $error = (($y - $hX) ** 2); + $gradient = -($y - $hX) * $hX * (1 - $hX); + + return [$error, $gradient, $penalty]; + }; + default: + // Not reached + throw new Exception(sprintf('Logistic regression has invalid cost function: %s.', $this->costFunction)); + } + } + + /** + * Returns the output of the network, a float value between 0.0 and 1.0 + */ + protected function output(array $sample): float + { + $sum = parent::output($sample); + + return 1.0 / (1.0 + exp(-$sum)); + } + + /** + * Returns the class value (either -1 or 1) for the given input + */ + protected function outputClass(array $sample): int + { + $output = $this->output($sample); + + if ($output > 0.5) { + return 1; + } + + return -1; + } + + /** + * Returns the probability of the sample of belonging to the given label. + * + * The probability is simply taken as the distance of the sample + * to the decision plane. 
+ * + * @param mixed $label + */ + protected function predictProbability(array $sample, $label): float + { + $sample = $this->checkNormalizedSample($sample); + $probability = $this->output($sample); + + if (array_search($label, $this->labels, true) > 0) { + return $probability; + } + + return 1 - $probability; + } +} diff --git a/src/Classification/Linear/Perceptron.php b/src/Classification/Linear/Perceptron.php new file mode 100644 index 0000000..44220a6 --- /dev/null +++ b/src/Classification/Linear/Perceptron.php @@ -0,0 +1,264 @@ + 1.0) { + throw new InvalidArgumentException('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)'); + } + + if ($maxIterations <= 0) { + throw new InvalidArgumentException('Maximum number of iterations must be an integer greater than 0'); + } + + if ($normalizeInputs) { + $this->normalizer = new Normalizer(Normalizer::NORM_STD); + } + + $this->learningRate = $learningRate; + $this->maxIterations = $maxIterations; + } + + public function partialTrain(array $samples, array $targets, array $labels = []): void + { + $this->trainByLabel($samples, $targets, $labels); + } + + public function trainBinary(array $samples, array $targets, array $labels): void + { + if ($this->normalizer !== null) { + $this->normalizer->transform($samples); + } + + // Set all target values to either -1 or 1 + $this->labels = [ + 1 => $labels[0], + -1 => $labels[1], + ]; + foreach ($targets as $key => $target) { + $targets[$key] = (string) $target == (string) $this->labels[1] ? 1 : -1; + } + + // Set samples and feature count vars + $this->featureCount = count($samples[0]); + + $this->runTraining($samples, $targets); + } + + /** + * Normally enabling early stopping for the optimization procedure may + * help saving processing time while in some cases it may result in + * premature convergence.
+ * + * If "false" is given, the optimization procedure will always be executed + * for $maxIterations times + * + * @return $this + */ + public function setEarlyStop(bool $enable = true) + { + $this->enableEarlyStop = $enable; + + return $this; + } + + /** + * Returns the cost values obtained during the training. + */ + public function getCostValues(): array + { + return $this->costValues; + } + + protected function resetBinary(): void + { + $this->labels = []; + $this->optimizer = null; + $this->featureCount = 0; + $this->weights = []; + $this->costValues = []; + } + + /** + * Trains the perceptron model with Stochastic Gradient Descent optimization + * to get the correct set of weights + */ + protected function runTraining(array $samples, array $targets): void + { + // The cost function is the sum of squares + $callback = function ($weights, $sample, $target): array { + $this->weights = $weights; + + $prediction = $this->outputClass($sample); + $gradient = $prediction - $target; + $error = $gradient ** 2; + + return [$error, $gradient]; + }; + + $this->runGradientDescent($samples, $targets, $callback); + } + + /** + * Executes a Gradient Descent algorithm for + * the given cost function + */ + protected function runGradientDescent(array $samples, array $targets, Closure $gradientFunc, bool $isBatch = false): void + { + $class = $isBatch ? 
GD::class : StochasticGD::class; + + if ($this->optimizer === null) { + $this->optimizer = (new $class($this->featureCount)) + ->setLearningRate($this->learningRate) + ->setMaxIterations($this->maxIterations) + ->setChangeThreshold(1e-6) + ->setEarlyStop($this->enableEarlyStop); + } + + $this->weights = $this->optimizer->runOptimization($samples, $targets, $gradientFunc); + $this->costValues = $this->optimizer->getCostValues(); + } + + /** + * Checks if the sample should be normalized and if so, returns the + * normalized sample + */ + protected function checkNormalizedSample(array $sample): array + { + if ($this->normalizer !== null) { + $samples = [$sample]; + $this->normalizer->transform($samples); + $sample = $samples[0]; + } + + return $sample; + } + + /** + * Calculates net output of the network as a float value for the given input + * + * @return int|float + */ + protected function output(array $sample) + { + $sum = 0; + foreach ($this->weights as $index => $w) { + if ($index == 0) { + $sum += $w; + } else { + $sum += $w * $sample[$index - 1]; + } + } + + return $sum; + } + + /** + * Returns the class value (either -1 or 1) for the given input + */ + protected function outputClass(array $sample): int + { + return $this->output($sample) > 0 ? 1 : -1; + } + + /** + * Returns the probability of the sample of belonging to the given label. + * + * The probability is simply taken as the distance of the sample + * to the decision plane. 
+ * + * @param mixed $label + */ + protected function predictProbability(array $sample, $label): float + { + $predicted = $this->predictSampleBinary($sample); + + if ((string) $predicted == (string) $label) { + $sample = $this->checkNormalizedSample($sample); + + return (float) abs($this->output($sample)); + } + + return 0.0; + } + + /** + * @return mixed + */ + protected function predictSampleBinary(array $sample) + { + $sample = $this->checkNormalizedSample($sample); + + $predictedClass = $this->outputClass($sample); + + return $this->labels[$predictedClass]; + } +} diff --git a/src/Classification/MLPClassifier.php b/src/Classification/MLPClassifier.php new file mode 100644 index 0000000..35678d5 --- /dev/null +++ b/src/Classification/MLPClassifier.php @@ -0,0 +1,58 @@ +classes, true)) { + throw new InvalidArgumentException( + sprintf('Target with value "%s" is not part of the accepted classes', $target) + ); + } + + return array_search($target, $this->classes, true); + } + + /** + * @return mixed + */ + protected function predictSample(array $sample) + { + $output = $this->setInput($sample)->getOutput(); + + $predictedClass = null; + $max = 0; + foreach ($output as $class => $value) { + if ($value > $max) { + $predictedClass = $class; + $max = $value; + } + } + + return $predictedClass; + } + + /** + * @param mixed $target + */ + protected function trainSample(array $sample, $target): void + { + // Feed-forward. + $this->setInput($sample); + + // Back-propagate. 
+ $this->backpropagation->backpropagate($this->getLayers(), $this->getTargetClass($target)); + } +} diff --git a/src/Classification/NaiveBayes.php b/src/Classification/NaiveBayes.php new file mode 100644 index 0000000..079b6f7 --- /dev/null +++ b/src/Classification/NaiveBayes.php @@ -0,0 +1,184 @@ +samples = array_merge($this->samples, $samples); + $this->targets = array_merge($this->targets, $targets); + $this->sampleCount = count($this->samples); + $this->featureCount = count($this->samples[0]); + + $this->labels = array_map('strval', array_flip(array_flip($this->targets))); + foreach ($this->labels as $label) { + $samples = $this->getSamplesByLabel($label); + $this->p[$label] = count($samples) / $this->sampleCount; + $this->calculateStatistics($label, $samples); + } + } + + /** + * @return mixed + */ + protected function predictSample(array $sample) + { + // Use NaiveBayes assumption for each label using: + // P(label|features) = P(label) * P(feature0|label) * P(feature1|label) .... P(featureN|label) + // Then compare probability for each class to determine which label is most likely + $predictions = []; + foreach ($this->labels as $label) { + $p = $this->p[$label]; + for ($i = 0; $i < $this->featureCount; ++$i) { + $Plf = $this->sampleProbability($sample, $i, $label); + $p += $Plf; + } + + $predictions[$label] = $p; + } + + arsort($predictions, SORT_NUMERIC); + reset($predictions); + + return key($predictions); + } + + /** + * Calculates vital statistics for each label & feature. 
Stores these + * values in private array in order to avoid repeated calculation + */ + private function calculateStatistics(string $label, array $samples): void + { + $this->std[$label] = array_fill(0, $this->featureCount, 0); + $this->mean[$label] = array_fill(0, $this->featureCount, 0); + $this->dataType[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); + $this->discreteProb[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); + for ($i = 0; $i < $this->featureCount; ++$i) { + // Get the values of nth column in the samples array + // Mean::arithmetic is called twice, can be optimized + $values = array_column($samples, $i); + $numValues = count($values); + // if the values contain non-numeric data, + // then it should be treated as nominal/categorical/discrete column + if ($values !== array_filter($values, 'is_numeric')) { + $this->dataType[$label][$i] = self::NOMINAL; + $this->discreteProb[$label][$i] = array_count_values($values); + $db = &$this->discreteProb[$label][$i]; + $db = array_map(function ($el) use ($numValues) { + return $el / $numValues; + }, $db); + } else { + $this->mean[$label][$i] = Mean::arithmetic($values); + // Add epsilon in order to avoid zero stdev + $this->std[$label][$i] = 1e-10 + StandardDeviation::population($values, false); + } + } + } + + /** + * Calculates the probability P(label|sample_n) + */ + private function sampleProbability(array $sample, int $feature, string $label): float + { + if (!isset($sample[$feature])) { + throw new InvalidArgumentException('Missing feature. 
All samples must have equal number of features'); + } + + $value = $sample[$feature]; + if ($this->dataType[$label][$feature] == self::NOMINAL) { + if (!isset($this->discreteProb[$label][$feature][$value]) || + $this->discreteProb[$label][$feature][$value] == 0) { + return self::EPSILON; + } + + return $this->discreteProb[$label][$feature][$value]; + } + + $std = $this->std[$label][$feature]; + $mean = $this->mean[$label][$feature]; + // Calculate the probability density by use of normal/Gaussian distribution + // Ref: https://en.wikipedia.org/wiki/Normal_distribution + // + // In order to avoid numerical errors because of small or zero values, + // some libraries adopt taking log of calculations such as + // scikit-learn did. + // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) + $pdf = -0.5 * log(2.0 * M_PI * $std * $std); + $pdf -= 0.5 * (($value - $mean) ** 2) / ($std * $std); + + return $pdf; + } + + /** + * Return samples belonging to specific label + */ + private function getSamplesByLabel(string $label): array + { + $samples = []; + for ($i = 0; $i < $this->sampleCount; ++$i) { + if ($this->targets[$i] == $label) { + $samples[] = $this->samples[$i]; + } + } + + return $samples; + } +} diff --git a/src/Classification/SVC.php b/src/Classification/SVC.php new file mode 100644 index 0000000..fbc47ba --- /dev/null +++ b/src/Classification/SVC.php @@ -0,0 +1,26 @@ +weights = $weights; + } +} diff --git a/src/Clustering/Clusterer.php b/src/Clustering/Clusterer.php new file mode 100644 index 0000000..22e65e6 --- /dev/null +++ b/src/Clustering/Clusterer.php @@ -0,0 +1,10 @@ +epsilon = $epsilon; + $this->minSamples = $minSamples; + $this->distanceMetric = $distanceMetric; + } + + public function cluster(array $samples): array + { + $labels = []; + $n = 0; + + foreach ($samples as $index => $sample) { + if (isset($labels[$index])) { + continue; + } + + $neighborIndices = $this->getIndicesInRegion($sample, $samples); + + if 
(count($neighborIndices) < $this->minSamples) { + $labels[$index] = self::NOISE; + + continue; + } + + $labels[$index] = $n; + + $this->expandCluster($samples, $neighborIndices, $labels, $n); + + ++$n; + } + + return $this->groupByCluster($samples, $labels, $n); + } + + private function expandCluster(array $samples, array $seeds, array &$labels, int $n): void + { + while (($index = array_pop($seeds)) !== null) { + if (isset($labels[$index])) { + if ($labels[$index] === self::NOISE) { + $labels[$index] = $n; + } + + continue; + } + + $labels[$index] = $n; + + $sample = $samples[$index]; + $neighborIndices = $this->getIndicesInRegion($sample, $samples); + + if (count($neighborIndices) >= $this->minSamples) { + $seeds = array_unique(array_merge($seeds, $neighborIndices)); + } + } + } + + private function getIndicesInRegion(array $center, array $samples): array + { + $indices = []; + + foreach ($samples as $index => $sample) { + if ($this->distanceMetric->distance($center, $sample) < $this->epsilon) { + $indices[] = $index; + } + } + + return $indices; + } + + private function groupByCluster(array $samples, array $labels, int $n): array + { + $clusters = array_fill(0, $n, []); + + foreach ($samples as $index => $sample) { + if ($labels[$index] !== self::NOISE) { + $clusters[$labels[$index]][$index] = $sample; + } + } + + // Reindex (i.e. to 0, 1, 2, ...) 
integer indices for backword compatibility + foreach ($clusters as $index => $cluster) { + $clusters[$index] = array_merge($cluster, []); + } + + return $clusters; + } +} diff --git a/src/Clustering/FuzzyCMeans.php b/src/Clustering/FuzzyCMeans.php new file mode 100644 index 0000000..ce86f5f --- /dev/null +++ b/src/Clustering/FuzzyCMeans.php @@ -0,0 +1,236 @@ +clustersNumber = $clustersNumber; + $this->fuzziness = $fuzziness; + $this->epsilon = $epsilon; + $this->maxIterations = $maxIterations; + } + + public function getMembershipMatrix(): array + { + return $this->membership; + } + + public function cluster(array $samples): array + { + // Initialize variables, clusters and membership matrix + $this->sampleCount = count($samples); + $this->samples = &$samples; + $this->space = new Space(count($samples[0])); + $this->initClusters(); + + // Our goal is minimizing the objective value while + // executing the clustering steps at a maximum number of iterations + $lastObjective = 0.0; + $iterations = 0; + do { + // Update the membership matrix and cluster centers, respectively + $this->updateMembershipMatrix(); + $this->updateClusters(); + + // Calculate the new value of the objective function + $objectiveVal = $this->getObjective(); + $difference = abs($lastObjective - $objectiveVal); + $lastObjective = $objectiveVal; + } while ($difference > $this->epsilon && $iterations++ <= $this->maxIterations); + + // Attach (hard cluster) each data point to the nearest cluster + for ($k = 0; $k < $this->sampleCount; ++$k) { + $column = array_column($this->membership, $k); + arsort($column); + reset($column); + $cluster = $this->clusters[key($column)]; + $cluster->attach(new Point($this->samples[$k])); + } + + // Return grouped samples + $grouped = []; + foreach ($this->clusters as $cluster) { + $grouped[] = $cluster->getPoints(); + } + + return $grouped; + } + + protected function initClusters(): void + { + // Membership array is a matrix of cluster number by sample counts + // We 
initilize the membership array with random values + $dim = $this->space->getDimension(); + $this->generateRandomMembership($dim, $this->sampleCount); + $this->updateClusters(); + } + + protected function generateRandomMembership(int $rows, int $cols): void + { + $this->membership = []; + for ($i = 0; $i < $rows; ++$i) { + $row = []; + $total = 0.0; + for ($k = 0; $k < $cols; ++$k) { + $val = random_int(1, 5) / 10.0; + $row[] = $val; + $total += $val; + } + + $this->membership[] = array_map(static function ($val) use ($total): float { + return $val / $total; + }, $row); + } + } + + protected function updateClusters(): void + { + $dim = $this->space->getDimension(); + if (count($this->clusters) === 0) { + for ($i = 0; $i < $this->clustersNumber; ++$i) { + $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0)); + } + } + + for ($i = 0; $i < $this->clustersNumber; ++$i) { + $cluster = $this->clusters[$i]; + $center = $cluster->getCoordinates(); + for ($k = 0; $k < $dim; ++$k) { + $a = $this->getMembershipRowTotal($i, $k, true); + $b = $this->getMembershipRowTotal($i, $k, false); + $center[$k] = $a / $b; + } + + $cluster->setCoordinates($center); + } + } + + protected function getMembershipRowTotal(int $row, int $col, bool $multiply): float + { + $sum = 0.0; + for ($k = 0; $k < $this->sampleCount; ++$k) { + $val = $this->membership[$row][$k] ** $this->fuzziness; + if ($multiply) { + $val *= $this->samples[$k][$col]; + } + + $sum += $val; + } + + return $sum; + } + + protected function updateMembershipMatrix(): void + { + for ($i = 0; $i < $this->clustersNumber; ++$i) { + for ($k = 0; $k < $this->sampleCount; ++$k) { + $distCalc = $this->getDistanceCalc($i, $k); + $this->membership[$i][$k] = 1.0 / $distCalc; + } + } + } + + protected function getDistanceCalc(int $row, int $col): float + { + $sum = 0.0; + $distance = new Euclidean(); + $dist1 = $distance->distance( + $this->clusters[$row]->getCoordinates(), + $this->samples[$col] + ); + + for ($j = 0; $j 
< $this->clustersNumber; ++$j) { + $dist2 = $distance->distance( + $this->clusters[$j]->getCoordinates(), + $this->samples[$col] + ); + + $val = (($dist1 / $dist2) ** 2.0) / ($this->fuzziness - 1); + $sum += $val; + } + + return $sum; + } + + /** + * The objective is to minimize the distance between all data points + * and all cluster centers. This method returns the summation of all + * these distances + */ + protected function getObjective(): float + { + $sum = 0.0; + $distance = new Euclidean(); + for ($i = 0; $i < $this->clustersNumber; ++$i) { + $clust = $this->clusters[$i]->getCoordinates(); + for ($k = 0; $k < $this->sampleCount; ++$k) { + $point = $this->samples[$k]; + $sum += $distance->distance($clust, $point); + } + } + + return $sum; + } +} diff --git a/src/Phpml/Clustering/KMeans.php b/src/Clustering/KMeans.php similarity index 58% rename from src/Phpml/Clustering/KMeans.php rename to src/Clustering/KMeans.php index c5372b0..1aff1c4 100644 --- a/src/Phpml/Clustering/KMeans.php +++ b/src/Clustering/KMeans.php @@ -1,6 +1,6 @@ clustersNumber = $clustersNumber; $this->initialization = $initialization; } - /** - * @param array $samples - * - * @return array - */ - public function cluster(array $samples) + public function cluster(array $samples): array { - $space = new Space(count($samples[0])); - foreach ($samples as $sample) { - $space->addPoint($sample); + $space = new Space(count(reset($samples))); + foreach ($samples as $key => $sample) { + $space->addPoint($sample, $key); } $clusters = []; diff --git a/src/Phpml/Clustering/KMeans/Cluster.php b/src/Clustering/KMeans/Cluster.php similarity index 60% rename from src/Phpml/Clustering/KMeans/Cluster.php rename to src/Clustering/KMeans/Cluster.php index 5cd974d..f4c3d3e 100644 --- a/src/Phpml/Clustering/KMeans/Cluster.php +++ b/src/Clustering/KMeans/Cluster.php @@ -1,15 +1,14 @@ points = new SplObjectStorage(); } - /** - * @return array - */ - public function getPoints() + public function getPoints(): array 
{ $points = []; foreach ($this->points as $point) { - $points[] = $point->toArray(); + if ($point->label === null) { + $points[] = $point->toArray(); + } else { + $points[$point->label] = $point->toArray(); + } } return $points; } - /** - * @return array - */ - public function toArray() + public function toArray(): array { - return array( + return [ 'centroid' => parent::toArray(), 'points' => $this->getPoints(), - ); + ]; } - /** - * @param Point $point - * - * @return Point - */ - public function attach(Point $point) + public function attach(Point $point): Point { if ($point instanceof self) { - throw new LogicException('cannot attach a cluster to another'); + throw new LogicException('Cannot attach a cluster to another'); } $this->points->attach($point); @@ -72,37 +60,27 @@ class Cluster extends Point implements IteratorAggregate, Countable return $point; } - /** - * @param Point $point - * - * @return Point - */ - public function detach(Point $point) + public function detach(Point $point): Point { $this->points->detach($point); return $point; } - /** - * @param SplObjectStorage $points - */ - public function attachAll(SplObjectStorage $points) + public function attachAll(SplObjectStorage $points): void { $this->points->addAll($points); } - /** - * @param SplObjectStorage $points - */ - public function detachAll(SplObjectStorage $points) + public function detachAll(SplObjectStorage $points): void { $this->points->removeAll($points); } - public function updateCentroid() + public function updateCentroid(): void { - if (!$count = count($this->points)) { + $count = count($this->points); + if ($count === 0) { return; } @@ -127,11 +105,13 @@ class Cluster extends Point implements IteratorAggregate, Countable return $this->points; } - /** - * @return mixed - */ - public function count() + public function count(): int { return count($this->points); } + + public function setCoordinates(array $newCoordinates): void + { + $this->coordinates = $newCoordinates; + } } diff 
--git a/src/Phpml/Clustering/KMeans/Point.php b/src/Clustering/KMeans/Point.php similarity index 63% rename from src/Phpml/Clustering/KMeans/Point.php rename to src/Clustering/KMeans/Point.php index 5dcf5ec..a3f195d 100644 --- a/src/Phpml/Clustering/KMeans/Point.php +++ b/src/Clustering/KMeans/Point.php @@ -1,12 +1,12 @@ dimension = count($coordinates); $this->coordinates = $coordinates; + $this->label = $label; } - /** - * @return array - */ - public function toArray() + public function toArray(): array { return $this->coordinates; } /** - * @param Point $point - * @param bool $precise - * - * @return int|mixed + * @return float|int */ - public function getDistanceWith(self $point, $precise = true) + public function getDistanceWith(self $point, bool $precise = true) { $distance = 0; for ($n = 0; $n < $this->dimension; ++$n) { $difference = $this->coordinates[$n] - $point->coordinates[$n]; - $distance += $difference * $difference; + $distance += $difference * $difference; } - return $precise ? sqrt((float) $distance) : $distance; + return $precise ? 
$distance ** .5 : $distance; } /** - * @param $points - * - * @return mixed + * @param Point[] $points */ - public function getClosest($points) + public function getClosest(array $points): ?self { + $minPoint = null; + foreach ($points as $point) { $distance = $this->getDistanceWith($point, false); if (!isset($minDistance)) { $minDistance = $distance; $minPoint = $point; + continue; } @@ -77,20 +78,15 @@ class Point implements ArrayAccess return $minPoint; } - /** - * @return array - */ - public function getCoordinates() + public function getCoordinates(): array { return $this->coordinates; } /** * @param mixed $offset - * - * @return bool */ - public function offsetExists($offset) + public function offsetExists($offset): bool { return isset($this->coordinates[$offset]); } @@ -109,7 +105,7 @@ class Point implements ArrayAccess * @param mixed $offset * @param mixed $value */ - public function offsetSet($offset, $value) + public function offsetSet($offset, $value): void { $this->coordinates[$offset] = $value; } @@ -117,8 +113,13 @@ class Point implements ArrayAccess /** * @param mixed $offset */ - public function offsetUnset($offset) + public function offsetUnset($offset): void { unset($this->coordinates[$offset]); } + + public function count(): int + { + return count($this->coordinates); + } } diff --git a/src/Phpml/Clustering/KMeans/Space.php b/src/Clustering/KMeans/Space.php similarity index 62% rename from src/Phpml/Clustering/KMeans/Space.php rename to src/Clustering/KMeans/Space.php index 2904e2f..e4207cc 100644 --- a/src/Phpml/Clustering/KMeans/Space.php +++ b/src/Clustering/KMeans/Space.php @@ -1,13 +1,13 @@ dimension = $dimension; } - /** - * @return array - */ - public function toArray() + public function toArray(): array { $points = []; + + /** @var Point $point */ foreach ($this as $point) { $points[] = $point->toArray(); } @@ -42,45 +38,40 @@ class Space extends SplObjectStorage } /** - * @param array $coordinates - * - * @return Point + * @param mixed 
$label */ - public function newPoint(array $coordinates) + public function newPoint(array $coordinates, $label = null): Point { - if (count($coordinates) != $this->dimension) { + if (count($coordinates) !== $this->dimension) { throw new LogicException('('.implode(',', $coordinates).') is not a point of this space'); } - return new Point($coordinates); + return new Point($coordinates, $label); } /** - * @param array $coordinates - * @param null $data + * @param mixed $label + * @param mixed $data */ - public function addPoint(array $coordinates, $data = null) + public function addPoint(array $coordinates, $label = null, $data = null): void { - return $this->attach($this->newPoint($coordinates), $data); + $this->attach($this->newPoint($coordinates, $label), $data); } /** * @param object $point - * @param null $data + * @param mixed $data */ - public function attach($point, $data = null) + public function attach($point, $data = null): void { if (!$point instanceof Point) { throw new InvalidArgumentException('can only attach points to spaces'); } - return parent::attach($point, $data); + parent::attach($point, $data); } - /** - * @return int - */ - public function getDimension() + public function getDimension(): int { return $this->dimension; } @@ -90,47 +81,44 @@ class Space extends SplObjectStorage */ public function getBoundaries() { - if (!count($this)) { + if (count($this) === 0) { return false; } $min = $this->newPoint(array_fill(0, $this->dimension, null)); $max = $this->newPoint(array_fill(0, $this->dimension, null)); + /** @var Point $point */ foreach ($this as $point) { for ($n = 0; $n < $this->dimension; ++$n) { - ($min[$n] > $point[$n] || $min[$n] === null) && $min[$n] = $point[$n]; - ($max[$n] < $point[$n] || $max[$n] === null) && $max[$n] = $point[$n]; + if ($min[$n] === null || $min[$n] > $point[$n]) { + $min[$n] = $point[$n]; + } + + if ($max[$n] === null || $max[$n] < $point[$n]) { + $max[$n] = $point[$n]; + } } } - return array($min, $max); + return 
[$min, $max]; } - /** - * @param Point $min - * @param Point $max - * - * @return Point - */ - public function getRandomPoint(Point $min, Point $max) + public function getRandomPoint(Point $min, Point $max): Point { $point = $this->newPoint(array_fill(0, $this->dimension, null)); for ($n = 0; $n < $this->dimension; ++$n) { - $point[$n] = rand($min[$n], $max[$n]); + $point[$n] = random_int($min[$n], $max[$n]); } return $point; } /** - * @param int $clustersNumber - * @param int $initMethod - * - * @return array|Cluster[] + * @return Cluster[] */ - public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM) + public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array { $clusters = $this->initializeClusters($clustersNumber, $initMethod); @@ -141,33 +129,34 @@ class Space extends SplObjectStorage } /** - * @param $clustersNumber - * @param $initMethod - * - * @return array|Cluster[] + * @return Cluster[] */ - protected function initializeClusters(int $clustersNumber, int $initMethod) + protected function initializeClusters(int $clustersNumber, int $initMethod): array { switch ($initMethod) { case KMeans::INIT_RANDOM: $clusters = $this->initializeRandomClusters($clustersNumber); + break; case KMeans::INIT_KMEANS_PLUS_PLUS: $clusters = $this->initializeKMPPClusters($clustersNumber); + break; + + default: + return []; } + $clusters[0]->attachAll($this); return $clusters; } /** - * @param $clusters - * - * @return bool + * @param Cluster[] $clusters */ - protected function iterate($clusters) + protected function iterate(array $clusters): bool { $convergence = true; @@ -178,9 +167,13 @@ class Space extends SplObjectStorage foreach ($cluster as $point) { $closest = $point->getClosest($clusters); + if ($closest === null) { + continue; + } + if ($closest !== $cluster) { - isset($attach[$closest]) || $attach[$closest] = new SplObjectStorage(); - isset($detach[$cluster]) || $detach[$cluster] = new SplObjectStorage(); + 
$attach[$closest] ?? $attach[$closest] = new SplObjectStorage(); + $detach[$cluster] ?? $detach[$cluster] = new SplObjectStorage(); $attach[$closest]->attach($point); $detach[$cluster]->attach($point); @@ -190,10 +183,12 @@ class Space extends SplObjectStorage } } + /** @var Cluster $cluster */ foreach ($attach as $cluster) { $cluster->attachAll($attach[$cluster]); } + /** @var Cluster $cluster */ foreach ($detach as $cluster) { $cluster->detachAll($detach[$cluster]); } @@ -206,14 +201,58 @@ class Space extends SplObjectStorage } /** - * @param int $clustersNumber - * - * @return array + * @return Cluster[] */ - private function initializeRandomClusters(int $clustersNumber) + protected function initializeKMPPClusters(int $clustersNumber): array { $clusters = []; - list($min, $max) = $this->getBoundaries(); + $this->rewind(); + + /** @var Point $current */ + $current = $this->current(); + + $clusters[] = new Cluster($this, $current->getCoordinates()); + + $distances = new SplObjectStorage(); + + for ($i = 1; $i < $clustersNumber; ++$i) { + $sum = 0; + /** @var Point $point */ + foreach ($this as $point) { + $closest = $point->getClosest($clusters); + if ($closest === null) { + continue; + } + + $distance = $point->getDistanceWith($closest); + $sum += $distances[$point] = $distance; + } + + $sum = random_int(0, (int) $sum); + /** @var Point $point */ + foreach ($this as $point) { + $sum -= $distances[$point]; + + if ($sum > 0) { + continue; + } + + $clusters[] = new Cluster($this, $point->getCoordinates()); + + break; + } + } + + return $clusters; + } + + /** + * @return Cluster[] + */ + private function initializeRandomClusters(int $clustersNumber): array + { + $clusters = []; + [$min, $max] = $this->getBoundaries(); for ($n = 0; $n < $clustersNumber; ++$n) { $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates()); @@ -221,39 +260,4 @@ class Space extends SplObjectStorage return $clusters; } - - /** - * @param int $clustersNumber - * - 
* @return array - */ - protected function initializeKMPPClusters(int $clustersNumber) - { - $clusters = []; - $position = rand(1, count($this)); - for ($i = 1, $this->rewind(); $i < $position && $this->valid(); $i++, $this->next()); - $clusters[] = new Cluster($this, $this->current()->getCoordinates()); - - $distances = new SplObjectStorage(); - - for ($i = 1; $i < $clustersNumber; ++$i) { - $sum = 0; - foreach ($this as $point) { - $distance = $point->getDistanceWith($point->getClosest($clusters)); - $sum += $distances[$point] = $distance; - } - - $sum = rand(0, (int) $sum); - foreach ($this as $point) { - if (($sum -= $distances[$point]) > 0) { - continue; - } - - $clusters[] = new Cluster($this, $point->getCoordinates()); - break; - } - } - - return $clusters; - } } diff --git a/src/CrossValidation/RandomSplit.php b/src/CrossValidation/RandomSplit.php new file mode 100644 index 0000000..8507ee5 --- /dev/null +++ b/src/CrossValidation/RandomSplit.php @@ -0,0 +1,26 @@ +getSamples(); + $labels = $dataset->getTargets(); + $datasetSize = count($samples); + $testCount = count($this->testSamples); + + for ($i = $datasetSize; $i > 0; --$i) { + $key = mt_rand(0, $datasetSize - 1); + $setName = (count($this->testSamples) - $testCount) / $datasetSize >= $testSize ? 
'train' : 'test'; + + $this->{$setName.'Samples'}[] = $samples[$key]; + $this->{$setName.'Labels'}[] = $labels[$key]; + } + } +} diff --git a/src/CrossValidation/Split.php b/src/CrossValidation/Split.php new file mode 100644 index 0000000..e9d401c --- /dev/null +++ b/src/CrossValidation/Split.php @@ -0,0 +1,73 @@ += 1) { + throw new InvalidArgumentException('testsize must be between 0.0 and 1.0'); + } + + $this->seedGenerator($seed); + + $this->splitDataset($dataset, $testSize); + } + + public function getTrainSamples(): array + { + return $this->trainSamples; + } + + public function getTestSamples(): array + { + return $this->testSamples; + } + + public function getTrainLabels(): array + { + return $this->trainLabels; + } + + public function getTestLabels(): array + { + return $this->testLabels; + } + + abstract protected function splitDataset(Dataset $dataset, float $testSize): void; + + protected function seedGenerator(?int $seed = null): void + { + if ($seed === null) { + mt_srand(); + } else { + mt_srand($seed); + } + } +} diff --git a/src/CrossValidation/StratifiedRandomSplit.php b/src/CrossValidation/StratifiedRandomSplit.php new file mode 100644 index 0000000..3b3acc4 --- /dev/null +++ b/src/CrossValidation/StratifiedRandomSplit.php @@ -0,0 +1,49 @@ +splitByTarget($dataset); + + foreach ($datasets as $targetSet) { + parent::splitDataset($targetSet, $testSize); + } + } + + /** + * @return Dataset[] + */ + private function splitByTarget(Dataset $dataset): array + { + $targets = $dataset->getTargets(); + $samples = $dataset->getSamples(); + + $uniqueTargets = array_unique($targets); + /** @var array $split */ + $split = array_combine($uniqueTargets, array_fill(0, count($uniqueTargets), [])); + + foreach ($samples as $key => $sample) { + $split[$targets[$key]][] = $sample; + } + + return $this->createDatasets($uniqueTargets, $split); + } + + private function createDatasets(array $uniqueTargets, array $split): array + { + $datasets = []; + foreach 
($uniqueTargets as $target) { + $datasets[$target] = new ArrayDataset($split[$target], array_fill(0, count($split[$target]), $target)); + } + + return $datasets; + } +} diff --git a/src/Dataset/ArrayDataset.php b/src/Dataset/ArrayDataset.php new file mode 100644 index 0000000..095c597 --- /dev/null +++ b/src/Dataset/ArrayDataset.php @@ -0,0 +1,62 @@ +samples = $samples; + $this->targets = $targets; + } + + public function getSamples(): array + { + return $this->samples; + } + + public function getTargets(): array + { + return $this->targets; + } + + /** + * @param int[] $columns + */ + public function removeColumns(array $columns): void + { + foreach ($this->samples as &$sample) { + $this->removeColumnsFromSample($sample, $columns); + } + } + + private function removeColumnsFromSample(array &$sample, array $columns): void + { + foreach ($columns as $index) { + unset($sample[$index]); + } + + $sample = array_values($sample); + } +} diff --git a/src/Dataset/CsvDataset.php b/src/Dataset/CsvDataset.php new file mode 100644 index 0000000..483664d --- /dev/null +++ b/src/Dataset/CsvDataset.php @@ -0,0 +1,52 @@ +columnNames = array_slice((array) $data, 0, $features); + } else { + $this->columnNames = range(0, $features - 1); + } + + $samples = $targets = []; + while ($data = fgetcsv($handle, $maxLineLength, $delimiter)) { + $samples[] = array_slice($data, 0, $features); + $targets[] = $data[$features]; + } + + fclose($handle); + + parent::__construct($samples, $targets); + } + + public function getColumnNames(): array + { + return $this->columnNames; + } +} diff --git a/src/Dataset/Dataset.php b/src/Dataset/Dataset.php new file mode 100644 index 0000000..0c775a9 --- /dev/null +++ b/src/Dataset/Dataset.php @@ -0,0 +1,12 @@ +scanRootPath($rootPath); + } + + private function scanRootPath(string $rootPath): void + { + $dirs = glob($rootPath.DIRECTORY_SEPARATOR.'*', GLOB_ONLYDIR); + + if ($dirs === false) { + throw new DatasetException(sprintf('An error occurred during 
directory "%s" scan', $rootPath)); + } + + foreach ($dirs as $dir) { + $this->scanDir($dir); + } + } + + private function scanDir(string $dir): void + { + $target = basename($dir); + + $files = glob($dir.DIRECTORY_SEPARATOR.'*'); + if ($files === false) { + return; + } + + foreach (array_filter($files, 'is_file') as $file) { + $this->samples[] = file_get_contents($file); + $this->targets[] = $target; + } + } +} diff --git a/src/Dataset/MnistDataset.php b/src/Dataset/MnistDataset.php new file mode 100644 index 0000000..59a3a26 --- /dev/null +++ b/src/Dataset/MnistDataset.php @@ -0,0 +1,101 @@ +samples = $this->readImages($imagePath); + $this->targets = $this->readLabels($labelPath); + + if (count($this->samples) !== count($this->targets)) { + throw new InvalidArgumentException('Must have the same number of images and labels'); + } + } + + private function readImages(string $imagePath): array + { + $stream = fopen($imagePath, 'rb'); + + if ($stream === false) { + throw new InvalidArgumentException('Could not open file: '.$imagePath); + } + + $images = []; + + try { + $header = fread($stream, 16); + + $fields = unpack('Nmagic/Nsize/Nrows/Ncols', (string) $header); + + if ($fields['magic'] !== self::MAGIC_IMAGE) { + throw new InvalidArgumentException('Invalid magic number: '.$imagePath); + } + + if ($fields['rows'] != self::IMAGE_ROWS) { + throw new InvalidArgumentException('Invalid number of image rows: '.$imagePath); + } + + if ($fields['cols'] != self::IMAGE_COLS) { + throw new InvalidArgumentException('Invalid number of image cols: '.$imagePath); + } + + for ($i = 0; $i < $fields['size']; $i++) { + $imageBytes = fread($stream, $fields['rows'] * $fields['cols']); + + // Convert to float between 0 and 1 + $images[] = array_map(function ($b) { + return $b / 255; + }, array_values(unpack('C*', (string) $imageBytes))); + } + } finally { + fclose($stream); + } + + return $images; + } + + private function readLabels(string $labelPath): array + { + $stream = 
fopen($labelPath, 'rb'); + + if ($stream === false) { + throw new InvalidArgumentException('Could not open file: '.$labelPath); + } + + $labels = []; + + try { + $header = fread($stream, 8); + + $fields = unpack('Nmagic/Nsize', (string) $header); + + if ($fields['magic'] !== self::MAGIC_LABEL) { + throw new InvalidArgumentException('Invalid magic number: '.$labelPath); + } + + $labels = fread($stream, $fields['size']); + } finally { + fclose($stream); + } + + return array_values(unpack('C*', (string) $labels)); + } +} diff --git a/src/Dataset/SvmDataset.php b/src/Dataset/SvmDataset.php new file mode 100644 index 0000000..4ac951c --- /dev/null +++ b/src/Dataset/SvmDataset.php @@ -0,0 +1,131 @@ + $maxIndex) { + $maxIndex = $index; + $sample = array_pad($sample, $maxIndex + 1, 0); + } + + $sample[$index] = $value; + } + + return [$sample, $target, $maxIndex]; + } + + private static function parseLine(string $line): array + { + $line = explode('#', $line, 2)[0]; + $line = rtrim($line); + $line = str_replace("\t", ' ', $line); + + return explode(' ', $line); + } + + private static function parseTargetColumn(string $column): float + { + if (!is_numeric($column)) { + throw new DatasetException(sprintf('Invalid target "%s".', $column)); + } + + return (float) $column; + } + + private static function parseFeatureColumn(string $column): array + { + $feature = explode(':', $column, 2); + if (count($feature) !== 2) { + throw new DatasetException(sprintf('Invalid value "%s".', $column)); + } + + $index = self::parseFeatureIndex($feature[0]); + $value = self::parseFeatureValue($feature[1]); + + return [$index, $value]; + } + + private static function parseFeatureIndex(string $index): int + { + if (!is_numeric($index) || !ctype_digit($index)) { + throw new DatasetException(sprintf('Invalid index "%s".', $index)); + } + + if ((int) $index < 1) { + throw new DatasetException(sprintf('Invalid index "%s".', $index)); + } + + return (int) $index - 1; + } + + private static function 
parseFeatureValue(string $value): float + { + if (!is_numeric($value)) { + throw new DatasetException(sprintf('Invalid value "%s".', $value)); + } + + return (float) $value; + } +} diff --git a/src/DimensionReduction/EigenTransformerBase.php b/src/DimensionReduction/EigenTransformerBase.php new file mode 100644 index 0000000..ec64163 --- /dev/null +++ b/src/DimensionReduction/EigenTransformerBase.php @@ -0,0 +1,94 @@ +getRealEigenvalues(); + $eigVects = $eig->getEigenvectors(); + + $totalEigVal = array_sum($eigVals); + // Sort eigenvalues in descending order + arsort($eigVals); + + $explainedVar = 0.0; + $vectors = []; + $values = []; + foreach ($eigVals as $i => $eigVal) { + $explainedVar += $eigVal / $totalEigVal; + $vectors[] = $eigVects[$i]; + $values[] = $eigVal; + + if ($this->numFeatures !== null) { + if (count($vectors) == $this->numFeatures) { + break; + } + } else { + if ($explainedVar >= $this->totalVariance) { + break; + } + } + } + + $this->eigValues = $values; + $this->eigVectors = $vectors; + } + + /** + * Returns the reduced data + */ + protected function reduce(array $data): array + { + $m1 = new Matrix($data); + $m2 = new Matrix($this->eigVectors); + + return $m1->multiply($m2->transpose())->toArray(); + } +} diff --git a/src/DimensionReduction/KernelPCA.php b/src/DimensionReduction/KernelPCA.php new file mode 100644 index 0000000..beeaba4 --- /dev/null +++ b/src/DimensionReduction/KernelPCA.php @@ -0,0 +1,234 @@ +
+ * Example: $kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0); + * will initialize the algorithm with an RBF kernel having the gamma parameter as 15.0.
+ * This transformation will return the same number of rows with only 2 columns. + * + * @param float $totalVariance Total variance to be preserved if numFeatures is not given + * @param int $numFeatures Number of columns to be returned + * @param float $gamma Gamma parameter is used with RBF and Sigmoid kernels + * + * @throws InvalidArgumentException + */ + public function __construct(int $kernel = self::KERNEL_RBF, ?float $totalVariance = null, ?int $numFeatures = null, ?float $gamma = null) + { + if (!in_array($kernel, [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR], true)) { + throw new InvalidArgumentException('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian'); + } + + parent::__construct($totalVariance, $numFeatures); + + $this->kernel = $kernel; + $this->gamma = $gamma; + } + + /** + * Takes a data and returns a lower dimensional version + * of this data while preserving $totalVariance or $numFeatures.
+ * $data is an n-by-m matrix and returned array is + * n-by-k matrix where k <= m + */ + public function fit(array $data): array + { + $numRows = count($data); + $this->data = $data; + + if ($this->gamma === null) { + $this->gamma = 1.0 / $numRows; + } + + $matrix = $this->calculateKernelMatrix($this->data, $numRows); + $matrix = $this->centerMatrix($matrix, $numRows); + + $this->eigenDecomposition($matrix); + + $this->fit = true; + + return Matrix::transposeArray($this->eigVectors); + } + + /** + * Transforms the given sample to a lower dimensional vector by using + * the variables obtained during the last run of fit. + * + * @throws InvalidArgumentException + * @throws InvalidOperationException + */ + public function transform(array $sample): array + { + if (!$this->fit) { + throw new InvalidOperationException('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first'); + } + + if (is_array($sample[0])) { + throw new InvalidArgumentException('KernelPCA::transform() accepts only one-dimensional arrays'); + } + + $pairs = $this->getDistancePairs($sample); + + return $this->projectSample($pairs); + } + + /** + * Calculates similarity matrix by use of selected kernel function
+ * An n-by-m matrix is given and an n-by-n matrix is returned + */ + protected function calculateKernelMatrix(array $data, int $numRows): array + { + $kernelFunc = $this->getKernel(); + + $matrix = []; + for ($i = 0; $i < $numRows; ++$i) { + for ($k = 0; $k < $numRows; ++$k) { + if ($i <= $k) { + $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]); + } else { + $matrix[$i][$k] = $matrix[$k][$i]; + } + } + } + + return $matrix; + } + + /** + * Kernel matrix is centered in its original space by using the following + * conversion: + * + * K′ = K − N.K − K.N + N.K.N where N is n-by-n matrix filled with 1/n + */ + protected function centerMatrix(array $matrix, int $n): array + { + $N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n)); + $N = new Matrix($N, false); + $K = new Matrix($matrix, false); + + // K.N (This term is repeated so we cache it once) + $K_N = $K->multiply($N); + // N.K + $N_K = $N->multiply($K); + // N.K.N + $N_K_N = $N->multiply($K_N); + + return $K->subtract($N_K) + ->subtract($K_N) + ->add($N_K_N) + ->toArray(); + } + + /** + * Returns the callable kernel function + * + * @throws \Exception + */ + protected function getKernel(): Closure + { + switch ($this->kernel) { + case self::KERNEL_LINEAR: + // k(x,y) = xT.y + return function ($x, $y) { + return Matrix::dot($x, $y)[0]; + }; + case self::KERNEL_RBF: + // k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance + $dist = new Euclidean(); + + return function ($x, $y) use ($dist): float { + return exp(-$this->gamma * $dist->sqDistance($x, $y)); + }; + + case self::KERNEL_SIGMOID: + // k(x,y)=tanh(γ.xT.y+c0) where c0=1 + return function ($x, $y): float { + $res = Matrix::dot($x, $y)[0] + 1.0; + + return tanh((float) $this->gamma * $res); + }; + + case self::KERNEL_LAPLACIAN: + // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance + $dist = new Manhattan(); + + return function ($x, $y) use ($dist): float { + return exp(-$this->gamma * $dist->distance($x, $y)); + }; + + default: + // Not reached + 
throw new InvalidArgumentException(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel)); + } + } + + protected function getDistancePairs(array $sample): array + { + $kernel = $this->getKernel(); + + $pairs = []; + foreach ($this->data as $row) { + $pairs[] = $kernel($row, $sample); + } + + return $pairs; + } + + protected function projectSample(array $pairs): array + { + // Normalize eigenvectors by eig = eigVectors / eigValues + $func = function ($eigVal, $eigVect) { + $m = new Matrix($eigVect, false); + $a = $m->divideByScalar($eigVal)->toArray(); + + return $a[0]; + }; + $eig = array_map($func, $this->eigValues, $this->eigVectors); + + // return k.dot(eig) + return Matrix::dot($pairs, $eig); + } +} diff --git a/src/DimensionReduction/LDA.php b/src/DimensionReduction/LDA.php new file mode 100644 index 0000000..68ab0cd --- /dev/null +++ b/src/DimensionReduction/LDA.php @@ -0,0 +1,223 @@ +
+ * The algorithm can be initialized by speciyfing + * either with the totalVariance(a value between 0.1 and 0.99) + * or numFeatures (number of features in the dataset) to be preserved. + * + * @param float|null $totalVariance Total explained variance to be preserved + * @param int|null $numFeatures Number of features to be preserved + * + * @throws InvalidArgumentException + */ + public function __construct(?float $totalVariance = null, ?int $numFeatures = null) + { + if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) { + throw new InvalidArgumentException('Total variance can be a value between 0.1 and 0.99'); + } + + if ($numFeatures !== null && $numFeatures <= 0) { + throw new InvalidArgumentException('Number of features to be preserved should be greater than 0'); + } + + if (($totalVariance !== null) === ($numFeatures !== null)) { + throw new InvalidArgumentException('Either totalVariance or numFeatures should be specified in order to run the algorithm'); + } + + if ($numFeatures !== null) { + $this->numFeatures = $numFeatures; + } + + if ($totalVariance !== null) { + $this->totalVariance = $totalVariance; + } + } + + /** + * Trains the algorithm to transform the given data to a lower dimensional space. + */ + public function fit(array $data, array $classes): array + { + $this->labels = $this->getLabels($classes); + $this->means = $this->calculateMeans($data, $classes); + + $sW = $this->calculateClassVar($data, $classes); + $sB = $this->calculateClassCov(); + + $S = $sW->inverse()->multiply($sB); + $this->eigenDecomposition($S->toArray()); + + $this->fit = true; + + return $this->reduce($data); + } + + /** + * Transforms the given sample to a lower dimensional vector by using + * the eigenVectors obtained in the last run of fit. 
+ * + * @throws InvalidOperationException + */ + public function transform(array $sample): array + { + if (!$this->fit) { + throw new InvalidOperationException('LDA has not been fitted with respect to original dataset, please run LDA::fit() first'); + } + + if (!is_array($sample[0])) { + $sample = [$sample]; + } + + return $this->reduce($sample); + } + + /** + * Returns unique labels in the dataset + */ + protected function getLabels(array $classes): array + { + $counts = array_count_values($classes); + + return array_keys($counts); + } + + /** + * Calculates mean of each column for each class and returns + * n by m matrix where n is number of labels and m is number of columns + */ + protected function calculateMeans(array $data, array $classes): array + { + $means = []; + $counts = []; + $overallMean = array_fill(0, count($data[0]), 0.0); + + foreach ($data as $index => $row) { + $label = array_search($classes[$index], $this->labels, true); + + foreach ($row as $col => $val) { + if (!isset($means[$label][$col])) { + $means[$label][$col] = 0.0; + } + + $means[$label][$col] += $val; + $overallMean[$col] += $val; + } + + if (!isset($counts[$label])) { + $counts[$label] = 0; + } + + ++$counts[$label]; + } + + foreach ($means as $index => $row) { + foreach ($row as $col => $sum) { + $means[$index][$col] = $sum / $counts[$index]; + } + } + + // Calculate overall mean of the dataset for each column + $numElements = array_sum($counts); + $map = function ($el) use ($numElements) { + return $el / $numElements; + }; + $this->overallMean = array_map($map, $overallMean); + $this->counts = $counts; + + return $means; + } + + /** + * Returns in-class scatter matrix for each class, which + * is a n by m matrix where n is number of classes and + * m is number of columns + */ + protected function calculateClassVar(array $data, array $classes): Matrix + { + // s is an n (number of classes) by m (number of column) matrix + $s = array_fill(0, count($data[0]), array_fill(0, 
count($data[0]), 0)); + $sW = new Matrix($s, false); + + foreach ($data as $index => $row) { + $label = array_search($classes[$index], $this->labels, true); + $means = $this->means[$label]; + + $row = $this->calculateVar($row, $means); + + $sW = $sW->add($row); + } + + return $sW; + } + + /** + * Returns between-class scatter matrix for each class, which + * is an n by m matrix where n is number of classes and + * m is number of columns + */ + protected function calculateClassCov(): Matrix + { + // s is an n (number of classes) by m (number of column) matrix + $s = array_fill(0, count($this->overallMean), array_fill(0, count($this->overallMean), 0)); + $sB = new Matrix($s, false); + + foreach ($this->means as $index => $classMeans) { + $row = $this->calculateVar($classMeans, $this->overallMean); + $N = $this->counts[$index]; + $sB = $sB->add($row->multiplyByScalar($N)); + } + + return $sB; + } + + /** + * Returns the result of the calculation (x - m)T.(x - m) + */ + protected function calculateVar(array $row, array $means): Matrix + { + $x = new Matrix($row, false); + $m = new Matrix($means, false); + $diff = $x->subtract($m); + + return $diff->transpose()->multiply($diff); + } +} diff --git a/src/DimensionReduction/PCA.php b/src/DimensionReduction/PCA.php new file mode 100644 index 0000000..5556558 --- /dev/null +++ b/src/DimensionReduction/PCA.php @@ -0,0 +1,131 @@ + + * + * @param float $totalVariance Total explained variance to be preserved + * @param int $numFeatures Number of features to be preserved + * + * @throws InvalidArgumentException + */ + public function __construct(?float $totalVariance = null, ?int $numFeatures = null) + { + if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) { + throw new InvalidArgumentException('Total variance can be a value between 0.1 and 0.99'); + } + + if ($numFeatures !== null && $numFeatures <= 0) { + throw new InvalidArgumentException('Number of features to be preserved should be greater than 
0'); + } + + if (($totalVariance !== null) === ($numFeatures !== null)) { + throw new InvalidArgumentException('Either totalVariance or numFeatures should be specified in order to run the algorithm'); + } + + if ($numFeatures !== null) { + $this->numFeatures = $numFeatures; + } + + if ($totalVariance !== null) { + $this->totalVariance = $totalVariance; + } + } + + /** + * Takes a data and returns a lower dimensional version + * of this data while preserving $totalVariance or $numFeatures.
+ * $data is an n-by-m matrix and returned array is + * n-by-k matrix where k <= m + */ + public function fit(array $data): array + { + $n = count($data[0]); + + $data = $this->normalize($data, $n); + + $covMatrix = Covariance::covarianceMatrix($data, array_fill(0, $n, 0)); + + $this->eigenDecomposition($covMatrix); + + $this->fit = true; + + return $this->reduce($data); + } + + /** + * Transforms the given sample to a lower dimensional vector by using + * the eigenVectors obtained in the last run of fit. + * + * @throws InvalidOperationException + */ + public function transform(array $sample): array + { + if (!$this->fit) { + throw new InvalidOperationException('PCA has not been fitted with respect to original dataset, please run PCA::fit() first'); + } + + if (!is_array($sample[0])) { + $sample = [$sample]; + } + + $sample = $this->normalize($sample, count($sample[0])); + + return $this->reduce($sample); + } + + protected function calculateMeans(array $data, int $n): void + { + // Calculate means for each dimension + $this->means = []; + for ($i = 0; $i < $n; ++$i) { + $column = array_column($data, $i); + $this->means[] = Mean::arithmetic($column); + } + } + + /** + * Normalization of the data includes subtracting mean from + * each dimension therefore dimensions will be centered to zero + */ + protected function normalize(array $data, int $n): array + { + if (count($this->means) === 0) { + $this->calculateMeans($data, $n); + } + + // Normalize data + foreach (array_keys($data) as $i) { + for ($k = 0; $k < $n; ++$k) { + $data[$i][$k] -= $this->means[$k]; + } + } + + return $data; + } +} diff --git a/src/Estimator.php b/src/Estimator.php new file mode 100644 index 0000000..a054108 --- /dev/null +++ b/src/Estimator.php @@ -0,0 +1,15 @@ +stopWords = array_fill_keys($stopWords, true); + } + + public function isStopWord(string $token): bool + { + return isset($this->stopWords[$token]); + } + + public static function factory(string $language = 'English'): self + { + 
$className = __NAMESPACE__."\\StopWords\\${language}"; + + if (!class_exists($className)) { + throw new InvalidArgumentException(sprintf('Can\'t find "%s" language for StopWords', $language)); + } + + return new $className(); + } +} diff --git a/src/FeatureExtraction/StopWords/English.php b/src/FeatureExtraction/StopWords/English.php new file mode 100644 index 0000000..fab079b --- /dev/null +++ b/src/FeatureExtraction/StopWords/English.php @@ -0,0 +1,33 @@ +stopWords); + } +} diff --git a/src/FeatureExtraction/StopWords/French.php b/src/FeatureExtraction/StopWords/French.php new file mode 100644 index 0000000..96cc110 --- /dev/null +++ b/src/FeatureExtraction/StopWords/French.php @@ -0,0 +1,29 @@ +stopWords); + } +} diff --git a/src/FeatureExtraction/StopWords/Polish.php b/src/FeatureExtraction/StopWords/Polish.php new file mode 100644 index 0000000..e452ebf --- /dev/null +++ b/src/FeatureExtraction/StopWords/Polish.php @@ -0,0 +1,30 @@ +stopWords); + } +} diff --git a/src/FeatureExtraction/StopWords/Russian.php b/src/FeatureExtraction/StopWords/Russian.php new file mode 100644 index 0000000..d26902d --- /dev/null +++ b/src/FeatureExtraction/StopWords/Russian.php @@ -0,0 +1,30 @@ +stopWords); + } +} diff --git a/src/FeatureExtraction/TfIdfTransformer.php b/src/FeatureExtraction/TfIdfTransformer.php new file mode 100644 index 0000000..34f7533 --- /dev/null +++ b/src/FeatureExtraction/TfIdfTransformer.php @@ -0,0 +1,54 @@ + 0) { + $this->fit($samples); + } + } + + public function fit(array $samples, ?array $targets = null): void + { + $this->countTokensFrequency($samples); + + $count = count($samples); + foreach ($this->idf as &$value) { + $value = log((float) ($count / $value), 10.0); + } + } + + public function transform(array &$samples, ?array &$targets = null): void + { + foreach ($samples as &$sample) { + foreach ($sample as $index => &$feature) { + $feature *= $this->idf[$index]; + } + } + } + + private function countTokensFrequency(array $samples): void + { + 
$this->idf = array_fill_keys(array_keys($samples[0]), 0); + + foreach ($samples as $sample) { + foreach ($sample as $index => $count) { + if ($count > 0) { + ++$this->idf[$index]; + } + } + } + } +} diff --git a/src/FeatureExtraction/TokenCountVectorizer.php b/src/FeatureExtraction/TokenCountVectorizer.php new file mode 100644 index 0000000..5cc5e8d --- /dev/null +++ b/src/FeatureExtraction/TokenCountVectorizer.php @@ -0,0 +1,166 @@ +tokenizer = $tokenizer; + $this->stopWords = $stopWords; + $this->minDF = $minDF; + } + + public function fit(array $samples, ?array $targets = null): void + { + $this->buildVocabulary($samples); + } + + public function transform(array &$samples, ?array &$targets = null): void + { + array_walk($samples, function (string &$sample): void { + $this->transformSample($sample); + }); + + $this->checkDocumentFrequency($samples); + } + + public function getVocabulary(): array + { + return array_flip($this->vocabulary); + } + + private function buildVocabulary(array &$samples): void + { + foreach ($samples as $sample) { + $tokens = $this->tokenizer->tokenize($sample); + foreach ($tokens as $token) { + $this->addTokenToVocabulary($token); + } + } + } + + private function transformSample(string &$sample): void + { + $counts = []; + $tokens = $this->tokenizer->tokenize($sample); + + foreach ($tokens as $token) { + $index = $this->getTokenIndex($token); + if ($index !== false) { + $this->updateFrequency($token); + if (!isset($counts[$index])) { + $counts[$index] = 0; + } + + ++$counts[$index]; + } + } + + foreach ($this->vocabulary as $index) { + if (!isset($counts[$index])) { + $counts[$index] = 0; + } + } + + ksort($counts); + + $sample = $counts; + } + + /** + * @return int|bool + */ + private function getTokenIndex(string $token) + { + if ($this->isStopWord($token)) { + return false; + } + + return $this->vocabulary[$token] ?? 
false; + } + + private function addTokenToVocabulary(string $token): void + { + if ($this->isStopWord($token)) { + return; + } + + if (!isset($this->vocabulary[$token])) { + $this->vocabulary[$token] = count($this->vocabulary); + } + } + + private function isStopWord(string $token): bool + { + return $this->stopWords !== null && $this->stopWords->isStopWord($token); + } + + private function updateFrequency(string $token): void + { + if (!isset($this->frequencies[$token])) { + $this->frequencies[$token] = 0; + } + + ++$this->frequencies[$token]; + } + + private function checkDocumentFrequency(array &$samples): void + { + if ($this->minDF > 0) { + $beyondMinimum = $this->getBeyondMinimumIndexes(count($samples)); + foreach ($samples as &$sample) { + $this->resetBeyondMinimum($sample, $beyondMinimum); + } + } + } + + private function resetBeyondMinimum(array &$sample, array $beyondMinimum): void + { + foreach ($beyondMinimum as $index) { + $sample[$index] = 0; + } + } + + private function getBeyondMinimumIndexes(int $samplesCount): array + { + $indexes = []; + foreach ($this->frequencies as $token => $frequency) { + if (($frequency / $samplesCount) < $this->minDF) { + $indexes[] = $this->getTokenIndex((string) $token); + } + } + + return $indexes; + } +} diff --git a/src/FeatureSelection/ScoringFunction.php b/src/FeatureSelection/ScoringFunction.php new file mode 100644 index 0000000..4c925f6 --- /dev/null +++ b/src/FeatureSelection/ScoringFunction.php @@ -0,0 +1,10 @@ + $sample) { + $grouped[$targets[$index]][] = $sample; + } + + return ANOVA::oneWayF(array_values($grouped)); + } +} diff --git a/src/FeatureSelection/ScoringFunction/UnivariateLinearRegression.php b/src/FeatureSelection/ScoringFunction/UnivariateLinearRegression.php new file mode 100644 index 0000000..13bdc99 --- /dev/null +++ b/src/FeatureSelection/ScoringFunction/UnivariateLinearRegression.php @@ -0,0 +1,81 @@ +center = $center; + } + + public function score(array $samples, array $targets): array + { 
+ if ($this->center) { + $this->centerTargets($targets); + $this->centerSamples($samples); + } + + $correlations = []; + foreach (array_keys($samples[0]) as $index) { + $featureColumn = array_column($samples, $index); + $correlations[$index] = + Matrix::dot($targets, $featureColumn)[0] / (new Matrix($featureColumn, false))->transpose()->frobeniusNorm() + / (new Matrix($targets, false))->frobeniusNorm(); + } + + $degreesOfFreedom = count($targets) - ($this->center ? 2 : 1); + + return array_map(function (float $correlation) use ($degreesOfFreedom): float { + return $correlation ** 2 / (1 - $correlation ** 2) * $degreesOfFreedom; + }, $correlations); + } + + private function centerTargets(array &$targets): void + { + $mean = Mean::arithmetic($targets); + array_walk($targets, function (&$target) use ($mean): void { + $target -= $mean; + }); + } + + private function centerSamples(array &$samples): void + { + $means = []; + foreach ($samples[0] as $index => $feature) { + $means[$index] = Mean::arithmetic(array_column($samples, $index)); + } + + foreach ($samples as &$sample) { + foreach ($sample as $index => &$feature) { + $feature -= $means[$index]; + } + } + } +} diff --git a/src/FeatureSelection/SelectKBest.php b/src/FeatureSelection/SelectKBest.php new file mode 100644 index 0000000..16e5278 --- /dev/null +++ b/src/FeatureSelection/SelectKBest.php @@ -0,0 +1,78 @@ +scoringFunction = $scoringFunction; + $this->k = $k; + } + + public function fit(array $samples, ?array $targets = null): void + { + if ($targets === null || count($targets) === 0) { + throw new InvalidArgumentException('The array has zero elements'); + } + + $this->scores = $sorted = $this->scoringFunction->score($samples, $targets); + if ($this->k >= count($sorted)) { + return; + } + + arsort($sorted); + $this->keepColumns = array_slice($sorted, 0, $this->k, true); + } + + public function transform(array &$samples, ?array &$targets = null): void + { + if ($this->keepColumns === null) { + return; + } + + 
foreach ($samples as &$sample) { + $sample = array_values(array_intersect_key($sample, $this->keepColumns)); + } + } + + public function scores(): array + { + if ($this->scores === null) { + throw new InvalidOperationException('SelectKBest require to fit first to get scores'); + } + + return $this->scores; + } +} diff --git a/src/FeatureSelection/VarianceThreshold.php b/src/FeatureSelection/VarianceThreshold.php new file mode 100644 index 0000000..0c3154e --- /dev/null +++ b/src/FeatureSelection/VarianceThreshold.php @@ -0,0 +1,57 @@ +threshold = $threshold; + } + + public function fit(array $samples, ?array $targets = null): void + { + $this->variances = array_map(static function (array $column): float { + return Variance::population($column); + }, Matrix::transposeArray($samples)); + + foreach ($this->variances as $column => $variance) { + if ($variance > $this->threshold) { + $this->keepColumns[$column] = true; + } + } + } + + public function transform(array &$samples, ?array &$targets = null): void + { + foreach ($samples as &$sample) { + $sample = array_values(array_intersect_key($sample, $this->keepColumns)); + } + } +} diff --git a/src/FeatureUnion.php b/src/FeatureUnion.php new file mode 100644 index 0000000..645a421 --- /dev/null +++ b/src/FeatureUnion.php @@ -0,0 +1,72 @@ +pipelines = array_map(static function (Pipeline $pipeline): Pipeline { + return $pipeline; + }, $pipelines); + } + + public function fit(array $samples, ?array $targets = null): void + { + $originSamples = $samples; + foreach ($this->pipelines as $pipeline) { + foreach ($pipeline->getTransformers() as $transformer) { + $transformer->fit($samples, $targets); + $transformer->transform($samples, $targets); + } + $samples = $originSamples; + } + } + + public function transform(array &$samples, ?array &$targets = null): void + { + $this->transformSamples($samples, $targets); + } + + public function fitAndTransform(array &$samples, ?array &$targets = null): void + { + 
$this->transformSamples($samples, $targets, true); + } + + private function transformSamples(array &$samples, ?array &$targets = null, bool $fit = false): void + { + $union = []; + $originSamples = $samples; + foreach ($this->pipelines as $pipeline) { + foreach ($pipeline->getTransformers() as $transformer) { + if ($fit) { + $transformer->fit($samples, $targets); + } + $transformer->transform($samples, $targets); + } + + foreach ($samples as $index => $sample) { + $union[$index] = array_merge($union[$index] ?? [], is_array($sample) ? $sample : [$sample]); + } + $samples = $originSamples; + } + + $samples = $union; + } +} diff --git a/src/Helper/OneVsRest.php b/src/Helper/OneVsRest.php new file mode 100644 index 0000000..691fb64 --- /dev/null +++ b/src/Helper/OneVsRest.php @@ -0,0 +1,169 @@ +reset(); + + $this->trainByLabel($samples, $targets); + } + + /** + * Resets the classifier and the vars internally used by OneVsRest to create multiple classifiers. + */ + public function reset(): void + { + $this->classifiers = []; + $this->allLabels = []; + $this->costValues = []; + + $this->resetBinary(); + } + + protected function trainByLabel(array $samples, array $targets, array $allLabels = []): void + { + // Overwrites the current value if it exist. $allLabels must be provided for each partialTrain run. + $this->allLabels = count($allLabels) === 0 ? array_keys(array_count_values($targets)) : $allLabels; + sort($this->allLabels, SORT_STRING); + + // If there are only two targets, then there is no need to perform OvR + if (count($this->allLabels) === 2) { + // Init classifier if required. + if (count($this->classifiers) === 0) { + $this->classifiers[0] = $this->getClassifierCopy(); + } + + $this->classifiers[0]->trainBinary($samples, $targets, $this->allLabels); + } else { + // Train a separate classifier for each label and memorize them + + foreach ($this->allLabels as $label) { + // Init classifier if required. 
+ if (!isset($this->classifiers[$label])) { + $this->classifiers[$label] = $this->getClassifierCopy(); + } + + [$binarizedTargets, $classifierLabels] = $this->binarizeTargets($targets, $label); + $this->classifiers[$label]->trainBinary($samples, $binarizedTargets, $classifierLabels); + } + } + + // If the underlying classifier is capable of giving the cost values + // during the training, then assign it to the relevant variable + // Adding just the first classifier cost values to avoid complex average calculations. + $classifierref = reset($this->classifiers); + if (method_exists($classifierref, 'getCostValues')) { + $this->costValues = $classifierref->getCostValues(); + } + } + + /** + * Returns an instance of the current class after cleaning up OneVsRest stuff. + */ + protected function getClassifierCopy(): Classifier + { + // Clone the current classifier, so that + // we don't mess up its variables while training + // multiple instances of this classifier + $classifier = clone $this; + $classifier->reset(); + + return $classifier; + } + + /** + * @return mixed + */ + protected function predictSample(array $sample) + { + if (count($this->allLabels) === 2) { + return $this->classifiers[0]->predictSampleBinary($sample); + } + + $probs = []; + + foreach ($this->classifiers as $label => $predictor) { + $probs[$label] = $predictor->predictProbability($sample, $label); + } + + arsort($probs, SORT_NUMERIC); + + return key($probs); + } + + /** + * Each classifier should implement this method instead of train(samples, targets) + */ + abstract protected function trainBinary(array $samples, array $targets, array $labels); + + /** + * To be overwritten by OneVsRest classifiers. + */ + abstract protected function resetBinary(): void; + + /** + * Each classifier that make use of OvR approach should be able to + * return a probability for a sample to belong to the given label. 
+ * + * @return mixed + */ + abstract protected function predictProbability(array $sample, string $label); + + /** + * Each classifier should implement this method instead of predictSample() + * + * @return mixed + */ + abstract protected function predictSampleBinary(array $sample); + + /** + * Groups all targets into two groups: Targets equal to + * the given label and the others + * + * $targets is not passed by reference nor contains objects so this method + * changes will not affect the caller $targets array. + * + * @param mixed $label + * + * @return array Binarized targets and target's labels + */ + private function binarizeTargets(array $targets, $label): array + { + $notLabel = "not_${label}"; + foreach ($targets as $key => $target) { + $targets[$key] = $target == $label ? $label : $notLabel; + } + + $labels = [$label, $notLabel]; + + return [$targets, $labels]; + } +} diff --git a/src/Helper/Optimizer/ConjugateGradient.php b/src/Helper/Optimizer/ConjugateGradient.php new file mode 100644 index 0000000..d7c064f --- /dev/null +++ b/src/Helper/Optimizer/ConjugateGradient.php @@ -0,0 +1,304 @@ +samples = $samples; + $this->targets = $targets; + $this->gradientCb = $gradientCb; + $this->sampleCount = count($samples); + $this->costValues = []; + + $d = MP::muls($this->gradient($this->theta), -1); + + for ($i = 0; $i < $this->maxIterations; ++$i) { + // Obtain α that minimizes f(θ + α.d) + $alpha = $this->getAlpha($d); + + // θ(k+1) = θ(k) + α.d + $thetaNew = $this->getNewTheta($alpha, $d); + + // β = ||∇f(x(k+1))||² ∕ ||∇f(x(k))||² + $beta = $this->getBeta($thetaNew); + + // d(k+1) =–∇f(x(k+1)) + β(k).d(k) + $d = $this->getNewDirection($thetaNew, $beta, $d); + + // Save values for the next iteration + $oldTheta = $this->theta; + $this->costValues[] = $this->cost($thetaNew); + + $this->theta = $thetaNew; + if ($this->enableEarlyStop && $this->earlyStop($oldTheta)) { + break; + } + } + + $this->clear(); + + return $this->theta; + } + + /** + * Executes the 
callback function for the problem and returns + * sum of the gradient for all samples & targets. + */ + protected function gradient(array $theta): array + { + [, $updates, $penalty] = parent::gradient($theta); + + // Calculate gradient for each dimension + $gradient = []; + for ($i = 0; $i <= $this->dimensions; ++$i) { + if ($i === 0) { + $gradient[$i] = array_sum($updates); + } else { + $col = array_column($this->samples, $i - 1); + $error = 0; + foreach ($col as $index => $val) { + $error += $val * $updates[$index]; + } + + $gradient[$i] = $error + $penalty * $theta[$i]; + } + } + + return $gradient; + } + + /** + * Returns the value of f(x) for given solution + */ + protected function cost(array $theta): float + { + [$cost] = parent::gradient($theta); + + return array_sum($cost) / (int) $this->sampleCount; + } + + /** + * Calculates alpha that minimizes the function f(θ + α.d) + * by performing a line search that does not rely upon the derivation. + * + * There are several alternatives for this function. For now, we + * prefer a method inspired from the bisection method for its simplicity. 
+ * This algorithm attempts to find an optimum alpha value between 0.0001 and 0.01 + * + * Algorithm as follows: + * a) Probe a small alpha (0.0001) and calculate cost function + * b) Probe a larger alpha (0.01) and calculate cost function + * b-1) If cost function decreases, continue enlarging alpha + * b-2) If cost function increases, take the midpoint and try again + */ + protected function getAlpha(array $d): float + { + $small = MP::muls($d, 0.0001); + $large = MP::muls($d, 0.01); + + // Obtain θ + α.d for two initial values, x0 and x1 + $x0 = MP::add($this->theta, $small); + $x1 = MP::add($this->theta, $large); + + $epsilon = 0.0001; + $iteration = 0; + do { + $fx1 = $this->cost($x1); + $fx0 = $this->cost($x0); + + // If the difference between two values is small enough + // then break the loop + if (abs($fx1 - $fx0) <= $epsilon) { + break; + } + + if ($fx1 < $fx0) { + $x0 = $x1; + $x1 = MP::adds($x1, 0.01); // Enlarge second + } else { + $x1 = MP::divs(MP::add($x1, $x0), 2.0); + } // Get to the midpoint + + $error = $fx1 / $this->dimensions; + } while ($error <= $epsilon || $iteration++ < 10); + + // Return α = (x1 - θ) / d + // For accuracy, choose a dimension which maximizes |d[i]| + $imax = 0; + for ($i = 1; $i <= $this->dimensions; ++$i) { + if (abs($d[$i]) > abs($d[$imax])) { + $imax = $i; + } + } + + if ($d[$imax] == 0) { + return $x1[$imax] - $this->theta[$imax]; + } + + return ($x1[$imax] - $this->theta[$imax]) / $d[$imax]; + } + + /** + * Calculates new set of solutions with given alpha (for each θ(k)) and + * gradient direction. + * + * θ(k+1) = θ(k) + α.d + */ + protected function getNewTheta(float $alpha, array $d): array + { + return MP::add($this->theta, MP::muls($d, $alpha)); + } + + /** + * Calculates new beta (β) for given set of solutions by using + * Fletcher–Reeves method. + * + * β = ||∇f(x(k+1))||² ∕ ||∇f(x(k))||² + * + * See: + * R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154. 
+ */ + protected function getBeta(array $newTheta): float + { + $gNew = $this->gradient($newTheta); + $gOld = $this->gradient($this->theta); + $dNew = 0; + $dOld = 1e-100; + for ($i = 0; $i <= $this->dimensions; ++$i) { + $dNew += $gNew[$i] ** 2; + $dOld += $gOld[$i] ** 2; + } + + return $dNew / $dOld; + } + + /** + * Calculates the new conjugate direction + * + * d(k+1) =–∇f(x(k+1)) + β(k).d(k) + */ + protected function getNewDirection(array $theta, float $beta, array $d): array + { + $grad = $this->gradient($theta); + + return MP::add(MP::muls($grad, -1), MP::muls($d, $beta)); + } +} + +/** + * Handles element-wise vector operations between vector-vector + * and vector-scalar variables + */ +class MP +{ + /** + * Element-wise multiplication of two vectors of the same size + */ + public static function mul(array $m1, array $m2): array + { + $res = []; + foreach ($m1 as $i => $val) { + $res[] = $val * $m2[$i]; + } + + return $res; + } + + /** + * Element-wise division of two vectors of the same size + */ + public static function div(array $m1, array $m2): array + { + $res = []; + foreach ($m1 as $i => $val) { + $res[] = $val / $m2[$i]; + } + + return $res; + } + + /** + * Element-wise addition of two vectors of the same size + */ + public static function add(array $m1, array $m2, int $mag = 1): array + { + $res = []; + foreach ($m1 as $i => $val) { + $res[] = $val + $mag * $m2[$i]; + } + + return $res; + } + + /** + * Element-wise subtraction of two vectors of the same size + */ + public static function sub(array $m1, array $m2): array + { + return self::add($m1, $m2, -1); + } + + /** + * Element-wise multiplication of a vector with a scalar + */ + public static function muls(array $m1, float $m2): array + { + $res = []; + foreach ($m1 as $val) { + $res[] = $val * $m2; + } + + return $res; + } + + /** + * Element-wise division of a vector with a scalar + */ + public static function divs(array $m1, float $m2): array + { + $res = []; + foreach ($m1 as $val) { + 
$res[] = $val / ($m2 + 1e-32); + } + + return $res; + } + + /** + * Element-wise addition of a vector with a scalar + */ + public static function adds(array $m1, float $m2, int $mag = 1): array + { + $res = []; + foreach ($m1 as $val) { + $res[] = $val + $mag * $m2; + } + + return $res; + } + + /** + * Element-wise subtraction of a vector with a scalar + */ + public static function subs(array $m1, float $m2): array + { + return self::adds($m1, $m2, -1); + } +} diff --git a/src/Helper/Optimizer/GD.php b/src/Helper/Optimizer/GD.php new file mode 100644 index 0000000..40c65c7 --- /dev/null +++ b/src/Helper/Optimizer/GD.php @@ -0,0 +1,111 @@ +samples = $samples; + $this->targets = $targets; + $this->gradientCb = $gradientCb; + $this->sampleCount = count($this->samples); + + // Batch learning is executed: + $currIter = 0; + $this->costValues = []; + while ($this->maxIterations > $currIter++) { + $theta = $this->theta; + + // Calculate update terms for each sample + [$errors, $updates, $totalPenalty] = $this->gradient($theta); + + $this->updateWeightsWithUpdates($updates, $totalPenalty); + + $this->costValues[] = array_sum($errors) / (int) $this->sampleCount; + + if ($this->earlyStop($theta)) { + break; + } + } + + $this->clear(); + + return $this->theta; + } + + /** + * Calculates gradient, cost function and penalty term for each sample + * then returns them as an array of values + */ + protected function gradient(array $theta): array + { + $costs = []; + $gradient = []; + $totalPenalty = 0; + + if ($this->gradientCb === null) { + throw new InvalidOperationException('Gradient callback is not defined'); + } + + foreach ($this->samples as $index => $sample) { + $target = $this->targets[$index]; + + $result = ($this->gradientCb)($theta, $sample, $target); + [$cost, $grad, $penalty] = array_pad($result, 3, 0); + + $costs[] = $cost; + $gradient[] = $grad; + $totalPenalty += $penalty; + } + + $totalPenalty /= $this->sampleCount; + + return [$costs, $gradient, $totalPenalty]; 
+ } + + protected function updateWeightsWithUpdates(array $updates, float $penalty): void + { + // Updates all weights at once + for ($i = 0; $i <= $this->dimensions; ++$i) { + if ($i === 0) { + $this->theta[0] -= $this->learningRate * array_sum($updates); + } else { + $col = array_column($this->samples, $i - 1); + + $error = 0; + foreach ($col as $index => $val) { + $error += $val * $updates[$index]; + } + + $this->theta[$i] -= $this->learningRate * + ($error + $penalty * $this->theta[$i]); + } + } + } + + /** + * Clears the optimizer internal vars after the optimization process. + */ + protected function clear(): void + { + $this->sampleCount = null; + parent::clear(); + } +} diff --git a/src/Helper/Optimizer/Optimizer.php b/src/Helper/Optimizer/Optimizer.php new file mode 100644 index 0000000..54331e9 --- /dev/null +++ b/src/Helper/Optimizer/Optimizer.php @@ -0,0 +1,61 @@ +dimensions = $dimensions; + + // Inits the weights randomly + $this->theta = []; + for ($i = 0; $i < $this->dimensions; ++$i) { + $this->theta[] = (random_int(0, PHP_INT_MAX) / PHP_INT_MAX) + 0.1; + } + } + + public function setTheta(array $theta): self + { + if (count($theta) !== $this->dimensions) { + throw new InvalidArgumentException(sprintf('Number of values in the weights array should be %s', $this->dimensions)); + } + + $this->theta = $theta; + + return $this; + } + + public function theta(): array + { + return $this->theta; + } + + /** + * Executes the optimization with the given samples & targets + * and returns the weights + */ + abstract public function runOptimization(array $samples, array $targets, Closure $gradientCb): array; +} diff --git a/src/Helper/Optimizer/StochasticGD.php b/src/Helper/Optimizer/StochasticGD.php new file mode 100644 index 0000000..9927c3f --- /dev/null +++ b/src/Helper/Optimizer/StochasticGD.php @@ -0,0 +1,278 @@ + + * + * Larger values of lr may overshoot the optimum or even cause divergence + * while small values slows down the convergence and increases 
the time + * required for the training + * + * @var float + */ + protected $learningRate = 0.001; + + /** + * Minimum amount of change in the weights and error values + * between iterations that needs to be obtained to continue the training + * + * @var float + */ + protected $threshold = 1e-4; + + /** + * Enable/Disable early stopping by checking the weight & cost values + * to see whether they changed large enough to continue the optimization + * + * @var bool + */ + protected $enableEarlyStop = true; + + /** + * List of values obtained by evaluating the cost function at each iteration + * of the algorithm + * + * @var array + */ + protected $costValues = []; + + /** + * Initializes the SGD optimizer for the given number of dimensions + */ + public function __construct(int $dimensions) + { + // Add one more dimension for the bias + parent::__construct($dimensions + 1); + + $this->dimensions = $dimensions; + } + + public function setTheta(array $theta): Optimizer + { + if (count($theta) !== $this->dimensions + 1) { + throw new InvalidArgumentException(sprintf('Number of values in the weights array should be %s', $this->dimensions + 1)); + } + + $this->theta = $theta; + + return $this; + } + + /** + * Sets minimum value for the change in the theta values + * between iterations to continue the iterations.
+ * + * If change in the theta is less than given value then the + * algorithm will stop training + * + * @return $this + */ + public function setChangeThreshold(float $threshold = 1e-5) + { + $this->threshold = $threshold; + + return $this; + } + + /** + * Enable/Disable early stopping by checking at each iteration + * whether changes in theta or cost value are not large enough + * + * @return $this + */ + public function setEarlyStop(bool $enable = true) + { + $this->enableEarlyStop = $enable; + + return $this; + } + + /** + * @return $this + */ + public function setLearningRate(float $learningRate) + { + $this->learningRate = $learningRate; + + return $this; + } + + /** + * @return $this + */ + public function setMaxIterations(int $maxIterations) + { + $this->maxIterations = $maxIterations; + + return $this; + } + + /** + * Optimization procedure finds the unknown variables for the equation A.ϴ = y + * for the given samples (A) and targets (y).
+ * + * The cost function to minimize and the gradient of the function are to be + * handled by the callback function provided as the third parameter of the method. + */ + public function runOptimization(array $samples, array $targets, Closure $gradientCb): array + { + $this->samples = $samples; + $this->targets = $targets; + $this->gradientCb = $gradientCb; + + $currIter = 0; + $bestTheta = null; + $bestScore = 0.0; + $this->costValues = []; + + while ($this->maxIterations > $currIter++) { + $theta = $this->theta; + + // Update the guess + $cost = $this->updateTheta(); + + // Save the best theta in the "pocket" so that + // any future set of theta worse than this will be disregarded + if ($bestTheta === null || $cost <= $bestScore) { + $bestTheta = $theta; + $bestScore = $cost; + } + + // Add the cost value for this iteration to the list + $this->costValues[] = $cost; + + // Check for early stop + if ($this->enableEarlyStop && $this->earlyStop($theta)) { + break; + } + } + + $this->clear(); + + // Solution in the pocket is better than or equal to the last state + // so, we use this solution + return $this->theta = (array) $bestTheta; + } + + /** + * Returns the list of cost values for each iteration executed in + * last run of the optimization + */ + public function getCostValues(): array + { + return $this->costValues; + } + + protected function updateTheta(): float + { + $jValue = 0.0; + $theta = $this->theta; + + if ($this->gradientCb === null) { + throw new InvalidOperationException('Gradient callback is not defined'); + } + + foreach ($this->samples as $index => $sample) { + $target = $this->targets[$index]; + + $result = ($this->gradientCb)($theta, $sample, $target); + + [$error, $gradient, $penalty] = array_pad($result, 3, 0); + + // Update bias + $this->theta[0] -= $this->learningRate * $gradient; + + // Update other values + for ($i = 1; $i <= $this->dimensions; ++$i) { + $this->theta[$i] -= $this->learningRate * + ($gradient * $sample[$i - 1] + $penalty 
* $this->theta[$i]); + } + + // Sum error rate + $jValue += $error; + } + + return $jValue / count($this->samples); + } + + /** + * Checks if the optimization is not effective enough and can be stopped + * in case large enough changes in the solution do not happen + */ + protected function earlyStop(array $oldTheta): bool + { + // Check for early stop: No change larger than threshold (default 1e-5) + $diff = array_map( + function ($w1, $w2) { + return abs($w1 - $w2) > $this->threshold ? 1 : 0; + }, + $oldTheta, + $this->theta + ); + + if (array_sum($diff) == 0) { + return true; + } + + // Check if the last two cost values are almost the same + $costs = array_slice($this->costValues, -2); + if (count($costs) === 2 && abs($costs[1] - $costs[0]) < $this->threshold) { + return true; + } + + return false; + } + + /** + * Clears the optimizer internal vars after the optimization process. + */ + protected function clear(): void + { + $this->samples = []; + $this->targets = []; + $this->gradientCb = null; + } +} diff --git a/src/Helper/Predictable.php b/src/Helper/Predictable.php new file mode 100644 index 0000000..74d1cc0 --- /dev/null +++ b/src/Helper/Predictable.php @@ -0,0 +1,30 @@ +predictSample($samples); + } + + $predicted = []; + foreach ($samples as $index => $sample) { + $predicted[$index] = $this->predictSample($sample); + } + + return $predicted; + } + + /** + * @return mixed + */ + abstract protected function predictSample(array $sample); +} diff --git a/src/Helper/Trainable.php b/src/Helper/Trainable.php new file mode 100644 index 0000000..1388760 --- /dev/null +++ b/src/Helper/Trainable.php @@ -0,0 +1,24 @@ +samples = array_merge($this->samples, $samples); + $this->targets = array_merge($this->targets, $targets); + } +} diff --git a/src/IncrementalEstimator.php b/src/IncrementalEstimator.php new file mode 100644 index 0000000..600bfbb --- /dev/null +++ b/src/IncrementalEstimator.php @@ -0,0 +1,10 @@ +': + return $a > $b; + case '>=': + return $a >= $b; + 
case '=': + case '==': + return $a == $b; + case '===': + return $a === $b; + case '<=': + return $a <= $b; + case '<': + return $a < $b; + case '!=': + case '<>': + return $a != $b; + case '!==': + return $a !== $b; + default: + throw new InvalidArgumentException(sprintf('Invalid operator "%s" provided', $operator)); + } + } +} diff --git a/src/Math/Distance.php b/src/Math/Distance.php new file mode 100644 index 0000000..f49bd33 --- /dev/null +++ b/src/Math/Distance.php @@ -0,0 +1,10 @@ +deltas($a, $b)); + } +} diff --git a/src/Math/Distance/Distance.php b/src/Math/Distance/Distance.php new file mode 100644 index 0000000..ad9cdb9 --- /dev/null +++ b/src/Math/Distance/Distance.php @@ -0,0 +1,61 @@ +norm = $norm; + } + + /** + * @throws InvalidArgumentException + */ + public function distance(array $a, array $b): float + { + $distance = 0; + + foreach ($this->deltas($a, $b) as $delta) { + $distance += $delta ** $this->norm; + } + + return $distance ** (1 / $this->norm); + } + + /** + * @throws InvalidArgumentException + */ + protected function deltas(array $a, array $b): array + { + $count = count($a); + + if ($count !== count($b)) { + throw new InvalidArgumentException('Size of given arrays does not match'); + } + + $deltas = []; + + for ($i = 0; $i < $count; $i++) { + $deltas[] = abs($a[$i] - $b[$i]); + } + + return $deltas; + } +} diff --git a/src/Math/Distance/Euclidean.php b/src/Math/Distance/Euclidean.php new file mode 100644 index 0000000..4b7abc4 --- /dev/null +++ b/src/Math/Distance/Euclidean.php @@ -0,0 +1,31 @@ +distance($a, $b) ** 2; + } +} diff --git a/src/Math/Distance/Manhattan.php b/src/Math/Distance/Manhattan.php new file mode 100644 index 0000000..21ddee2 --- /dev/null +++ b/src/Math/Distance/Manhattan.php @@ -0,0 +1,21 @@ +gamma = $gamma; } - /** - * @param float $a - * @param float $b - * - * @return float - */ - public function compute($a, $b) + public function compute($a, $b): float { + if (!is_array($a) || !is_array($b)) { + throw new 
InvalidArgumentException(sprintf('Arguments of %s must be arrays', __METHOD__)); + } + $score = 2 * Product::scalar($a, $b); $squares = Product::scalar($a, $a) + Product::scalar($b, $b); - $result = exp(-$this->gamma * ($squares - $score)); - return $result; + return exp(-$this->gamma * ($squares - $score)); } } diff --git a/src/Math/LinearAlgebra/EigenvalueDecomposition.php b/src/Math/LinearAlgebra/EigenvalueDecomposition.php new file mode 100644 index 0000000..94f0a9e --- /dev/null +++ b/src/Math/LinearAlgebra/EigenvalueDecomposition.php @@ -0,0 +1,960 @@ +n = count($arg[0]); + $symmetric = true; + + for ($j = 0; ($j < $this->n) & $symmetric; ++$j) { + for ($i = 0; ($i < $this->n) & $symmetric; ++$i) { + $symmetric = $arg[$i][$j] == $arg[$j][$i]; + } + } + + if ($symmetric) { + $this->V = $arg; + // Tridiagonalize. + $this->tred2(); + // Diagonalize. + $this->tql2(); + } else { + $this->H = $arg; + $this->ort = []; + // Reduce to Hessenberg form. + $this->orthes(); + // Reduce Hessenberg to real Schur form. + $this->hqr2(); + } + } + + /** + * Return the eigenvector matrix + */ + public function getEigenvectors(): array + { + $vectors = $this->V; + + // Always return the eigenvectors of length 1.0 + $vectors = new Matrix($vectors); + $vectors = array_map(function ($vect) { + $sum = 0; + $count = count($vect); + for ($i = 0; $i < $count; ++$i) { + $sum += $vect[$i] ** 2; + } + + $sum **= .5; + for ($i = 0; $i < $count; ++$i) { + $vect[$i] /= $sum; + } + + return $vect; + }, $vectors->transpose()->toArray()); + + return $vectors; + } + + /** + * Return the real parts of the eigenvalues
+ * d = real(diag(D)); + */ + public function getRealEigenvalues(): array + { + return $this->d; + } + + /** + * Return the imaginary parts of the eigenvalues
+ * d = imag(diag(D)) + */ + public function getImagEigenvalues(): array + { + return $this->e; + } + + /** + * Return the block diagonal eigenvalue matrix + */ + public function getDiagonalEigenvalues(): array + { + $D = []; + + for ($i = 0; $i < $this->n; ++$i) { + $D[$i] = array_fill(0, $this->n, 0.0); + $D[$i][$i] = $this->d[$i]; + if ($this->e[$i] == 0) { + continue; + } + + $o = $this->e[$i] > 0 ? $i + 1 : $i - 1; + $D[$i][$o] = $this->e[$i]; + } + + return $D; + } + + /** + * Symmetric Householder reduction to tridiagonal form. + */ + private function tred2(): void + { + // This is derived from the Algol procedures tred2 by + // Bowdler, Martin, Reinsch, and Wilkinson, Handbook for + // Auto. Comp., Vol.ii-Linear Algebra, and the corresponding + // Fortran subroutine in EISPACK. + $this->d = $this->V[$this->n - 1]; + // Householder reduction to tridiagonal form. + for ($i = $this->n - 1; $i > 0; --$i) { + $i_ = $i - 1; + // Scale to avoid under/overflow. + $h = $scale = 0.0; + $scale += array_sum(array_map('abs', $this->d)); + if ($scale == 0.0) { + $this->e[$i] = $this->d[$i_]; + $this->d = array_slice($this->V[$i_], 0, $this->n - 1); + for ($j = 0; $j < $i; ++$j) { + $this->V[$j][$i] = $this->V[$i][$j] = 0.0; + } + } else { + // Generate Householder vector. + for ($k = 0; $k < $i; ++$k) { + $this->d[$k] /= $scale; + $h += $this->d[$k] ** 2; + } + + $f = $this->d[$i_]; + $g = $h ** .5; + if ($f > 0) { + $g = -$g; + } + + $this->e[$i] = $scale * $g; + $h -= $f * $g; + $this->d[$i_] = $f - $g; + + for ($j = 0; $j < $i; ++$j) { + $this->e[$j] = 0.0; + } + + // Apply similarity transformation to remaining columns. 
+ for ($j = 0; $j < $i; ++$j) { + $f = $this->d[$j]; + $this->V[$j][$i] = $f; + $g = $this->e[$j] + $this->V[$j][$j] * $f; + + for ($k = $j + 1; $k <= $i_; ++$k) { + $g += $this->V[$k][$j] * $this->d[$k]; + $this->e[$k] += $this->V[$k][$j] * $f; + } + + $this->e[$j] = $g; + } + + $f = 0.0; + + if ($h == 0.0) { + $h = 1e-32; + } + + for ($j = 0; $j < $i; ++$j) { + $this->e[$j] /= $h; + $f += $this->e[$j] * $this->d[$j]; + } + + $hh = $f / (2 * $h); + for ($j = 0; $j < $i; ++$j) { + $this->e[$j] -= $hh * $this->d[$j]; + } + + for ($j = 0; $j < $i; ++$j) { + $f = $this->d[$j]; + $g = $this->e[$j]; + for ($k = $j; $k <= $i_; ++$k) { + $this->V[$k][$j] -= ($f * $this->e[$k] + $g * $this->d[$k]); + } + + $this->d[$j] = $this->V[$i - 1][$j]; + $this->V[$i][$j] = 0.0; + } + } + + $this->d[$i] = $h; + } + + // Accumulate transformations. + for ($i = 0; $i < $this->n - 1; ++$i) { + $this->V[$this->n - 1][$i] = $this->V[$i][$i]; + $this->V[$i][$i] = 1.0; + $h = $this->d[$i + 1]; + if ($h != 0.0) { + for ($k = 0; $k <= $i; ++$k) { + $this->d[$k] = $this->V[$k][$i + 1] / $h; + } + + for ($j = 0; $j <= $i; ++$j) { + $g = 0.0; + for ($k = 0; $k <= $i; ++$k) { + $g += $this->V[$k][$i + 1] * $this->V[$k][$j]; + } + + for ($k = 0; $k <= $i; ++$k) { + $this->V[$k][$j] -= $g * $this->d[$k]; + } + } + } + + for ($k = 0; $k <= $i; ++$k) { + $this->V[$k][$i + 1] = 0.0; + } + } + + $this->d = $this->V[$this->n - 1]; + $this->V[$this->n - 1] = array_fill(0, $this->n, 0.0); + $this->V[$this->n - 1][$this->n - 1] = 1.0; + $this->e[0] = 0.0; + } + + /** + * Symmetric tridiagonal QL algorithm. + * + * This is derived from the Algol procedures tql2, by + * Bowdler, Martin, Reinsch, and Wilkinson, Handbook for + * Auto. Comp., Vol.ii-Linear Algebra, and the corresponding + * Fortran subroutine in EISPACK. 
+ */ + private function tql2(): void + { + for ($i = 1; $i < $this->n; ++$i) { + $this->e[$i - 1] = $this->e[$i]; + } + + $this->e[$this->n - 1] = 0.0; + $f = 0.0; + $tst1 = 0.0; + $eps = 2.0 ** -52.0; + + for ($l = 0; $l < $this->n; ++$l) { + // Find small subdiagonal element + $tst1 = max($tst1, abs($this->d[$l]) + abs($this->e[$l])); + $m = $l; + while ($m < $this->n) { + if (abs($this->e[$m]) <= $eps * $tst1) { + break; + } + + ++$m; + } + + // If m == l, $this->d[l] is an eigenvalue, + // otherwise, iterate. + if ($m > $l) { + do { + // Compute implicit shift + $g = $this->d[$l]; + $p = ($this->d[$l + 1] - $g) / (2.0 * $this->e[$l]); + $r = hypot($p, 1.0); + if ($p < 0) { + $r *= -1; + } + + $this->d[$l] = $this->e[$l] / ($p + $r); + $this->d[$l + 1] = $this->e[$l] * ($p + $r); + $dl1 = $this->d[$l + 1]; + $h = $g - $this->d[$l]; + for ($i = $l + 2; $i < $this->n; ++$i) { + $this->d[$i] -= $h; + } + + $f += $h; + // Implicit QL transformation. + $p = $this->d[$m]; + $c = 1.0; + $c2 = $c3 = $c; + $el1 = $this->e[$l + 1]; + $s = $s2 = 0.0; + for ($i = $m - 1; $i >= $l; --$i) { + $c3 = $c2; + $c2 = $c; + $s2 = $s; + $g = $c * $this->e[$i]; + $h = $c * $p; + $r = hypot($p, $this->e[$i]); + $this->e[$i + 1] = $s * $r; + $s = $this->e[$i] / $r; + $c = $p / $r; + $p = $c * $this->d[$i] - $s * $g; + $this->d[$i + 1] = $h + $s * ($c * $g + $s * $this->d[$i]); + // Accumulate transformation. + for ($k = 0; $k < $this->n; ++$k) { + $h = $this->V[$k][$i + 1]; + $this->V[$k][$i + 1] = $s * $this->V[$k][$i] + $c * $h; + $this->V[$k][$i] = $c * $this->V[$k][$i] - $s * $h; + } + } + + $p = -$s * $s2 * $c3 * $el1 * $this->e[$l] / $dl1; + $this->e[$l] = $s * $p; + $this->d[$l] = $c * $p; + // Check for convergence. + } while (abs($this->e[$l]) > $eps * $tst1); + } + + $this->d[$l] += $f; + $this->e[$l] = 0.0; + } + + // Sort eigenvalues and corresponding vectors. 
+ for ($i = 0; $i < $this->n - 1; ++$i) { + $k = $i; + $p = $this->d[$i]; + for ($j = $i + 1; $j < $this->n; ++$j) { + if ($this->d[$j] < $p) { + $k = $j; + $p = $this->d[$j]; + } + } + + if ($k != $i) { + $this->d[$k] = $this->d[$i]; + $this->d[$i] = $p; + for ($j = 0; $j < $this->n; ++$j) { + $p = $this->V[$j][$i]; + $this->V[$j][$i] = $this->V[$j][$k]; + $this->V[$j][$k] = $p; + } + } + } + } + + /** + * Nonsymmetric reduction to Hessenberg form. + * + * This is derived from the Algol procedures orthes and ortran, + * by Martin and Wilkinson, Handbook for Auto. Comp., + * Vol.ii-Linear Algebra, and the corresponding + * Fortran subroutines in EISPACK. + */ + private function orthes(): void + { + $low = 0; + $high = $this->n - 1; + + for ($m = $low + 1; $m <= $high - 1; ++$m) { + // Scale column. + $scale = 0.0; + for ($i = $m; $i <= $high; ++$i) { + $scale += abs($this->H[$i][$m - 1]); + } + + if ($scale != 0.0) { + // Compute Householder transformation. + $h = 0.0; + for ($i = $high; $i >= $m; --$i) { + $this->ort[$i] = $this->H[$i][$m - 1] / $scale; + $h += $this->ort[$i] * $this->ort[$i]; + } + + $g = $h ** .5; + if ($this->ort[$m] > 0) { + $g *= -1; + } + + $h -= $this->ort[$m] * $g; + $this->ort[$m] -= $g; + // Apply Householder similarity transformation + // H = (I -u * u' / h) * H * (I -u * u') / h) + for ($j = $m; $j < $this->n; ++$j) { + $f = 0.0; + for ($i = $high; $i >= $m; --$i) { + $f += $this->ort[$i] * $this->H[$i][$j]; + } + + $f /= $h; + for ($i = $m; $i <= $high; ++$i) { + $this->H[$i][$j] -= $f * $this->ort[$i]; + } + } + + for ($i = 0; $i <= $high; ++$i) { + $f = 0.0; + for ($j = $high; $j >= $m; --$j) { + $f += $this->ort[$j] * $this->H[$i][$j]; + } + + $f /= $h; + for ($j = $m; $j <= $high; ++$j) { + $this->H[$i][$j] -= $f * $this->ort[$j]; + } + } + + $this->ort[$m] = $scale * $this->ort[$m]; + $this->H[$m][$m - 1] = $scale * $g; + } + } + + // Accumulate transformations (Algol's ortran). 
+ for ($i = 0; $i < $this->n; ++$i) { + for ($j = 0; $j < $this->n; ++$j) { + $this->V[$i][$j] = ($i == $j ? 1.0 : 0.0); + } + } + + for ($m = $high - 1; $m >= $low + 1; --$m) { + if ($this->H[$m][$m - 1] != 0.0) { + for ($i = $m + 1; $i <= $high; ++$i) { + $this->ort[$i] = $this->H[$i][$m - 1]; + } + + for ($j = $m; $j <= $high; ++$j) { + $g = 0.0; + for ($i = $m; $i <= $high; ++$i) { + $g += $this->ort[$i] * $this->V[$i][$j]; + } + + // Double division avoids possible underflow + $g /= $this->ort[$m]; + $g /= $this->H[$m][$m - 1]; + for ($i = $m; $i <= $high; ++$i) { + $this->V[$i][$j] += $g * $this->ort[$i]; + } + } + } + } + } + + /** + * Performs complex division. + * + * @param int|float $xr + * @param int|float $xi + * @param int|float $yr + * @param int|float $yi + */ + private function cdiv($xr, $xi, $yr, $yi): void + { + if (abs($yr) > abs($yi)) { + $r = $yi / $yr; + $d = $yr + $r * $yi; + $this->cdivr = ($xr + $r * $xi) / $d; + $this->cdivi = ($xi - $r * $xr) / $d; + } else { + $r = $yr / $yi; + $d = $yi + $r * $yr; + $this->cdivr = ($r * $xr + $xi) / $d; + $this->cdivi = ($r * $xi - $xr) / $d; + } + } + + /** + * Nonsymmetric reduction from Hessenberg to real Schur form. + * + * Code is derived from the Algol procedure hqr2, + * by Martin and Wilkinson, Handbook for Auto. Comp., + * Vol.ii-Linear Algebra, and the corresponding + * Fortran subroutine in EISPACK. 
+ */ + private function hqr2(): void + { + // Initialize + $nn = $this->n; + $n = $nn - 1; + $low = 0; + $high = $nn - 1; + $eps = 2.0 ** -52.0; + $exshift = 0.0; + $p = $q = $r = $s = $z = 0; + // Store roots isolated by balanc and compute matrix norm + $norm = 0.0; + + for ($i = 0; $i < $nn; ++$i) { + if (($i < $low) or ($i > $high)) { + $this->d[$i] = $this->H[$i][$i]; + $this->e[$i] = 0.0; + } + + for ($j = max($i - 1, 0); $j < $nn; ++$j) { + $norm += abs($this->H[$i][$j]); + } + } + + // Outer loop over eigenvalue index + $iter = 0; + while ($n >= $low) { + // Look for single small sub-diagonal element + $l = $n; + while ($l > $low) { + $s = abs($this->H[$l - 1][$l - 1]) + abs($this->H[$l][$l]); + if ($s == 0.0) { + $s = $norm; + } + + if (abs($this->H[$l][$l - 1]) < $eps * $s) { + break; + } + + --$l; + } + + // Check for convergence + // One root found + if ($l == $n) { + $this->H[$n][$n] += $exshift; + $this->d[$n] = $this->H[$n][$n]; + $this->e[$n] = 0.0; + --$n; + $iter = 0; + // Two roots found + } elseif ($l == $n - 1) { + $w = $this->H[$n][$n - 1] * $this->H[$n - 1][$n]; + $p = ($this->H[$n - 1][$n - 1] - $this->H[$n][$n]) / 2.0; + $q = $p * $p + $w; + $z = abs($q) ** .5; + $this->H[$n][$n] += $exshift; + $this->H[$n - 1][$n - 1] += $exshift; + $x = $this->H[$n][$n]; + // Real pair + if ($q >= 0) { + if ($p >= 0) { + $z = $p + $z; + } else { + $z = $p - $z; + } + + $this->d[$n - 1] = $x + $z; + $this->d[$n] = $this->d[$n - 1]; + if ($z != 0.0) { + $this->d[$n] = $x - $w / $z; + } + + $this->e[$n - 1] = 0.0; + $this->e[$n] = 0.0; + $x = $this->H[$n][$n - 1]; + $s = abs($x) + abs($z); + $p = $x / $s; + $q = $z / $s; + $r = ($p * $p + $q * $q) ** .5; + $p /= $r; + $q /= $r; + // Row modification + for ($j = $n - 1; $j < $nn; ++$j) { + $z = $this->H[$n - 1][$j]; + $this->H[$n - 1][$j] = $q * $z + $p * $this->H[$n][$j]; + $this->H[$n][$j] = $q * $this->H[$n][$j] - $p * $z; + } + + // Column modification + for ($i = 0; $i <= $n; ++$i) { + $z = 
$this->H[$i][$n - 1]; + $this->H[$i][$n - 1] = $q * $z + $p * $this->H[$i][$n]; + $this->H[$i][$n] = $q * $this->H[$i][$n] - $p * $z; + } + + // Accumulate transformations + for ($i = $low; $i <= $high; ++$i) { + $z = $this->V[$i][$n - 1]; + $this->V[$i][$n - 1] = $q * $z + $p * $this->V[$i][$n]; + $this->V[$i][$n] = $q * $this->V[$i][$n] - $p * $z; + } + + // Complex pair + } else { + $this->d[$n - 1] = $x + $p; + $this->d[$n] = $x + $p; + $this->e[$n - 1] = $z; + $this->e[$n] = -$z; + } + + $n -= 2; + $iter = 0; + // No convergence yet + } else { + // Form shift + $x = $this->H[$n][$n]; + $y = 0.0; + $w = 0.0; + if ($l < $n) { + $y = $this->H[$n - 1][$n - 1]; + $w = $this->H[$n][$n - 1] * $this->H[$n - 1][$n]; + } + + // Wilkinson's original ad hoc shift + if ($iter == 10) { + $exshift += $x; + for ($i = $low; $i <= $n; ++$i) { + $this->H[$i][$i] -= $x; + } + + $s = abs($this->H[$n][$n - 1]) + abs($this->H[$n - 1][$n - 2]); + $x = $y = 0.75 * $s; + $w = -0.4375 * $s * $s; + } + + // MATLAB's new ad hoc shift + if ($iter == 30) { + $s = ($y - $x) / 2.0; + $s *= $s + $w; + if ($s > 0) { + $s **= .5; + if ($y < $x) { + $s = -$s; + } + + $s = $x - $w / (($y - $x) / 2.0 + $s); + for ($i = $low; $i <= $n; ++$i) { + $this->H[$i][$i] -= $s; + } + + $exshift += $s; + $x = $y = $w = 0.964; + } + } + + // Could check iteration count here. 
+ ++$iter; + // Look for two consecutive small sub-diagonal elements + $m = $n - 2; + while ($m >= $l) { + $z = $this->H[$m][$m]; + $r = $x - $z; + $s = $y - $z; + $p = ($r * $s - $w) / $this->H[$m + 1][$m] + $this->H[$m][$m + 1]; + $q = $this->H[$m + 1][$m + 1] - $z - $r - $s; + $r = $this->H[$m + 2][$m + 1]; + $s = abs($p) + abs($q) + abs($r); + $p /= $s; + $q /= $s; + $r /= $s; + if ($m == $l) { + break; + } + + if (abs($this->H[$m][$m - 1]) * (abs($q) + abs($r)) < + $eps * (abs($p) * (abs($this->H[$m - 1][$m - 1]) + abs($z) + abs($this->H[$m + 1][$m + 1])))) { + break; + } + + --$m; + } + + for ($i = $m + 2; $i <= $n; ++$i) { + $this->H[$i][$i - 2] = 0.0; + if ($i > $m + 2) { + $this->H[$i][$i - 3] = 0.0; + } + } + + // Double QR step involving rows l:n and columns m:n + for ($k = $m; $k <= $n - 1; ++$k) { + $notlast = $k != $n - 1; + if ($k != $m) { + $p = $this->H[$k][$k - 1]; + $q = $this->H[$k + 1][$k - 1]; + $r = ($notlast ? $this->H[$k + 2][$k - 1] : 0.0); + $x = abs($p) + abs($q) + abs($r); + if ($x != 0.0) { + $p /= $x; + $q /= $x; + $r /= $x; + } + } + + if ($x == 0.0) { + break; + } + + $s = ($p * $p + $q * $q + $r * $r) ** .5; + if ($p < 0) { + $s = -$s; + } + + if ($s != 0) { + if ($k != $m) { + $this->H[$k][$k - 1] = -$s * $x; + } elseif ($l != $m) { + $this->H[$k][$k - 1] = -$this->H[$k][$k - 1]; + } + + $p += $s; + $x = $p / $s; + $y = $q / $s; + $z = $r / $s; + $q /= $p; + $r /= $p; + // Row modification + for ($j = $k; $j < $nn; ++$j) { + $p = $this->H[$k][$j] + $q * $this->H[$k + 1][$j]; + if ($notlast) { + $p += $r * $this->H[$k + 2][$j]; + $this->H[$k + 2][$j] -= $p * $z; + } + + $this->H[$k][$j] -= $p * $x; + $this->H[$k + 1][$j] -= $p * $y; + } + + // Column modification + for ($i = 0; $i <= min($n, $k + 3); ++$i) { + $p = $x * $this->H[$i][$k] + $y * $this->H[$i][$k + 1]; + if ($notlast) { + $p += $z * $this->H[$i][$k + 2]; + $this->H[$i][$k + 2] -= $p * $r; + } + + $this->H[$i][$k] -= $p; + $this->H[$i][$k + 1] -= $p * $q; + } + + // 
Accumulate transformations + for ($i = $low; $i <= $high; ++$i) { + $p = $x * $this->V[$i][$k] + $y * $this->V[$i][$k + 1]; + if ($notlast) { + $p += $z * $this->V[$i][$k + 2]; + $this->V[$i][$k + 2] -= $p * $r; + } + + $this->V[$i][$k] -= $p; + $this->V[$i][$k + 1] -= $p * $q; + } + } // ($s != 0) + } // k loop + } // check convergence + } // while ($n >= $low) + + // Backsubstitute to find vectors of upper triangular form + if ($norm == 0.0) { + return; + } + + for ($n = $nn - 1; $n >= 0; --$n) { + $p = $this->d[$n]; + $q = $this->e[$n]; + // Real vector + if ($q == 0) { + $l = $n; + $this->H[$n][$n] = 1.0; + for ($i = $n - 1; $i >= 0; --$i) { + $w = $this->H[$i][$i] - $p; + $r = 0.0; + for ($j = $l; $j <= $n; ++$j) { + $r += $this->H[$i][$j] * $this->H[$j][$n]; + } + + if ($this->e[$i] < 0.0) { + $z = $w; + $s = $r; + } else { + $l = $i; + if ($this->e[$i] == 0.0) { + if ($w != 0.0) { + $this->H[$i][$n] = -$r / $w; + } else { + $this->H[$i][$n] = -$r / ($eps * $norm); + } + + // Solve real equations + } else { + $x = $this->H[$i][$i + 1]; + $y = $this->H[$i + 1][$i]; + $q = ($this->d[$i] - $p) * ($this->d[$i] - $p) + $this->e[$i] * $this->e[$i]; + $t = ($x * $s - $z * $r) / $q; + $this->H[$i][$n] = $t; + if (abs($x) > abs($z)) { + $this->H[$i + 1][$n] = (-$r - $w * $t) / $x; + } else { + $this->H[$i + 1][$n] = (-$s - $y * $t) / $z; + } + } + + // Overflow control + $t = abs($this->H[$i][$n]); + if (($eps * $t) * $t > 1) { + for ($j = $i; $j <= $n; ++$j) { + $this->H[$j][$n] /= $t; + } + } + } + } + + // Complex vector + } elseif ($q < 0) { + $l = $n - 1; + // Last vector component imaginary so matrix is triangular + if (abs($this->H[$n][$n - 1]) > abs($this->H[$n - 1][$n])) { + $this->H[$n - 1][$n - 1] = $q / $this->H[$n][$n - 1]; + $this->H[$n - 1][$n] = -($this->H[$n][$n] - $p) / $this->H[$n][$n - 1]; + } else { + $this->cdiv(0.0, -$this->H[$n - 1][$n], $this->H[$n - 1][$n - 1] - $p, $q); + $this->H[$n - 1][$n - 1] = $this->cdivr; + $this->H[$n - 1][$n] = 
$this->cdivi; + } + + $this->H[$n][$n - 1] = 0.0; + $this->H[$n][$n] = 1.0; + for ($i = $n - 2; $i >= 0; --$i) { + // double ra,sa,vr,vi; + $ra = 0.0; + $sa = 0.0; + for ($j = $l; $j <= $n; ++$j) { + $ra += $this->H[$i][$j] * $this->H[$j][$n - 1]; + $sa += $this->H[$i][$j] * $this->H[$j][$n]; + } + + $w = $this->H[$i][$i] - $p; + if ($this->e[$i] < 0.0) { + $z = $w; + $r = $ra; + $s = $sa; + } else { + $l = $i; + if ($this->e[$i] == 0) { + $this->cdiv(-$ra, -$sa, $w, $q); + $this->H[$i][$n - 1] = $this->cdivr; + $this->H[$i][$n] = $this->cdivi; + } else { + // Solve complex equations + $x = $this->H[$i][$i + 1]; + $y = $this->H[$i + 1][$i]; + $vr = ($this->d[$i] - $p) * ($this->d[$i] - $p) + $this->e[$i] * $this->e[$i] - $q * $q; + $vi = ($this->d[$i] - $p) * 2.0 * $q; + if ($vr == 0.0 && $vi == 0.0) { + $vr = $eps * $norm * (abs($w) + abs($q) + abs($x) + abs($y) + abs($z)); + } + + $this->cdiv($x * $r - $z * $ra + $q * $sa, $x * $s - $z * $sa - $q * $ra, $vr, $vi); + $this->H[$i][$n - 1] = $this->cdivr; + $this->H[$i][$n] = $this->cdivi; + if (abs($x) > (abs($z) + abs($q))) { + $this->H[$i + 1][$n - 1] = (-$ra - $w * $this->H[$i][$n - 1] + $q * $this->H[$i][$n]) / $x; + $this->H[$i + 1][$n] = (-$sa - $w * $this->H[$i][$n] - $q * $this->H[$i][$n - 1]) / $x; + } else { + $this->cdiv(-$r - $y * $this->H[$i][$n - 1], -$s - $y * $this->H[$i][$n], $z, $q); + $this->H[$i + 1][$n - 1] = $this->cdivr; + $this->H[$i + 1][$n] = $this->cdivi; + } + } + + // Overflow control + $t = max(abs($this->H[$i][$n - 1]), abs($this->H[$i][$n])); + if (($eps * $t) * $t > 1) { + for ($j = $i; $j <= $n; ++$j) { + $this->H[$j][$n - 1] /= $t; + $this->H[$j][$n] /= $t; + } + } + } // end else + } // end for + } // end else for complex case + } // end for + + // Vectors of isolated roots + for ($i = 0; $i < $nn; ++$i) { + if ($i < $low || $i > $high) { + for ($j = $i; $j < $nn; ++$j) { + $this->V[$i][$j] = $this->H[$i][$j]; + } + } + } + + // Back transformation to get eigenvectors of original 
matrix + for ($j = $nn - 1; $j >= $low; --$j) { + for ($i = $low; $i <= $high; ++$i) { + $z = 0.0; + for ($k = $low; $k <= min($j, $high); ++$k) { + $z += $this->V[$i][$k] * $this->H[$k][$j]; + } + + $this->V[$i][$j] = $z; + } + } + } +} diff --git a/src/Math/LinearAlgebra/LUDecomposition.php b/src/Math/LinearAlgebra/LUDecomposition.php new file mode 100644 index 0000000..61f7c3f --- /dev/null +++ b/src/Math/LinearAlgebra/LUDecomposition.php @@ -0,0 +1,299 @@ += n, the LU decomposition is an m-by-n + * unit lower triangular matrix L, an n-by-n upper triangular matrix U, + * and a permutation vector piv of length m so that A(piv,:) = L*U. + * If m < n, then L is m-by-m and U is m-by-n. + * + * The LU decomposition with pivoting always exists, even if the matrix is + * singular, so the constructor will never fail. The primary use of the + * LU decomposition is in the solution of square systems of simultaneous + * linear equations. This will fail if isNonsingular() returns false. + * + * @author Paul Meagher + * @author Bartosz Matosiuk + * @author Michael Bommarito + * + * @version 1.1 + * + * @license PHP v3.0 + * + * Slightly changed to adapt the original code to PHP-ML library + * @date 2017/04/24 + * + * @author Mustafa Karabulut + */ + +namespace Phpml\Math\LinearAlgebra; + +use Phpml\Exception\MatrixException; +use Phpml\Math\Matrix; + +class LUDecomposition +{ + /** + * Decomposition storage + * + * @var array + */ + private $LU = []; + + /** + * Row dimension. + * + * @var int + */ + private $m; + + /** + * Column dimension. + * + * @var int + */ + private $n; + + /** + * Pivot sign. + * + * @var int + */ + private $pivsign; + + /** + * Internal storage of pivot vector. + * + * @var array + */ + private $piv = []; + + /** + * Constructs Structure to access L, U and piv. 
+ * + * @param Matrix $A Rectangular matrix + * + * @throws MatrixException + */ + public function __construct(Matrix $A) + { + if ($A->getRows() !== $A->getColumns()) { + throw new MatrixException('Matrix is not square matrix'); + } + + // Use a "left-looking", dot-product, Crout/Doolittle algorithm. + $this->LU = $A->toArray(); + $this->m = $A->getRows(); + $this->n = $A->getColumns(); + for ($i = 0; $i < $this->m; ++$i) { + $this->piv[$i] = $i; + } + + $this->pivsign = 1; + $LUcolj = []; + + // Outer loop. + for ($j = 0; $j < $this->n; ++$j) { + // Make a copy of the j-th column to localize references. + for ($i = 0; $i < $this->m; ++$i) { + $LUcolj[$i] = &$this->LU[$i][$j]; + } + + // Apply previous transformations. + for ($i = 0; $i < $this->m; ++$i) { + $LUrowi = $this->LU[$i]; + // Most of the time is spent in the following dot product. + $kmax = min($i, $j); + $s = 0.0; + for ($k = 0; $k < $kmax; ++$k) { + $s += $LUrowi[$k] * $LUcolj[$k]; + } + + $LUrowi[$j] = $LUcolj[$i] -= $s; + } + + // Find pivot and exchange if necessary. + $p = $j; + for ($i = $j + 1; $i < $this->m; ++$i) { + if (abs($LUcolj[$i] ?? 0) > abs($LUcolj[$p] ?? 0)) { + $p = $i; + } + } + + if ($p != $j) { + for ($k = 0; $k < $this->n; ++$k) { + $t = $this->LU[$p][$k]; + $this->LU[$p][$k] = $this->LU[$j][$k]; + $this->LU[$j][$k] = $t; + } + + $k = $this->piv[$p]; + $this->piv[$p] = $this->piv[$j]; + $this->piv[$j] = $k; + $this->pivsign *= -1; + } + + // Compute multipliers. + if (($j < $this->m) && ($this->LU[$j][$j] != 0.0)) { + for ($i = $j + 1; $i < $this->m; ++$i) { + $this->LU[$i][$j] /= $this->LU[$j][$j]; + } + } + } + } + + /** + * Get lower triangular factor. 
+ * + * @return Matrix Lower triangular factor + */ + public function getL(): Matrix + { + $L = []; + for ($i = 0; $i < $this->m; ++$i) { + for ($j = 0; $j < $this->n; ++$j) { + if ($i > $j) { + $L[$i][$j] = $this->LU[$i][$j]; + } elseif ($i == $j) { + $L[$i][$j] = 1.0; + } else { + $L[$i][$j] = 0.0; + } + } + } + + return new Matrix($L); + } + + /** + * Get upper triangular factor. + * + * @return Matrix Upper triangular factor + */ + public function getU(): Matrix + { + $U = []; + for ($i = 0; $i < $this->n; ++$i) { + for ($j = 0; $j < $this->n; ++$j) { + if ($i <= $j) { + $U[$i][$j] = $this->LU[$i][$j]; + } else { + $U[$i][$j] = 0.0; + } + } + } + + return new Matrix($U); + } + + /** + * Return pivot permutation vector. + * + * @return array Pivot vector + */ + public function getPivot(): array + { + return $this->piv; + } + + /** + * Alias for getPivot + * + * @see getPivot + */ + public function getDoublePivot(): array + { + return $this->getPivot(); + } + + /** + * Is the matrix nonsingular? + * + * @return bool true if U, and hence A, is nonsingular. + */ + public function isNonsingular(): bool + { + for ($j = 0; $j < $this->n; ++$j) { + if ($this->LU[$j][$j] == 0) { + return false; + } + } + + return true; + } + + public function det(): float + { + $d = $this->pivsign; + for ($j = 0; $j < $this->n; ++$j) { + $d *= $this->LU[$j][$j]; + } + + return (float) $d; + } + + /** + * Solve A*X = B + * + * @param Matrix $B A Matrix with as many rows as A and any number of columns. 
+ * + * @return array X so that L*U*X = B(piv,:) + * + * @throws MatrixException + */ + public function solve(Matrix $B): array + { + if ($B->getRows() != $this->m) { + throw new MatrixException('Matrix is not square matrix'); + } + + if (!$this->isNonsingular()) { + throw new MatrixException('Matrix is singular'); + } + + // Copy right hand side with pivoting + $nx = $B->getColumns(); + $X = $this->getSubMatrix($B->toArray(), $this->piv, 0, $nx - 1); + // Solve L*Y = B(piv,:) + for ($k = 0; $k < $this->n; ++$k) { + for ($i = $k + 1; $i < $this->n; ++$i) { + for ($j = 0; $j < $nx; ++$j) { + $X[$i][$j] -= $X[$k][$j] * $this->LU[$i][$k]; + } + } + } + + // Solve U*X = Y; + for ($k = $this->n - 1; $k >= 0; --$k) { + for ($j = 0; $j < $nx; ++$j) { + $X[$k][$j] /= $this->LU[$k][$k]; + } + + for ($i = 0; $i < $k; ++$i) { + for ($j = 0; $j < $nx; ++$j) { + $X[$i][$j] -= $X[$k][$j] * $this->LU[$i][$k]; + } + } + } + + return $X; + } + + protected function getSubMatrix(array $matrix, array $RL, int $j0, int $jF): array + { + $m = count($RL); + $n = $jF - $j0; + $R = array_fill(0, $m, array_fill(0, $n + 1, 0.0)); + + for ($i = 0; $i < $m; ++$i) { + for ($j = $j0; $j <= $jF; ++$j) { + $R[$i][$j - $j0] = $matrix[$RL[$i]][$j]; + } + } + + return $R; + } +} diff --git a/src/Math/Matrix.php b/src/Math/Matrix.php new file mode 100644 index 0000000..6f07d5d --- /dev/null +++ b/src/Math/Matrix.php @@ -0,0 +1,327 @@ +rows = 1; + $this->columns = count($matrix); + $matrix = [$matrix]; + } else { + $this->rows = count($matrix); + $this->columns = count($matrix[0]); + } + + if ($validate) { + for ($i = 0; $i < $this->rows; ++$i) { + if (count($matrix[$i]) !== $this->columns) { + throw new InvalidArgumentException('Matrix dimensions did not match'); + } + } + } + + $this->matrix = $matrix; + } + + public static function fromFlatArray(array $array): self + { + $matrix = []; + foreach ($array as $value) { + $matrix[] = [$value]; + } + + return new self($matrix); + } + + public function 
toArray(): array + { + return $this->matrix; + } + + public function toScalar(): float + { + return $this->matrix[0][0]; + } + + public function getRows(): int + { + return $this->rows; + } + + public function getColumns(): int + { + return $this->columns; + } + + /** + * @throws MatrixException + */ + public function getColumnValues(int $column): array + { + if ($column >= $this->columns) { + throw new MatrixException('Column out of range'); + } + + return array_column($this->matrix, $column); + } + + /** + * @return float|int + * + * @throws MatrixException + */ + public function getDeterminant() + { + if ($this->determinant !== null) { + return $this->determinant; + } + + if (!$this->isSquare()) { + throw new MatrixException('Matrix is not square matrix'); + } + + $lu = new LUDecomposition($this); + + return $this->determinant = $lu->det(); + } + + public function isSquare(): bool + { + return $this->columns === $this->rows; + } + + public function transpose(): self + { + if ($this->rows === 1) { + $matrix = array_map(static function ($el): array { + return [$el]; + }, $this->matrix[0]); + } else { + $matrix = array_map(null, ...$this->matrix); + } + + return new self($matrix, false); + } + + public function multiply(self $matrix): self + { + if ($this->columns !== $matrix->getRows()) { + throw new InvalidArgumentException('Inconsistent matrix supplied'); + } + + $array1 = $this->toArray(); + $array2 = $matrix->toArray(); + $colCount = $matrix->columns; + + /* + - To speed up multiplication, we need to avoid use of the array index operator [ ] as much as possible (see #255 for details) + - A combination of "foreach" and "array_column" works much faster than accessing the array via the index operator + */ + $product = []; + foreach ($array1 as $row => $rowData) { + for ($col = 0; $col < $colCount; ++$col) { + $columnData = array_column($array2, $col); + $sum = 0; + foreach ($rowData as $key => $valueData) { + $sum += $valueData * $columnData[$key]; + } + + 
$product[$row][$col] = $sum; + } + } + + return new self($product, false); + } + + /** + * @param float|int $value + */ + public function divideByScalar($value): self + { + $newMatrix = []; + for ($i = 0; $i < $this->rows; ++$i) { + for ($j = 0; $j < $this->columns; ++$j) { + $newMatrix[$i][$j] = $this->matrix[$i][$j] / $value; + } + } + + return new self($newMatrix, false); + } + + /** + * @param float|int $value + */ + public function multiplyByScalar($value): self + { + $newMatrix = []; + for ($i = 0; $i < $this->rows; ++$i) { + for ($j = 0; $j < $this->columns; ++$j) { + $newMatrix[$i][$j] = $this->matrix[$i][$j] * $value; + } + } + + return new self($newMatrix, false); + } + + /** + * Element-wise addition of the matrix with another one + */ + public function add(self $other): self + { + return $this->sum($other); + } + + /** + * Element-wise subtracting of another matrix from this one + */ + public function subtract(self $other): self + { + return $this->sum($other, -1); + } + + public function inverse(): self + { + if (!$this->isSquare()) { + throw new MatrixException('Matrix is not square matrix'); + } + + $LU = new LUDecomposition($this); + $identity = $this->getIdentity(); + $inverse = $LU->solve($identity); + + return new self($inverse, false); + } + + public function crossOut(int $row, int $column): self + { + $newMatrix = []; + $r = 0; + for ($i = 0; $i < $this->rows; ++$i) { + $c = 0; + if ($row != $i) { + for ($j = 0; $j < $this->columns; ++$j) { + if ($column != $j) { + $newMatrix[$r][$c] = $this->matrix[$i][$j]; + ++$c; + } + } + + ++$r; + } + } + + return new self($newMatrix, false); + } + + public function isSingular(): bool + { + return $this->getDeterminant() == 0; + } + + /** + * Frobenius norm (Hilbert–Schmidt norm, Euclidean norm) (‖A‖F) + * Square root of the sum of the square of all elements. 
+ * + * https://en.wikipedia.org/wiki/Matrix_norm#Frobenius_norm + * + * For an m-by-n matrix A (LaTeX notation): + * + * \|A\|_F = \sqrt{\sum_{i=1}^{m} \sum_{j=1}^{n} |a_{ij}|^2} + * where a_{ij} is the element in row i, column j. + */ + public function frobeniusNorm(): float + { + $squareSum = 0; + for ($i = 0; $i < $this->rows; ++$i) { + for ($j = 0; $j < $this->columns; ++$j) { + $squareSum += $this->matrix[$i][$j] ** 2; + } + } + + return $squareSum ** .5; + } + + /** + * Returns the transpose of the given array + */ + public static function transposeArray(array $array): array + { + return (new self($array, false))->transpose()->toArray(); + } + + /** + * Returns the dot product of two arrays
+ * Matrix::dot(x, y) ==> x.y' + */ + public static function dot(array $array1, array $array2): array + { + $m1 = new self($array1, false); + $m2 = new self($array2, false); + + return $m1->multiply($m2->transpose())->toArray()[0]; + } + + /** + * Element-wise addition or subtraction depending on the given sign parameter + */ + private function sum(self $other, int $sign = 1): self + { + $a1 = $this->toArray(); + $a2 = $other->toArray(); + + $newMatrix = []; + for ($i = 0; $i < $this->rows; ++$i) { + for ($k = 0; $k < $this->columns; ++$k) { + $newMatrix[$i][$k] = $a1[$i][$k] + $sign * $a2[$i][$k]; + } + } + + return new self($newMatrix, false); + } + + /** + * Returns diagonal identity matrix of the same size as this matrix + */ + private function getIdentity(): self + { + $array = array_fill(0, $this->rows, array_fill(0, $this->columns, 0)); + for ($i = 0; $i < $this->rows; ++$i) { + $array[$i][$i] = 1; + } + + return new self($array, false); + } +} diff --git a/src/Phpml/Math/Product.php b/src/Math/Product.php similarity index 60% rename from src/Phpml/Math/Product.php rename to src/Math/Product.php index 70accb9..78f3693 100644 --- a/src/Phpml/Math/Product.php +++ b/src/Math/Product.php @@ -1,22 +1,21 @@ $value) { - $product += $value * $b[$index]; + if (is_numeric($value) && is_numeric($b[$index])) { + $product += (float) $value * (float) $b[$index]; + } } return $product; diff --git a/src/Math/Set.php b/src/Math/Set.php new file mode 100644 index 0000000..b22d2f8 --- /dev/null +++ b/src/Math/Set.php @@ -0,0 +1,173 @@ +elements = self::sanitize($elements); + } + + /** + * Creates the union of A and B. + */ + public static function union(self $a, self $b): self + { + return new self(array_merge($a->toArray(), $b->toArray())); + } + + /** + * Creates the intersection of A and B. 
+ */ + public static function intersection(self $a, self $b): self + { + return new self(array_intersect($a->toArray(), $b->toArray())); + } + + /** + * Creates the difference of A and B. + */ + public static function difference(self $a, self $b): self + { + return new self(array_diff($a->toArray(), $b->toArray())); + } + + /** + * Creates the Cartesian product of A and B. + * + * @return Set[] + */ + public static function cartesian(self $a, self $b): array + { + $cartesian = []; + + foreach ($a as $multiplier) { + foreach ($b as $multiplicand) { + $cartesian[] = new self(array_merge([$multiplicand], [$multiplier])); + } + } + + return $cartesian; + } + + /** + * Creates the power set of A. + * + * @return Set[] + */ + public static function power(self $a): array + { + $power = [new self()]; + + foreach ($a as $multiplicand) { + foreach ($power as $multiplier) { + $power[] = new self(array_merge([$multiplicand], $multiplier->toArray())); + } + } + + return $power; + } + + /** + * @param string|int|float|bool $element + */ + public function add($element): self + { + return $this->addAll([$element]); + } + + /** + * @param string[]|int[]|float[]|bool[] $elements + */ + public function addAll(array $elements): self + { + $this->elements = self::sanitize(array_merge($this->elements, $elements)); + + return $this; + } + + /** + * @param string|int|float $element + */ + public function remove($element): self + { + return $this->removeAll([$element]); + } + + /** + * @param string[]|int[]|float[] $elements + */ + public function removeAll(array $elements): self + { + $this->elements = self::sanitize(array_diff($this->elements, $elements)); + + return $this; + } + + /** + * @param string|int|float $element + */ + public function contains($element): bool + { + return $this->containsAll([$element]); + } + + /** + * @param string[]|int[]|float[] $elements + */ + public function containsAll(array $elements): bool + { + return count(array_diff($elements, $this->elements)) === 
0; + } + + /** + * @return string[]|int[]|float[]|bool[] + */ + public function toArray(): array + { + return $this->elements; + } + + public function getIterator(): ArrayIterator + { + return new ArrayIterator($this->elements); + } + + public function isEmpty(): bool + { + return $this->cardinality() === 0; + } + + public function cardinality(): int + { + return count($this->elements); + } + + /** + * Removes duplicates and rewrites index. + * + * @param string[]|int[]|float[]|bool[] $elements + * + * @return string[]|int[]|float[]|bool[] + */ + private static function sanitize(array $elements): array + { + sort($elements, SORT_ASC); + + return array_values(array_unique($elements, SORT_ASC)); + } +} diff --git a/src/Math/Statistic/ANOVA.php b/src/Math/Statistic/ANOVA.php new file mode 100644 index 0000000..1629181 --- /dev/null +++ b/src/Math/Statistic/ANOVA.php @@ -0,0 +1,141 @@ + $msbValue) { + $f[$index] = $msbValue / $msw[$index]; + } + + return $f; + } + + private static function sumOfSquaresPerFeature(array $samples): array + { + $sum = array_fill(0, count($samples[0][0]), 0); + foreach ($samples as $class) { + foreach ($class as $sample) { + foreach ($sample as $index => $feature) { + $sum[$index] += $feature ** 2; + } + } + } + + return $sum; + } + + private static function sumOfFeaturesPerClass(array $samples): array + { + return array_map(static function (array $class): array { + $sum = array_fill(0, count($class[0]), 0); + foreach ($class as $sample) { + foreach ($sample as $index => $feature) { + $sum[$index] += $feature; + } + } + + return $sum; + }, $samples); + } + + private static function sumOfSquares(array $sums): array + { + $squares = array_fill(0, count($sums[0]), 0); + foreach ($sums as $row) { + foreach ($row as $index => $sum) { + $squares[$index] += $sum; + } + } + + return array_map(static function ($sum) { + return $sum ** 2; + }, $squares); + } + + private static function squaresSum(array $sums): array + { + foreach ($sums as &$row) { + 
foreach ($row as &$sum) { + $sum **= 2; + } + } + + return $sums; + } + + private static function calculateSsbn(array $samples, array $sumSamplesSquare, array $samplesPerClass, array $squareSumSamples, int $allSamples): array + { + $ssbn = array_fill(0, count($samples[0][0]), 0); + foreach ($sumSamplesSquare as $classIndex => $class) { + foreach ($class as $index => $feature) { + $ssbn[$index] += $feature / $samplesPerClass[$classIndex]; + } + } + + foreach ($squareSumSamples as $index => $sum) { + $ssbn[$index] -= $sum / $allSamples; + } + + return $ssbn; + } + + private static function calculateSswn(array $ssbn, array $ssAllSamples, array $squareSumSamples, int $allSamples): array + { + $sswn = []; + foreach ($ssAllSamples as $index => $ss) { + $sswn[$index] = ($ss - $squareSumSamples[$index] / $allSamples) - $ssbn[$index]; + } + + return $sswn; + } +} diff --git a/src/Phpml/Math/Statistic/Correlation.php b/src/Math/Statistic/Correlation.php similarity index 50% rename from src/Phpml/Math/Statistic/Correlation.php rename to src/Math/Statistic/Correlation.php index e9c0e30..c730c47 100644 --- a/src/Phpml/Math/Statistic/Correlation.php +++ b/src/Math/Statistic/Correlation.php @@ -1,6 +1,6 @@ $xi) { + $yi = $y[$index]; + $sum += ($xi - $meanX) * ($yi - $meanY); + } + + if ($sample) { + --$n; + } + + return $sum / $n; + } + + /** + * Calculates covariance of two dimensions, i and k in the given data. 
+ * + * @throws InvalidArgumentException + * @throws \Exception + */ + public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float + { + if (count($data) === 0) { + throw new InvalidArgumentException('The array has zero elements'); + } + + $n = count($data); + if ($sample && $n === 1) { + throw new InvalidArgumentException('The array must have at least 2 elements'); + } + + if ($i < 0 || $k < 0 || $i >= $n || $k >= $n) { + throw new InvalidArgumentException('Given indices i and k do not match with the dimensionality of data'); + } + + if ($meanX === null || $meanY === null) { + $x = array_column($data, $i); + $y = array_column($data, $k); + + $meanX = Mean::arithmetic($x); + $meanY = Mean::arithmetic($y); + $sum = 0.0; + foreach ($x as $index => $xi) { + $yi = $y[$index]; + $sum += ($xi - $meanX) * ($yi - $meanY); + } + } else { + // In the case, whole dataset given along with dimension indices, i and k, + // we would like to avoid getting column data with array_column and operate + // over this extra copy of column data for memory efficiency purposes. + // + // Instead we traverse through the whole data and get what we actually need + // without copying the data. This way, memory use will be reduced + // with a slight cost of CPU utilization. 
+ $sum = 0.0; + foreach ($data as $row) { + $val = [0, 0]; + foreach ($row as $index => $col) { + if ($index == $i) { + $val[0] = $col - $meanX; + } + + if ($index == $k) { + $val[1] = $col - $meanY; + } + } + + $sum += $val[0] * $val[1]; + } + } + + if ($sample) { + --$n; + } + + return $sum / $n; + } + + /** + * Returns the covariance matrix of n-dimensional data + * + * @param array|null $means + */ + public static function covarianceMatrix(array $data, ?array $means = null): array + { + $n = count($data[0]); + + if ($means === null) { + $means = []; + for ($i = 0; $i < $n; ++$i) { + $means[] = Mean::arithmetic(array_column($data, $i)); + } + } + + $cov = []; + for ($i = 0; $i < $n; ++$i) { + for ($k = 0; $k < $n; ++$k) { + if ($i > $k) { + $cov[$i][$k] = $cov[$k][$i]; + } else { + $cov[$i][$k] = self::fromDataset( + $data, + $i, + $k, + true, + $means[$i], + $means[$k] + ); + } + } + } + + return $cov; + } +} diff --git a/src/Math/Statistic/Gaussian.php b/src/Math/Statistic/Gaussian.php new file mode 100644 index 0000000..649063d --- /dev/null +++ b/src/Math/Statistic/Gaussian.php @@ -0,0 +1,50 @@ +mean = $mean; + $this->std = $std; + } + + /** + * Returns probability density of the given $value + * + * @return float|int + */ + public function pdf(float $value) + { + // Calculate the probability density by use of normal/Gaussian distribution + // Ref: https://en.wikipedia.org/wiki/Normal_distribution + $std2 = $this->std ** 2; + $mean = $this->mean; + + return exp(-(($value - $mean) ** 2) / (2 * $std2)) / ((2 * $std2 * M_PI) ** .5); + } + + /** + * Returns probability density value of the given $value based on + * given standard deviation and the mean + */ + public static function distributionPdf(float $mean, float $std, float $value): float + { + $normal = new self($mean, $std); + + return $normal->pdf($value); + } +} diff --git a/src/Math/Statistic/Mean.php b/src/Math/Statistic/Mean.php new file mode 100644 index 0000000..2ae55aa --- /dev/null +++ 
b/src/Math/Statistic/Mean.php @@ -0,0 +1,65 @@ +aggregateClassificationResults($actualLabels, $predictedLabels); + $this->computeMetrics(); + $this->computeAverage($average); + } + + public function getPrecision(): array + { + return $this->precision; + } + + public function getRecall(): array + { + return $this->recall; + } + + public function getF1score(): array + { + return $this->f1score; + } + + public function getSupport(): array + { + return $this->support; + } + + public function getAverage(): array + { + return $this->average; + } + + private function aggregateClassificationResults(array $actualLabels, array $predictedLabels): void + { + $truePositive = $falsePositive = $falseNegative = $support = self::getLabelIndexedArray($actualLabels, $predictedLabels); + + foreach ($actualLabels as $index => $actual) { + $predicted = $predictedLabels[$index]; + ++$support[$actual]; + + if ($actual === $predicted) { + ++$truePositive[$actual]; + } else { + ++$falsePositive[$predicted]; + ++$falseNegative[$actual]; + } + } + + $this->truePositive = $truePositive; + $this->falsePositive = $falsePositive; + $this->falseNegative = $falseNegative; + $this->support = $support; + } + + private function computeMetrics(): void + { + foreach ($this->truePositive as $label => $tp) { + $this->precision[$label] = $this->computePrecision($tp, $this->falsePositive[$label]); + $this->recall[$label] = $this->computeRecall($tp, $this->falseNegative[$label]); + $this->f1score[$label] = $this->computeF1Score((float) $this->precision[$label], (float) $this->recall[$label]); + } + } + + private function computeAverage(int $average): void + { + switch ($average) { + case self::MICRO_AVERAGE: + $this->computeMicroAverage(); + + return; + case self::MACRO_AVERAGE: + $this->computeMacroAverage(); + + return; + case self::WEIGHTED_AVERAGE: + $this->computeWeightedAverage(); + + return; + } + } + + private function computeMicroAverage(): void + { + $truePositive = (int) 
array_sum($this->truePositive); + $falsePositive = (int) array_sum($this->falsePositive); + $falseNegative = (int) array_sum($this->falseNegative); + + $precision = $this->computePrecision($truePositive, $falsePositive); + $recall = $this->computeRecall($truePositive, $falseNegative); + $f1score = $this->computeF1Score($precision, $recall); + + $this->average = compact('precision', 'recall', 'f1score'); + } + + private function computeMacroAverage(): void + { + foreach (['precision', 'recall', 'f1score'] as $metric) { + $values = $this->{$metric}; + if (count($values) == 0) { + $this->average[$metric] = 0.0; + + continue; + } + + $this->average[$metric] = array_sum($values) / count($values); + } + } + + private function computeWeightedAverage(): void + { + foreach (['precision', 'recall', 'f1score'] as $metric) { + $values = $this->{$metric}; + if (count($values) == 0) { + $this->average[$metric] = 0.0; + + continue; + } + + $sum = 0; + foreach ($values as $i => $value) { + $sum += $value * $this->support[$i]; + } + + $this->average[$metric] = $sum / array_sum($this->support); + } + } + + private function computePrecision(int $truePositive, int $falsePositive): float + { + $divider = $truePositive + $falsePositive; + if ($divider == 0) { + return 0.0; + } + + return $truePositive / $divider; + } + + private function computeRecall(int $truePositive, int $falseNegative): float + { + $divider = $truePositive + $falseNegative; + if ($divider == 0) { + return 0.0; + } + + return $truePositive / $divider; + } + + private function computeF1Score(float $precision, float $recall): float + { + $divider = $precision + $recall; + if ($divider == 0) { + return 0.0; + } + + return 2.0 * (($precision * $recall) / $divider); + } + + private static function getLabelIndexedArray(array $actualLabels, array $predictedLabels): array + { + $labels = array_values(array_unique(array_merge($actualLabels, $predictedLabels))); + sort($labels); + + return (array) array_combine($labels, 
array_fill(0, count($labels), 0)); + } +} diff --git a/src/Metric/ConfusionMatrix.php b/src/Metric/ConfusionMatrix.php new file mode 100644 index 0000000..5b8021c --- /dev/null +++ b/src/Metric/ConfusionMatrix.php @@ -0,0 +1,53 @@ + $actual) { + $predicted = $predictedLabels[$index]; + + if (!isset($labels[$actual], $labels[$predicted])) { + continue; + } + + if ($predicted === $actual) { + $row = $column = $labels[$actual]; + } else { + $row = $labels[$actual]; + $column = $labels[$predicted]; + } + + ++$matrix[$row][$column]; + } + + return $matrix; + } + + private static function generateMatrixWithZeros(array $labels): array + { + $count = count($labels); + $matrix = []; + + for ($i = 0; $i < $count; ++$i) { + $matrix[$i] = array_fill(0, $count, 0); + } + + return $matrix; + } + + private static function getUniqueLabels(array $labels): array + { + $labels = array_values(array_unique($labels)); + sort($labels); + + return array_flip($labels); + } +} diff --git a/src/Metric/Regression.php b/src/Metric/Regression.php new file mode 100644 index 0000000..c833f6a --- /dev/null +++ b/src/Metric/Regression.php @@ -0,0 +1,86 @@ + $target) { + $errors[] = (($target - $predictions[$index]) ** 2); + } + + return Mean::arithmetic($errors); + } + + public static function meanSquaredLogarithmicError(array $targets, array $predictions): float + { + self::assertCountEquals($targets, $predictions); + + $errors = []; + foreach ($targets as $index => $target) { + $errors[] = log((1 + $target) / (1 + $predictions[$index])) ** 2; + } + + return Mean::arithmetic($errors); + } + + public static function meanAbsoluteError(array $targets, array $predictions): float + { + self::assertCountEquals($targets, $predictions); + + $errors = []; + foreach ($targets as $index => $target) { + $errors[] = abs($target - $predictions[$index]); + } + + return Mean::arithmetic($errors); + } + + public static function medianAbsoluteError(array $targets, array $predictions): float + { + 
self::assertCountEquals($targets, $predictions); + + $errors = []; + foreach ($targets as $index => $target) { + $errors[] = abs($target - $predictions[$index]); + } + + return (float) Mean::median($errors); + } + + public static function r2Score(array $targets, array $predictions): float + { + self::assertCountEquals($targets, $predictions); + + return Correlation::pearson($targets, $predictions) ** 2; + } + + public static function maxError(array $targets, array $predictions): float + { + self::assertCountEquals($targets, $predictions); + + $errors = []; + foreach ($targets as $index => $target) { + $errors[] = abs($target - $predictions[$index]); + } + + return (float) max($errors); + } + + private static function assertCountEquals(array &$targets, array &$predictions): void + { + if (count($targets) !== count($predictions)) { + throw new InvalidArgumentException('Targets count must be equal with predictions count'); + } + } +} diff --git a/src/ModelManager.php b/src/ModelManager.php new file mode 100644 index 0000000..e0f5be5 --- /dev/null +++ b/src/ModelManager.php @@ -0,0 +1,42 @@ += 0 ? 
1.0 : 0.0; + } + + /** + * @param float|int $value + * @param float|int $computedvalue + */ + public function differentiate($value, $computedvalue): float + { + if ($value === 0 || $value === 0.0) { + return 1; + } + + return 0; + } +} diff --git a/src/NeuralNetwork/ActivationFunction/Gaussian.php b/src/NeuralNetwork/ActivationFunction/Gaussian.php new file mode 100644 index 0000000..29cfef2 --- /dev/null +++ b/src/NeuralNetwork/ActivationFunction/Gaussian.php @@ -0,0 +1,27 @@ +beta = $beta; + } + + /** + * @param float|int $value + */ + public function compute($value): float + { + return tanh($this->beta * $value); + } + + /** + * @param float|int $value + * @param float|int $computedvalue + */ + public function differentiate($value, $computedvalue): float + { + return 1 - $computedvalue ** 2; + } +} diff --git a/src/NeuralNetwork/ActivationFunction/PReLU.php b/src/NeuralNetwork/ActivationFunction/PReLU.php new file mode 100644 index 0000000..88212d1 --- /dev/null +++ b/src/NeuralNetwork/ActivationFunction/PReLU.php @@ -0,0 +1,37 @@ +beta = $beta; + } + + /** + * @param float|int $value + */ + public function compute($value): float + { + return $value >= 0 ? $value : $this->beta * $value; + } + + /** + * @param float|int $value + * @param float|int $computedvalue + */ + public function differentiate($value, $computedvalue): float + { + return $computedvalue >= 0 ? 
1.0 : $this->beta; + } +} diff --git a/src/NeuralNetwork/ActivationFunction/Sigmoid.php b/src/NeuralNetwork/ActivationFunction/Sigmoid.php new file mode 100644 index 0000000..edad3d6 --- /dev/null +++ b/src/NeuralNetwork/ActivationFunction/Sigmoid.php @@ -0,0 +1,37 @@ +beta = $beta; + } + + /** + * @param float|int $value + */ + public function compute($value): float + { + return 1 / (1 + exp(-$this->beta * $value)); + } + + /** + * @param float|int $value + * @param float|int $computedvalue + */ + public function differentiate($value, $computedvalue): float + { + return $computedvalue * (1 - $computedvalue); + } +} diff --git a/src/NeuralNetwork/ActivationFunction/ThresholdedReLU.php b/src/NeuralNetwork/ActivationFunction/ThresholdedReLU.php new file mode 100644 index 0000000..f8f8247 --- /dev/null +++ b/src/NeuralNetwork/ActivationFunction/ThresholdedReLU.php @@ -0,0 +1,37 @@ +theta = $theta; + } + + /** + * @param float|int $value + */ + public function compute($value): float + { + return $value > $this->theta ? $value : 0.0; + } + + /** + * @param float|int $value + * @param float|int $calculatedvalue + */ + public function differentiate($value, $calculatedvalue): float + { + return $calculatedvalue >= $this->theta ? 
1.0 : 0.0; + } +} diff --git a/src/NeuralNetwork/Layer.php b/src/NeuralNetwork/Layer.php new file mode 100644 index 0000000..1c67c04 --- /dev/null +++ b/src/NeuralNetwork/Layer.php @@ -0,0 +1,52 @@ +nodes[] = $this->createNode($nodeClass, $activationFunction); + } + } + + public function addNode(Node $node): void + { + $this->nodes[] = $node; + } + + /** + * @return Node[] + */ + public function getNodes(): array + { + return $this->nodes; + } + + private function createNode(string $nodeClass, ?ActivationFunction $activationFunction = null): Node + { + if ($nodeClass === Neuron::class) { + return new Neuron($activationFunction); + } + + return new $nodeClass(); + } +} diff --git a/src/NeuralNetwork/Network.php b/src/NeuralNetwork/Network.php new file mode 100644 index 0000000..0b0ce65 --- /dev/null +++ b/src/NeuralNetwork/Network.php @@ -0,0 +1,22 @@ +layers[] = $layer; + } + + /** + * @return Layer[] + */ + public function getLayers(): array + { + return $this->layers; + } + + public function removeLayers(): void + { + unset($this->layers); + } + + public function getOutputLayer(): Layer + { + return $this->layers[count($this->layers) - 1]; + } + + public function getOutput(): array + { + $result = []; + foreach ($this->getOutputLayer()->getNodes() as $neuron) { + $result[] = $neuron->getOutput(); + } + + return $result; + } + + /** + * @param mixed $input + */ + public function setInput($input): Network + { + $firstLayer = $this->layers[0]; + + foreach ($firstLayer->getNodes() as $key => $neuron) { + if ($neuron instanceof Input) { + $neuron->setInput($input[$key]); + } + } + + foreach ($this->getLayers() as $layer) { + foreach ($layer->getNodes() as $node) { + if ($node instanceof Neuron) { + $node->reset(); + } + } + } + + return $this; + } +} diff --git a/src/NeuralNetwork/Network/MultilayerPerceptron.php b/src/NeuralNetwork/Network/MultilayerPerceptron.php new file mode 100644 index 0000000..beefb1e --- /dev/null +++ 
b/src/NeuralNetwork/Network/MultilayerPerceptron.php @@ -0,0 +1,231 @@ +classes = array_values($classes); + $this->iterations = $iterations; + $this->inputLayerFeatures = $inputLayerFeatures; + $this->hiddenLayers = $hiddenLayers; + $this->activationFunction = $activationFunction; + $this->learningRate = $learningRate; + + $this->initNetwork(); + } + + public function train(array $samples, array $targets): void + { + $this->reset(); + $this->initNetwork(); + $this->partialTrain($samples, $targets, $this->classes); + } + + /** + * @throws InvalidArgumentException + */ + public function partialTrain(array $samples, array $targets, array $classes = []): void + { + if (count($classes) > 0 && array_values($classes) !== $this->classes) { + // We require the list of classes in the constructor. + throw new InvalidArgumentException( + 'The provided classes don\'t match the classes provided in the constructor' + ); + } + + for ($i = 0; $i < $this->iterations; ++$i) { + $this->trainSamples($samples, $targets); + } + } + + public function setLearningRate(float $learningRate): void + { + $this->learningRate = $learningRate; + $this->backpropagation->setLearningRate($this->learningRate); + } + + public function getOutput(): array + { + $result = []; + foreach ($this->getOutputLayer()->getNodes() as $i => $neuron) { + $result[$this->classes[$i]] = $neuron->getOutput(); + } + + return $result; + } + + public function getLearningRate(): float + { + return $this->learningRate; + } + + public function getBackpropagation(): Backpropagation + { + return $this->backpropagation; + } + + /** + * @param mixed $target + */ + abstract protected function trainSample(array $sample, $target): void; + + /** + * @return mixed + */ + abstract protected function predictSample(array $sample); + + protected function reset(): void + { + $this->removeLayers(); + } + + private function initNetwork(): void + { + $this->addInputLayer($this->inputLayerFeatures); + 
$this->addNeuronLayers($this->hiddenLayers, $this->activationFunction); + + // Sigmoid function for the output layer as we want a value from 0 to 1. + $sigmoid = new Sigmoid(); + $this->addNeuronLayers([count($this->classes)], $sigmoid); + + $this->addBiasNodes(); + $this->generateSynapses(); + + $this->backpropagation = new Backpropagation($this->learningRate); + } + + private function addInputLayer(int $nodes): void + { + $this->addLayer(new Layer($nodes, Input::class)); + } + + private function addNeuronLayers(array $layers, ?ActivationFunction $defaultActivationFunction = null): void + { + foreach ($layers as $layer) { + if (is_array($layer)) { + $function = $layer[1] instanceof ActivationFunction ? $layer[1] : $defaultActivationFunction; + $this->addLayer(new Layer($layer[0], Neuron::class, $function)); + } elseif ($layer instanceof Layer) { + $this->addLayer($layer); + } else { + $this->addLayer(new Layer($layer, Neuron::class, $defaultActivationFunction)); + } + } + } + + private function generateSynapses(): void + { + $layersNumber = count($this->layers) - 1; + for ($i = 0; $i < $layersNumber; ++$i) { + $currentLayer = $this->layers[$i]; + $nextLayer = $this->layers[$i + 1]; + $this->generateLayerSynapses($nextLayer, $currentLayer); + } + } + + private function addBiasNodes(): void + { + $biasLayers = count($this->layers) - 1; + for ($i = 0; $i < $biasLayers; ++$i) { + $this->layers[$i]->addNode(new Bias()); + } + } + + private function generateLayerSynapses(Layer $nextLayer, Layer $currentLayer): void + { + foreach ($nextLayer->getNodes() as $nextNeuron) { + if ($nextNeuron instanceof Neuron) { + $this->generateNeuronSynapses($currentLayer, $nextNeuron); + } + } + } + + private function generateNeuronSynapses(Layer $currentLayer, Neuron $nextNeuron): void + { + foreach ($currentLayer->getNodes() as $currentNeuron) { + $nextNeuron->addSynapse(new Synapse($currentNeuron)); + } + } + + private function trainSamples(array $samples, array $targets): void + { + 
foreach ($targets as $key => $target) { + $this->trainSample($samples[$key], $target); + } + } +} diff --git a/src/NeuralNetwork/Node.php b/src/NeuralNetwork/Node.php new file mode 100644 index 0000000..0b7726f --- /dev/null +++ b/src/NeuralNetwork/Node.php @@ -0,0 +1,10 @@ +input = $input; + } + + public function getOutput(): float + { + return $this->input; + } + + public function setInput(float $input): void + { + $this->input = $input; + } +} diff --git a/src/NeuralNetwork/Node/Neuron.php b/src/NeuralNetwork/Node/Neuron.php new file mode 100644 index 0000000..6681e66 --- /dev/null +++ b/src/NeuralNetwork/Node/Neuron.php @@ -0,0 +1,76 @@ +activationFunction = $activationFunction ?? new Sigmoid(); + } + + public function addSynapse(Synapse $synapse): void + { + $this->synapses[] = $synapse; + } + + /** + * @return Synapse[] + */ + public function getSynapses(): array + { + return $this->synapses; + } + + public function getOutput(): float + { + if ($this->output === 0.0) { + $this->z = 0; + foreach ($this->synapses as $synapse) { + $this->z += $synapse->getOutput(); + } + + $this->output = $this->activationFunction->compute($this->z); + } + + return $this->output; + } + + public function getDerivative(): float + { + return $this->activationFunction->differentiate($this->z, $this->output); + } + + public function reset(): void + { + $this->output = 0.0; + $this->z = 0.0; + } +} diff --git a/src/NeuralNetwork/Node/Neuron/Synapse.php b/src/NeuralNetwork/Node/Neuron/Synapse.php new file mode 100644 index 0000000..d749937 --- /dev/null +++ b/src/NeuralNetwork/Node/Neuron/Synapse.php @@ -0,0 +1,54 @@ +node = $node; + $this->weight = $weight ?? 
$this->generateRandomWeight(); + } + + public function getOutput(): float + { + return $this->weight * $this->node->getOutput(); + } + + public function changeWeight(float $delta): void + { + $this->weight += $delta; + } + + public function getWeight(): float + { + return $this->weight; + } + + public function getNode(): Node + { + return $this->node; + } + + protected function generateRandomWeight(): float + { + return (1 / random_int(5, 25) * random_int(0, 1)) > 0 ? -1 : 1; + } +} diff --git a/src/NeuralNetwork/Training/Backpropagation.php b/src/NeuralNetwork/Training/Backpropagation.php new file mode 100644 index 0000000..69a3e2a --- /dev/null +++ b/src/NeuralNetwork/Training/Backpropagation.php @@ -0,0 +1,100 @@ +setLearningRate($learningRate); + } + + public function setLearningRate(float $learningRate): void + { + $this->learningRate = $learningRate; + } + + public function getLearningRate(): float + { + return $this->learningRate; + } + + /** + * @param mixed $targetClass + */ + public function backpropagate(array $layers, $targetClass): void + { + $layersNumber = count($layers); + + // Backpropagation. + for ($i = $layersNumber; $i > 1; --$i) { + $this->sigmas = []; + foreach ($layers[$i - 1]->getNodes() as $key => $neuron) { + if ($neuron instanceof Neuron) { + $sigma = $this->getSigma($neuron, $targetClass, $key, $i == $layersNumber); + foreach ($neuron->getSynapses() as $synapse) { + $synapse->changeWeight($this->learningRate * $sigma * $synapse->getNode()->getOutput()); + } + } + } + + $this->prevSigmas = $this->sigmas; + } + + // Clean some memory (also it helps make MLP persistency & children more maintainable). 
+ $this->sigmas = []; + $this->prevSigmas = []; + } + + private function getSigma(Neuron $neuron, int $targetClass, int $key, bool $lastLayer): float + { + $neuronOutput = $neuron->getOutput(); + $sigma = $neuron->getDerivative(); + + if ($lastLayer) { + $value = 0; + if ($targetClass === $key) { + $value = 1; + } + + $sigma *= ($value - $neuronOutput); + } else { + $sigma *= $this->getPrevSigma($neuron); + } + + $this->sigmas[] = new Sigma($neuron, $sigma); + + return $sigma; + } + + private function getPrevSigma(Neuron $neuron): float + { + $sigma = 0.0; + + foreach ($this->prevSigmas as $neuronSigma) { + $sigma += $neuronSigma->getSigmaForNeuron($neuron); + } + + return $sigma; + } +} diff --git a/src/NeuralNetwork/Training/Backpropagation/Sigma.php b/src/NeuralNetwork/Training/Backpropagation/Sigma.php new file mode 100644 index 0000000..f21c7b1 --- /dev/null +++ b/src/NeuralNetwork/Training/Backpropagation/Sigma.php @@ -0,0 +1,49 @@ +neuron = $neuron; + $this->sigma = $sigma; + } + + public function getNeuron(): Neuron + { + return $this->neuron; + } + + public function getSigma(): float + { + return $this->sigma; + } + + public function getSigmaForNeuron(Neuron $neuron): float + { + $sigma = 0.0; + + foreach ($this->neuron->getSynapses() as $synapse) { + if ($synapse->getNode() == $neuron) { + $sigma += $synapse->getWeight() * $this->getSigma(); + } + } + + return $sigma; + } +} diff --git a/src/Phpml/Classification/Classifier.php b/src/Phpml/Classification/Classifier.php deleted file mode 100644 index 00e6779..0000000 --- a/src/Phpml/Classification/Classifier.php +++ /dev/null @@ -1,21 +0,0 @@ -labels as $index => $label) { - $predictions[$label] = 0; - foreach ($sample as $token => $count) { - if (array_key_exists($token, $this->samples[$index])) { - $predictions[$label] += $count * $this->samples[$index][$token]; - } - } - } - - arsort($predictions, SORT_NUMERIC); - reset($predictions); - - return key($predictions); - } -} diff --git 
a/src/Phpml/Classification/SupportVectorMachine.php b/src/Phpml/Classification/SupportVectorMachine.php deleted file mode 100644 index 5eb84e6..0000000 --- a/src/Phpml/Classification/SupportVectorMachine.php +++ /dev/null @@ -1,78 +0,0 @@ -kernel = $kernel; - $this->C = $C; - $this->tolerance = $tolerance; - $this->upperBound = $upperBound; - - $this->binPath = realpath(implode(DIRECTORY_SEPARATOR, array(dirname(__FILE__), '..', '..', '..', 'bin'))) . DIRECTORY_SEPARATOR; - } - - /** - * @param array $samples - * @param array $labels - */ - public function train(array $samples, array $labels) - { - $this->samples = $samples; - $this->labels = $labels; - } - - /** - * @param array $sample - * - * @return mixed - */ - protected function predictSample(array $sample) - { - } -} diff --git a/src/Phpml/Classification/Traits/Predictable.php b/src/Phpml/Classification/Traits/Predictable.php deleted file mode 100644 index 804b54a..0000000 --- a/src/Phpml/Classification/Traits/Predictable.php +++ /dev/null @@ -1,34 +0,0 @@ -predictSample($samples); - } else { - $predicted = []; - foreach ($samples as $index => $sample) { - $predicted[$index] = $this->predictSample($sample); - } - } - - return $predicted; - } - - /** - * @param array $sample - * - * @return mixed - */ - abstract protected function predictSample(array $sample); -} diff --git a/src/Phpml/Classification/Traits/Trainable.php b/src/Phpml/Classification/Traits/Trainable.php deleted file mode 100644 index 8fa97f2..0000000 --- a/src/Phpml/Classification/Traits/Trainable.php +++ /dev/null @@ -1,28 +0,0 @@ -samples = $samples; - $this->labels = $labels; - } -} diff --git a/src/Phpml/Clustering/Clusterer.php b/src/Phpml/Clustering/Clusterer.php deleted file mode 100644 index 8324b41..0000000 --- a/src/Phpml/Clustering/Clusterer.php +++ /dev/null @@ -1,15 +0,0 @@ -epsilon = $epsilon; - $this->minSamples = $minSamples; - $this->distanceMetric = $distanceMetric; - } - - /** - * @param array $samples - * - * @return array - 
*/ - public function cluster(array $samples) - { - $clusters = []; - $visited = []; - - foreach ($samples as $index => $sample) { - if (isset($visited[$index])) { - continue; - } - $visited[$index] = true; - - $regionSamples = $this->getSamplesInRegion($sample, $samples); - if (count($regionSamples) >= $this->minSamples) { - $clusters[] = $this->expandCluster($regionSamples, $visited); - } - } - - return $clusters; - } - - /** - * @param array $localSample - * @param array $samples - * - * @return array - */ - private function getSamplesInRegion($localSample, $samples) - { - $region = []; - - foreach ($samples as $index => $sample) { - if ($this->distanceMetric->distance($localSample, $sample) < $this->epsilon) { - $region[$index] = $sample; - } - } - - return $region; - } - - /** - * @param array $samples - * @param array $visited - * - * @return array - */ - private function expandCluster($samples, &$visited) - { - $cluster = []; - - foreach ($samples as $index => $sample) { - if (!isset($visited[$index])) { - $visited[$index] = true; - $regionSamples = $this->getSamplesInRegion($sample, $samples); - if (count($regionSamples) > $this->minSamples) { - $cluster = array_merge($regionSamples, $cluster); - } - } - - $cluster[] = $sample; - } - - return $cluster; - } -} diff --git a/src/Phpml/CrossValidation/RandomSplit.php b/src/Phpml/CrossValidation/RandomSplit.php deleted file mode 100644 index c5a24bd..0000000 --- a/src/Phpml/CrossValidation/RandomSplit.php +++ /dev/null @@ -1,105 +0,0 @@ -= $testSize || 1 <= $testSize) { - throw InvalidArgumentException::percentNotInRange('testSize'); - } - $this->seedGenerator($seed); - - $samples = $dataset->getSamples(); - $labels = $dataset->getLabels(); - $datasetSize = count($samples); - - for ($i = $datasetSize; $i > 0; --$i) { - $key = mt_rand(0, $datasetSize - 1); - $setName = count($this->testSamples) / $datasetSize >= $testSize ? 
'train' : 'test'; - - $this->{$setName.'Samples'}[] = $samples[$key]; - $this->{$setName.'Labels'}[] = $labels[$key]; - - $samples = array_values($samples); - $labels = array_values($labels); - } - } - - /** - * @return array - */ - public function getTrainSamples() - { - return $this->trainSamples; - } - - /** - * @return array - */ - public function getTestSamples() - { - return $this->testSamples; - } - - /** - * @return array - */ - public function getTrainLabels() - { - return $this->trainLabels; - } - - /** - * @return array - */ - public function getTestLabels() - { - return $this->testLabels; - } - - /** - * @param int|null $seed - */ - private function seedGenerator(int $seed = null) - { - if (null === $seed) { - mt_srand(); - } else { - mt_srand($seed); - } - } -} diff --git a/src/Phpml/Dataset/ArrayDataset.php b/src/Phpml/Dataset/ArrayDataset.php deleted file mode 100644 index 7c5c2b5..0000000 --- a/src/Phpml/Dataset/ArrayDataset.php +++ /dev/null @@ -1,52 +0,0 @@ -samples = $samples; - $this->labels = $labels; - } - - /** - * @return array - */ - public function getSamples(): array - { - return $this->samples; - } - - /** - * @return array - */ - public function getLabels(): array - { - return $this->labels; - } -} diff --git a/src/Phpml/Dataset/CsvDataset.php b/src/Phpml/Dataset/CsvDataset.php deleted file mode 100644 index 7d1f91e..0000000 --- a/src/Phpml/Dataset/CsvDataset.php +++ /dev/null @@ -1,43 +0,0 @@ -samples[] = array_slice($data, 0, $features); - $this->labels[] = $data[$features]; - } - fclose($handle); - } -} diff --git a/src/Phpml/Dataset/Dataset.php b/src/Phpml/Dataset/Dataset.php deleted file mode 100644 index 2bc4043..0000000 --- a/src/Phpml/Dataset/Dataset.php +++ /dev/null @@ -1,19 +0,0 @@ -tokenizer = $tokenizer; - $this->minDF = $minDF; - $this->vocabulary = []; - $this->frequencies = []; - } - - /** - * @param array $samples - * - * @return array - */ - public function transform(array $samples): array - { - foreach ($samples as 
$index => $sample) { - $samples[$index] = $this->transformSample($sample); - } - - $samples = $this->checkDocumentFrequency($samples); - - return $samples; - } - - /** - * @return array - */ - public function getVocabulary() - { - return array_flip($this->vocabulary); - } - - /** - * @param string $sample - * - * @return array - */ - private function transformSample(string $sample) - { - $counts = []; - $tokens = $this->tokenizer->tokenize($sample); - foreach ($tokens as $token) { - $index = $this->getTokenIndex($token); - $this->updateFrequency($token); - if (!isset($counts[$index])) { - $counts[$index] = 0; - } - - ++$counts[$index]; - } - - return $counts; - } - - /** - * @param string $token - * - * @return mixed - */ - private function getTokenIndex(string $token) - { - if (!isset($this->vocabulary[$token])) { - $this->vocabulary[$token] = count($this->vocabulary); - } - - return $this->vocabulary[$token]; - } - - /** - * @param string $token - */ - private function updateFrequency(string $token) - { - if (!isset($this->frequencies[$token])) { - $this->frequencies[$token] = 0; - } - - ++$this->frequencies[$token]; - } - - /** - * @param array $samples - * - * @return array - */ - private function checkDocumentFrequency(array $samples) - { - if ($this->minDF > 0) { - $beyondMinimum = $this->getBeyondMinimumIndexes(count($samples)); - foreach ($samples as $index => $sample) { - $samples[$index] = $this->unsetBeyondMinimum($sample, $beyondMinimum); - } - } - - return $samples; - } - - /** - * @param array $sample - * @param array $beyondMinimum - * - * @return array - */ - private function unsetBeyondMinimum(array $sample, array $beyondMinimum) - { - foreach ($beyondMinimum as $index) { - unset($sample[$index]); - } - - return $sample; - } - - /** - * @param int $samplesCount - * - * @return array - */ - private function getBeyondMinimumIndexes(int $samplesCount) - { - $indexes = []; - foreach ($this->frequencies as $token => $frequency) { - if (($frequency / 
$samplesCount) < $this->minDF) { - $indexes[] = $this->getTokenIndex($token); - } - } - - return $indexes; - } -} diff --git a/src/Phpml/FeatureExtraction/Vectorizer.php b/src/Phpml/FeatureExtraction/Vectorizer.php deleted file mode 100644 index 04a8bea..0000000 --- a/src/Phpml/FeatureExtraction/Vectorizer.php +++ /dev/null @@ -1,15 +0,0 @@ -lambda = $lambda; - } - - /** - * @param array $a - * @param array $b - * - * @return float - * - * @throws InvalidArgumentException - */ - public function distance(array $a, array $b): float - { - if (count($a) !== count($b)) { - throw InvalidArgumentException::arraySizeNotMatch(); - } - - $distance = 0; - $count = count($a); - - for ($i = 0; $i < $count; ++$i) { - $distance += pow(abs($a[$i] - $b[$i]), $this->lambda); - } - - return pow($distance, 1 / $this->lambda); - } -} diff --git a/src/Phpml/Math/Matrix.php b/src/Phpml/Math/Matrix.php deleted file mode 100644 index 208b10d..0000000 --- a/src/Phpml/Math/Matrix.php +++ /dev/null @@ -1,273 +0,0 @@ -rows = count($matrix); - $this->columns = count($matrix[0]); - - if ($validate) { - for ($i = 0; $i < $this->rows; ++$i) { - if (count($matrix[$i]) !== $this->columns) { - throw InvalidArgumentException::matrixDimensionsDidNotMatch(); - } - } - } - - $this->matrix = $matrix; - } - - /** - * @param array $array - * - * @return Matrix - */ - public static function fromFlatArray(array $array) - { - $matrix = []; - foreach ($array as $value) { - $matrix[] = [$value]; - } - - return new self($matrix); - } - - /** - * @return array - */ - public function toArray() - { - return $this->matrix; - } - - /** - * @return int - */ - public function getRows() - { - return $this->rows; - } - - /** - * @return int - */ - public function getColumns() - { - return $this->columns; - } - - /** - * @param $column - * - * @return array - * - * @throws MatrixException - */ - public function getColumnValues($column) - { - if ($column >= $this->columns) { - throw MatrixException::columnOutOfRange(); - } 
- - $values = []; - for ($i = 0; $i < $this->rows; ++$i) { - $values[] = $this->matrix[$i][$column]; - } - - return $values; - } - - /** - * @return float|int - * - * @throws MatrixException - */ - public function getDeterminant() - { - if ($this->determinant) { - return $this->determinant; - } - - if (!$this->isSquare()) { - throw MatrixException::notSquareMatrix(); - } - - return $this->determinant = $this->calculateDeterminant(); - } - - /** - * @return float|int - * - * @throws MatrixException - */ - private function calculateDeterminant() - { - $determinant = 0; - if ($this->rows == 1 && $this->columns == 1) { - $determinant = $this->matrix[0][0]; - } elseif ($this->rows == 2 && $this->columns == 2) { - $determinant = - $this->matrix[0][0] * $this->matrix[1][1] - - $this->matrix[0][1] * $this->matrix[1][0]; - } else { - for ($j = 0; $j < $this->columns; ++$j) { - $subMatrix = $this->crossOut(0, $j); - $minor = $this->matrix[0][$j] * $subMatrix->getDeterminant(); - $determinant += fmod((float) $j, 2.0) == 0 ? 
$minor : -$minor; - } - } - - return $determinant; - } - - /** - * @return bool - */ - public function isSquare() - { - return $this->columns === $this->rows; - } - - /** - * @return Matrix - */ - public function transpose() - { - $newMatrix = []; - for ($i = 0; $i < $this->rows; ++$i) { - for ($j = 0; $j < $this->columns; ++$j) { - $newMatrix[$j][$i] = $this->matrix[$i][$j]; - } - } - - return new self($newMatrix, false); - } - - /** - * @param Matrix $matrix - * - * @return Matrix - * - * @throws InvalidArgumentException - */ - public function multiply(Matrix $matrix) - { - if ($this->columns != $matrix->getRows()) { - throw InvalidArgumentException::inconsistentMatrixSupplied(); - } - - $product = []; - $multiplier = $matrix->toArray(); - for ($i = 0; $i < $this->rows; ++$i) { - for ($j = 0; $j < $matrix->getColumns(); ++$j) { - $product[$i][$j] = 0; - for ($k = 0; $k < $this->columns; ++$k) { - $product[$i][$j] += $this->matrix[$i][$k] * $multiplier[$k][$j]; - } - } - } - - return new self($product, false); - } - - /** - * @param $value - * - * @return Matrix - */ - public function divideByScalar($value) - { - $newMatrix = array(); - for ($i = 0; $i < $this->rows; ++$i) { - for ($j = 0; $j < $this->columns; ++$j) { - $newMatrix[$i][$j] = $this->matrix[$i][$j] / $value; - } - } - - return new self($newMatrix, false); - } - - /** - * @return Matrix - * - * @throws MatrixException - */ - public function inverse() - { - if (!$this->isSquare()) { - throw MatrixException::notSquareMatrix(); - } - - $newMatrix = array(); - for ($i = 0; $i < $this->rows; ++$i) { - for ($j = 0; $j < $this->columns; ++$j) { - $minor = $this->crossOut($i, $j)->getDeterminant(); - $newMatrix[$i][$j] = fmod((float) ($i + $j), 2.0) == 0 ? 
$minor : -$minor; - } - } - - $cofactorMatrix = new self($newMatrix, false); - - return $cofactorMatrix->transpose()->divideByScalar($this->getDeterminant()); - } - - /** - * @param int $row - * @param int $column - * - * @return Matrix - */ - public function crossOut(int $row, int $column) - { - $newMatrix = []; - $r = 0; - for ($i = 0; $i < $this->rows; ++$i) { - $c = 0; - if ($row != $i) { - for ($j = 0; $j < $this->columns; ++$j) { - if ($column != $j) { - $newMatrix[$r][$c] = $this->matrix[$i][$j]; - ++$c; - } - } - ++$r; - } - } - - return new self($newMatrix, false); - } -} diff --git a/src/Phpml/Math/Statistic/Mean.php b/src/Phpml/Math/Statistic/Mean.php deleted file mode 100644 index 2716b78..0000000 --- a/src/Phpml/Math/Statistic/Mean.php +++ /dev/null @@ -1,18 +0,0 @@ -transformers = array_map(static function (Transformer $transformer): Transformer { + return $transformer; + }, $transformers); + $this->estimator = $estimator; + } + + /** + * @return Transformer[] + */ + public function getTransformers(): array + { + return $this->transformers; + } + + public function getEstimator(): ?Estimator + { + return $this->estimator; + } + + public function train(array $samples, array $targets): void + { + if ($this->estimator === null) { + throw new InvalidOperationException('Pipeline without estimator can\'t use train method'); + } + + foreach ($this->transformers as $transformer) { + $transformer->fit($samples, $targets); + $transformer->transform($samples, $targets); + } + + $this->estimator->train($samples, $targets); + } + + /** + * @return mixed + */ + public function predict(array $samples) + { + $this->transform($samples); + + if ($this->estimator === null) { + throw new InvalidOperationException('Pipeline without estimator can\'t use predict method'); + } + + return $this->estimator->predict($samples); + } + + public function fit(array $samples, ?array $targets = null): void + { + foreach ($this->transformers as $transformer) { + 
$transformer->fit($samples, $targets); + $transformer->transform($samples, $targets); + } + } + + public function transform(array &$samples, ?array &$targets = null): void + { + foreach ($this->transformers as $transformer) { + $transformer->transform($samples, $targets); + } + } +} diff --git a/src/Preprocessing/ColumnFilter.php b/src/Preprocessing/ColumnFilter.php new file mode 100644 index 0000000..afe2db7 --- /dev/null +++ b/src/Preprocessing/ColumnFilter.php @@ -0,0 +1,42 @@ +datasetColumns = array_map(static function (string $column): string { + return $column; + }, $datasetColumns); + $this->filterColumns = array_map(static function (string $column): string { + return $column; + }, $filterColumns); + } + + public function fit(array $samples, ?array $targets = null): void + { + //nothing to do + } + + public function transform(array &$samples, ?array &$targets = null): void + { + $keys = array_intersect($this->datasetColumns, $this->filterColumns); + + foreach ($samples as &$sample) { + $sample = array_values(array_intersect_key($sample, $keys)); + } + } +} diff --git a/src/Preprocessing/Imputer.php b/src/Preprocessing/Imputer.php new file mode 100644 index 0000000..88ee2dd --- /dev/null +++ b/src/Preprocessing/Imputer.php @@ -0,0 +1,87 @@ +missingValue = $missingValue; + $this->strategy = $strategy; + $this->axis = $axis; + $this->samples = $samples; + } + + public function fit(array $samples, ?array $targets = null): void + { + $this->samples = $samples; + } + + public function transform(array &$samples, ?array &$targets = null): void + { + if ($this->samples === []) { + throw new InvalidOperationException('Missing training samples for Imputer.'); + } + + foreach ($samples as &$sample) { + $this->preprocessSample($sample); + } + } + + private function preprocessSample(array &$sample): void + { + foreach ($sample as $column => &$value) { + if ($value === $this->missingValue) { + $value = $this->strategy->replaceValue($this->getAxis($column, $sample)); + } + 
} + } + + private function getAxis(int $column, array $currentSample): array + { + if ($this->axis === self::AXIS_ROW) { + return array_diff($currentSample, [$this->missingValue]); + } + + $axis = []; + foreach ($this->samples as $sample) { + if ($sample[$column] !== $this->missingValue) { + $axis[] = $sample[$column]; + } + } + + return $axis; + } +} diff --git a/src/Preprocessing/Imputer/Strategy.php b/src/Preprocessing/Imputer/Strategy.php new file mode 100644 index 0000000..96397c1 --- /dev/null +++ b/src/Preprocessing/Imputer/Strategy.php @@ -0,0 +1,13 @@ +classes = []; + + foreach ($samples as $sample) { + if (!isset($this->classes[(string) $sample])) { + $this->classes[(string) $sample] = count($this->classes); + } + } + } + + public function transform(array &$samples, ?array &$targets = null): void + { + foreach ($samples as &$sample) { + $sample = $this->classes[(string) $sample]; + } + } + + public function inverseTransform(array &$samples): void + { + $classes = array_flip($this->classes); + foreach ($samples as &$sample) { + $sample = $classes[$sample]; + } + } + + /** + * @return string[] + */ + public function classes(): array + { + return array_keys($this->classes); + } +} diff --git a/src/Preprocessing/LambdaTransformer.php b/src/Preprocessing/LambdaTransformer.php new file mode 100644 index 0000000..f6b5a8b --- /dev/null +++ b/src/Preprocessing/LambdaTransformer.php @@ -0,0 +1,30 @@ +lambda = $lambda; + } + + public function fit(array $samples, ?array $targets = null): void + { + // nothing to do + } + + public function transform(array &$samples, ?array &$targets = null): void + { + foreach ($samples as &$sample) { + $sample = call_user_func($this->lambda, $sample); + } + } +} diff --git a/src/Preprocessing/Normalizer.php b/src/Preprocessing/Normalizer.php new file mode 100644 index 0000000..5ba43e6 --- /dev/null +++ b/src/Preprocessing/Normalizer.php @@ -0,0 +1,131 @@ +norm = $norm; + } + + public function fit(array $samples, ?array $targets = 
null): void + { + if ($this->fitted) { + return; + } + + if ($this->norm === self::NORM_STD) { + $features = range(0, count($samples[0]) - 1); + foreach ($features as $i) { + $values = array_column($samples, $i); + $this->std[$i] = StandardDeviation::population($values); + $this->mean[$i] = Mean::arithmetic($values); + } + } + + $this->fitted = true; + } + + public function transform(array &$samples, ?array &$targets = null): void + { + $methods = [ + self::NORM_L1 => 'normalizeL1', + self::NORM_L2 => 'normalizeL2', + self::NORM_STD => 'normalizeSTD', + ]; + $method = $methods[$this->norm]; + + $this->fit($samples); + + foreach ($samples as &$sample) { + $this->{$method}($sample); + } + } + + private function normalizeL1(array &$sample): void + { + $norm1 = 0; + foreach ($sample as $feature) { + $norm1 += abs($feature); + } + + if ($norm1 == 0) { + $count = count($sample); + $sample = array_fill(0, $count, 1.0 / $count); + } else { + array_walk($sample, function (&$feature) use ($norm1): void { + $feature /= $norm1; + }); + } + } + + private function normalizeL2(array &$sample): void + { + $norm2 = 0; + foreach ($sample as $feature) { + $norm2 += $feature * $feature; + } + + $norm2 **= .5; + + if ($norm2 == 0) { + $sample = array_fill(0, count($sample), 1); + } else { + array_walk($sample, function (&$feature) use ($norm2): void { + $feature /= $norm2; + }); + } + } + + private function normalizeSTD(array &$sample): void + { + foreach (array_keys($sample) as $i) { + if ($this->std[$i] != 0) { + $sample[$i] = ($sample[$i] - $this->mean[$i]) / $this->std[$i]; + } else { + // Same value for all samples. 
+ $sample[$i] = 0; + } + } + } +} diff --git a/src/Preprocessing/NumberConverter.php b/src/Preprocessing/NumberConverter.php new file mode 100644 index 0000000..68247b1 --- /dev/null +++ b/src/Preprocessing/NumberConverter.php @@ -0,0 +1,47 @@ +transformTargets = $transformTargets; + $this->nonNumericPlaceholder = $nonNumericPlaceholder; + } + + public function fit(array $samples, ?array $targets = null): void + { + //nothing to do + } + + public function transform(array &$samples, ?array &$targets = null): void + { + foreach ($samples as &$sample) { + foreach ($sample as &$feature) { + $feature = is_numeric($feature) ? (float) $feature : $this->nonNumericPlaceholder; + } + } + + if ($this->transformTargets && is_array($targets)) { + foreach ($targets as &$target) { + $target = is_numeric($target) ? (float) $target : $this->nonNumericPlaceholder; + } + } + } +} diff --git a/src/Preprocessing/OneHotEncoder.php b/src/Preprocessing/OneHotEncoder.php new file mode 100644 index 0000000..c9d4d0a --- /dev/null +++ b/src/Preprocessing/OneHotEncoder.php @@ -0,0 +1,66 @@ +ignoreUnknown = $ignoreUnknown; + } + + public function fit(array $samples, ?array $targets = null): void + { + foreach (array_keys(array_values(current($samples))) as $column) { + $this->fitColumn($column, array_values(array_unique(array_column($samples, $column)))); + } + } + + public function transform(array &$samples, ?array &$targets = null): void + { + foreach ($samples as &$sample) { + $sample = $this->transformSample(array_values($sample)); + } + } + + private function fitColumn(int $column, array $values): void + { + $count = count($values); + foreach ($values as $index => $value) { + $map = array_fill(0, $count, 0); + $map[$index] = 1; + $this->categories[$column][$value] = $map; + } + } + + private function transformSample(array $sample): array + { + $encoded = []; + foreach ($sample as $column => $feature) { + if (!isset($this->categories[$column][$feature]) && !$this->ignoreUnknown) { + throw 
new InvalidArgumentException(sprintf('Missing category "%s" for column %s in trained encoder', $feature, $column)); + } + + $encoded = array_merge( + $encoded, + $this->categories[$column][$feature] ?? array_fill(0, count($this->categories[$column]), 0) + ); + } + + return $encoded; + } +} diff --git a/src/Preprocessing/Preprocessor.php b/src/Preprocessing/Preprocessor.php new file mode 100644 index 0000000..3ec1566 --- /dev/null +++ b/src/Preprocessing/Preprocessor.php @@ -0,0 +1,11 @@ +maxFeatures = $maxFeatures; + $this->tolerance = $tolerance; + + parent::__construct($maxDepth, $maxLeafSize, $minPurityIncrease); + } + + public function train(array $samples, array $targets): void + { + $features = count($samples[0]); + + $this->columns = range(0, $features - 1); + $this->maxFeatures = $this->maxFeatures ?? (int) round(sqrt($features)); + + $this->grow($samples, $targets); + + $this->columns = []; + } + + public function predict(array $samples) + { + if ($this->bare()) { + throw new InvalidOperationException('Regressor must be trained first'); + } + + $predictions = []; + + foreach ($samples as $sample) { + $node = $this->search($sample); + + $predictions[] = $node instanceof AverageNode + ? 
$node->outcome() + : null; + } + + return $predictions; + } + + protected function split(array $samples, array $targets): DecisionNode + { + $bestVariance = INF; + $bestColumn = $bestValue = null; + $bestGroups = []; + + shuffle($this->columns); + + foreach (array_slice($this->columns, 0, $this->maxFeatures) as $column) { + $values = array_unique(array_column($samples, $column)); + + foreach ($values as $value) { + $groups = $this->partition($column, $value, $samples, $targets); + + $variance = $this->splitImpurity($groups); + + if ($variance < $bestVariance) { + $bestColumn = $column; + $bestValue = $value; + $bestGroups = $groups; + $bestVariance = $variance; + } + + if ($variance <= $this->tolerance) { + break 2; + } + } + } + + return new DecisionNode($bestColumn, $bestValue, $bestGroups, $bestVariance); + } + + protected function terminate(array $targets): BinaryNode + { + return new AverageNode(Mean::arithmetic($targets), Variance::population($targets), count($targets)); + } + + protected function splitImpurity(array $groups): float + { + $samplesCount = (int) array_sum(array_map(static function (array $group): int { + return count($group[0]); + }, $groups)); + + $impurity = 0.; + + foreach ($groups as $group) { + $k = count($group[1]); + + if ($k < 2) { + continue 1; + } + + $variance = Variance::population($group[1]); + + $impurity += ($k / $samplesCount) * $variance; + } + + return $impurity; + } + + /** + * @param int|float $value + */ + private function partition(int $column, $value, array $samples, array $targets): array + { + $leftSamples = $leftTargets = $rightSamples = $rightTargets = []; + foreach ($samples as $index => $sample) { + if ($sample[$column] < $value) { + $leftSamples[] = $sample; + $leftTargets[] = $targets[$index]; + } else { + $rightSamples[] = $sample; + $rightTargets[] = $targets[$index]; + } + } + + return [ + [$leftSamples, $leftTargets], + [$rightSamples, $rightTargets], + ]; + } +} diff --git 
a/src/Phpml/Regression/LeastSquares.php b/src/Regression/LeastSquares.php similarity index 68% rename from src/Phpml/Regression/LeastSquares.php rename to src/Regression/LeastSquares.php index cd0251f..d00ebf5 100644 --- a/src/Phpml/Regression/LeastSquares.php +++ b/src/Regression/LeastSquares.php @@ -1,22 +1,25 @@ samples = $samples; - $this->targets = $targets; + $this->samples = array_merge($this->samples, $samples); + $this->targets = array_merge($this->targets, $targets); $this->computeCoefficients(); } /** - * @param array $sample - * * @return mixed */ - public function predict($sample) + public function predictSample(array $sample) { $result = $this->intercept; foreach ($this->coefficients as $index => $coefficient) { @@ -55,18 +52,12 @@ class LeastSquares implements Regression return $result; } - /** - * @return array - */ - public function getCoefficients() + public function getCoefficients(): array { return $this->coefficients; } - /** - * @return float - */ - public function getIntercept() + public function getIntercept(): float { return $this->intercept; } @@ -74,7 +65,7 @@ class LeastSquares implements Regression /** * coefficient(b) = (X'X)-1X'Y. */ - private function computeCoefficients() + private function computeCoefficients(): void { $samplesMatrix = $this->getSamplesMatrix(); $targetsMatrix = $this->getTargetsMatrix(); @@ -88,10 +79,8 @@ class LeastSquares implements Regression /** * Add one dimension for intercept calculation. 
- * - * @return Matrix */ - private function getSamplesMatrix() + private function getSamplesMatrix(): Matrix { $samples = []; foreach ($this->samples as $sample) { @@ -102,10 +91,7 @@ class LeastSquares implements Regression return new Matrix($samples); } - /** - * @return Matrix - */ - private function getTargetsMatrix() + private function getTargetsMatrix(): Matrix { if (is_array($this->targets[0])) { return new Matrix($this->targets); diff --git a/src/Regression/Regression.php b/src/Regression/Regression.php new file mode 100644 index 0000000..542685c --- /dev/null +++ b/src/Regression/Regression.php @@ -0,0 +1,11 @@ + $label) { + $set .= sprintf('%s %s %s', ($targets ? $label : $numericLabels[$label]), self::sampleRow($samples[$index]), PHP_EOL); + } + + return $set; + } + + public static function testSet(array $samples): string + { + if (count($samples) === 0) { + throw new InvalidArgumentException('The array has zero elements'); + } + + if (!is_array($samples[0])) { + $samples = [$samples]; + } + + $set = ''; + foreach ($samples as $sample) { + $set .= sprintf('0 %s %s', self::sampleRow($sample), PHP_EOL); + } + + return $set; + } + + public static function predictions(string $rawPredictions, array $labels): array + { + $numericLabels = self::numericLabels($labels); + $results = []; + foreach (explode(PHP_EOL, $rawPredictions) as $result) { + if (isset($result[0])) { + $results[] = array_search((int) $result, $numericLabels, true); + } + } + + return $results; + } + + public static function probabilities(string $rawPredictions, array $labels): array + { + $numericLabels = self::numericLabels($labels); + + $predictions = explode(PHP_EOL, trim($rawPredictions)); + + $header = array_shift($predictions); + $headerColumns = explode(' ', (string) $header); + array_shift($headerColumns); + + $columnLabels = []; + foreach ($headerColumns as $numericLabel) { + $columnLabels[] = array_search((int) $numericLabel, $numericLabels, true); + } + + $results = []; + foreach 
($predictions as $rawResult) { + $probabilities = explode(' ', $rawResult); + array_shift($probabilities); + + $result = []; + foreach ($probabilities as $i => $prob) { + $result[$columnLabels[$i]] = (float) $prob; + } + + $results[] = $result; + } + + return $results; + } + + public static function numericLabels(array $labels): array + { + $numericLabels = []; + foreach ($labels as $label) { + if (isset($numericLabels[$label])) { + continue; + } + + $numericLabels[$label] = count($numericLabels); + } + + return $numericLabels; + } + + private static function sampleRow(array $sample): string + { + $row = []; + foreach ($sample as $index => $feature) { + $row[] = sprintf('%s:%F', $index + 1, $feature); + } + + return implode(' ', $row); + } +} diff --git a/src/SupportVectorMachine/Kernel.php b/src/SupportVectorMachine/Kernel.php new file mode 100644 index 0000000..af76e6d --- /dev/null +++ b/src/SupportVectorMachine/Kernel.php @@ -0,0 +1,28 @@ +type = $type; + $this->kernel = $kernel; + $this->cost = $cost; + $this->nu = $nu; + $this->degree = $degree; + $this->gamma = $gamma; + $this->coef0 = $coef0; + $this->epsilon = $epsilon; + $this->tolerance = $tolerance; + $this->cacheSize = $cacheSize; + $this->shrinking = $shrinking; + $this->probabilityEstimates = $probabilityEstimates; + + $rootPath = realpath(implode(DIRECTORY_SEPARATOR, [__DIR__, '..', '..'])).DIRECTORY_SEPARATOR; + + $this->binPath = $rootPath.'bin'.DIRECTORY_SEPARATOR.'libsvm'.DIRECTORY_SEPARATOR; + $this->varPath = $rootPath.'var'.DIRECTORY_SEPARATOR; + } + + public function setBinPath(string $binPath): void + { + $this->ensureDirectorySeparator($binPath); + $this->verifyBinPath($binPath); + + $this->binPath = $binPath; + } + + public function setVarPath(string $varPath): void + { + if (!is_writable($varPath)) { + throw new InvalidArgumentException(sprintf('The specified path "%s" is not writable', $varPath)); + } + + $this->ensureDirectorySeparator($varPath); + $this->varPath = $varPath; + } + + 
public function train(array $samples, array $targets): void + { + $this->samples = array_merge($this->samples, $samples); + $this->targets = array_merge($this->targets, $targets); + + $trainingSet = DataTransformer::trainingSet($this->samples, $this->targets, in_array($this->type, [Type::EPSILON_SVR, Type::NU_SVR], true)); + file_put_contents($trainingSetFileName = $this->varPath.uniqid('phpml', true), $trainingSet); + $modelFileName = $trainingSetFileName.'-model'; + + $command = $this->buildTrainCommand($trainingSetFileName, $modelFileName); + $output = []; + exec(escapeshellcmd($command).' 2>&1', $output, $return); + + unlink($trainingSetFileName); + + if ($return !== 0) { + throw new LibsvmCommandException( + sprintf('Failed running libsvm command: "%s" with reason: "%s"', $command, array_pop($output)) + ); + } + + $this->model = (string) file_get_contents($modelFileName); + + unlink($modelFileName); + } + + public function getModel(): string + { + return $this->model; + } + + /** + * @return array|string + * + * @throws LibsvmCommandException + */ + public function predict(array $samples) + { + $predictions = $this->runSvmPredict($samples, false); + + if (in_array($this->type, [Type::C_SVC, Type::NU_SVC], true)) { + $predictions = DataTransformer::predictions($predictions, $this->targets); + } else { + $predictions = explode(PHP_EOL, trim($predictions)); + } + + if (!is_array($samples[0])) { + return $predictions[0]; + } + + return $predictions; + } + + /** + * @return array|string + * + * @throws LibsvmCommandException + */ + public function predictProbability(array $samples) + { + if (!$this->probabilityEstimates) { + throw new InvalidOperationException('Model does not support probabiliy estimates'); + } + + $predictions = $this->runSvmPredict($samples, true); + + if (in_array($this->type, [Type::C_SVC, Type::NU_SVC], true)) { + $predictions = DataTransformer::probabilities($predictions, $this->targets); + } else { + $predictions = explode(PHP_EOL, 
trim($predictions)); + } + + if (!is_array($samples[0])) { + return $predictions[0]; + } + + return $predictions; + } + + private function runSvmPredict(array $samples, bool $probabilityEstimates): string + { + $testSet = DataTransformer::testSet($samples); + file_put_contents($testSetFileName = $this->varPath.uniqid('phpml', true), $testSet); + file_put_contents($modelFileName = $testSetFileName.'-model', $this->model); + $outputFileName = $testSetFileName.'-output'; + + $command = $this->buildPredictCommand( + $testSetFileName, + $modelFileName, + $outputFileName, + $probabilityEstimates + ); + $output = []; + exec(escapeshellcmd($command).' 2>&1', $output, $return); + + unlink($testSetFileName); + unlink($modelFileName); + $predictions = (string) file_get_contents($outputFileName); + + unlink($outputFileName); + + if ($return !== 0) { + throw new LibsvmCommandException( + sprintf('Failed running libsvm command: "%s" with reason: "%s"', $command, array_pop($output)) + ); + } + + return $predictions; + } + + private function getOSExtension(): string + { + $os = strtoupper(substr(PHP_OS, 0, 3)); + if ($os === 'WIN') { + return '.exe'; + } elseif ($os === 'DAR') { + return '-osx'; + } + + return ''; + } + + private function buildTrainCommand(string $trainingSetFileName, string $modelFileName): string + { + return sprintf( + '%ssvm-train%s -s %s -t %s -c %s -n %F -d %s%s -r %s -p %F -m %F -e %F -h %d -b %d %s %s', + $this->binPath, + $this->getOSExtension(), + $this->type, + $this->kernel, + $this->cost, + $this->nu, + $this->degree, + $this->gamma !== null ? 
' -g '.$this->gamma : '', + $this->coef0, + $this->epsilon, + $this->cacheSize, + $this->tolerance, + $this->shrinking, + $this->probabilityEstimates, + escapeshellarg($trainingSetFileName), + escapeshellarg($modelFileName) + ); + } + + private function buildPredictCommand( + string $testSetFileName, + string $modelFileName, + string $outputFileName, + bool $probabilityEstimates + ): string { + return sprintf( + '%ssvm-predict%s -b %d %s %s %s', + $this->binPath, + $this->getOSExtension(), + $probabilityEstimates ? 1 : 0, + escapeshellarg($testSetFileName), + escapeshellarg($modelFileName), + escapeshellarg($outputFileName) + ); + } + + private function ensureDirectorySeparator(string &$path): void + { + if (substr($path, -1) !== DIRECTORY_SEPARATOR) { + $path .= DIRECTORY_SEPARATOR; + } + } + + private function verifyBinPath(string $path): void + { + if (!is_dir($path)) { + throw new InvalidArgumentException(sprintf('The specified path "%s" does not exist', $path)); + } + + $osExtension = $this->getOSExtension(); + foreach (['svm-predict', 'svm-scale', 'svm-train'] as $filename) { + $filePath = $path.$filename.$osExtension; + if (!file_exists($filePath)) { + throw new InvalidArgumentException(sprintf('File "%s" not found', $filePath)); + } + + if (!is_executable($filePath)) { + throw new InvalidArgumentException(sprintf('File "%s" is not executable', $filePath)); + } + } + } +} diff --git a/src/SupportVectorMachine/Type.php b/src/SupportVectorMachine/Type.php new file mode 100644 index 0000000..1dea9df --- /dev/null +++ b/src/SupportVectorMachine/Type.php @@ -0,0 +1,33 @@ + $maxGram) { + throw new InvalidArgumentException(sprintf('Invalid (%s, %s) minGram and maxGram value combination', $minGram, $maxGram)); + } + + $this->minGram = $minGram; + $this->maxGram = $maxGram; + } + + /** + * {@inheritdoc} + */ + public function tokenize(string $text): array + { + $words = []; + preg_match_all('/\w\w+/u', $text, $words); + + $nGrams = []; + foreach ($words[0] as $word) 
{ + $this->generateNGrams($word, $nGrams); + } + + return $nGrams; + } + + private function generateNGrams(string $word, array &$nGrams): void + { + $length = mb_strlen($word); + + for ($j = 1; $j <= $this->maxGram; $j++) { + for ($k = 0; $k < $length - $j + 1; $k++) { + if ($j >= $this->minGram) { + $nGrams[] = mb_substr($word, $k, $j); + } + } + } + } +} diff --git a/src/Tokenization/NGramWordTokenizer.php b/src/Tokenization/NGramWordTokenizer.php new file mode 100644 index 0000000..20ee28c --- /dev/null +++ b/src/Tokenization/NGramWordTokenizer.php @@ -0,0 +1,64 @@ + $maxGram) { + throw new InvalidArgumentException(sprintf('Invalid (%s, %s) minGram and maxGram value combination', $minGram, $maxGram)); + } + + $this->minGram = $minGram; + $this->maxGram = $maxGram; + } + + /** + * {@inheritdoc} + */ + public function tokenize(string $text): array + { + preg_match_all('/\w\w+/u', $text, $words); + + $words = $words[0]; + + $nGrams = []; + for ($j = $this->minGram; $j <= $this->maxGram; $j++) { + $nGrams = array_merge($nGrams, $this->getNgrams($words, $j)); + } + + return $nGrams; + } + + private function getNgrams(array $match, int $n = 2): array + { + $ngrams = []; + $len = count($match); + for ($i = 0; $i < $len; $i++) { + if ($i > ($n - 2)) { + $ng = ''; + for ($j = $n - 1; $j >= 0; $j--) { + $ng .= ' '.$match[$i - $j]; + } + $ngrams[] = trim($ng); + } + } + + return $ngrams; + } +} diff --git a/src/Phpml/Tokenization/Tokenizer.php b/src/Tokenization/Tokenizer.php similarity index 53% rename from src/Phpml/Tokenization/Tokenizer.php rename to src/Tokenization/Tokenizer.php index 5539a85..f2dffd9 100644 --- a/src/Phpml/Tokenization/Tokenizer.php +++ b/src/Tokenization/Tokenizer.php @@ -1,15 +1,10 @@ maxDepth = $maxDepth; + $this->maxLeafSize = $maxLeafSize; + $this->minPurityIncrease = $minPurityIncrease; + } + + public function root(): ?DecisionNode + { + return $this->root; + } + + public function height(): int + { + return $this->root !== null ? 
$this->root->height() : 0; + } + + public function balance(): int + { + return $this->root !== null ? $this->root->balance() : 0; + } + + public function bare(): bool + { + return $this->root === null; + } + + public function grow(array $samples, array $targets): void + { + $this->featureCount = count($samples[0]); + $depth = 1; + $this->root = $this->split($samples, $targets); + $stack = [[$this->root, $depth]]; + + while ($stack) { + [$current, $depth] = array_pop($stack) ?? []; + + [$left, $right] = $current->groups(); + + $current->cleanup(); + + $depth++; + + if ($left[1] === [] || $right[1] === []) { + $node = $this->terminate(array_merge($left[1], $right[1])); + + $current->attachLeft($node); + $current->attachRight($node); + + continue 1; + } + + if ($depth >= $this->maxDepth) { + $current->attachLeft($this->terminate($left[1])); + $current->attachRight($this->terminate($right[1])); + + continue 1; + } + + if (count($left[1]) > $this->maxLeafSize) { + $node = $this->split($left[0], $left[1]); + + if ($node->purityIncrease() + 1e-8 > $this->minPurityIncrease) { + $current->attachLeft($node); + + $stack[] = [$node, $depth]; + } else { + $current->attachLeft($this->terminate($left[1])); + } + } else { + $current->attachLeft($this->terminate($left[1])); + } + + if (count($right[1]) > $this->maxLeafSize) { + $node = $this->split($right[0], $right[1]); + + if ($node->purityIncrease() + 1e-8 > $this->minPurityIncrease) { + $current->attachRight($node); + + $stack[] = [$node, $depth]; + } else { + $current->attachRight($this->terminate($right[1])); + } + } else { + $current->attachRight($this->terminate($right[1])); + } + } + } + + public function search(array $sample): ?BinaryNode + { + $current = $this->root; + + while ($current) { + if ($current instanceof DecisionNode) { + $value = $current->value(); + + if (is_string($value)) { + if ($sample[$current->column()] === $value) { + $current = $current->left(); + } else { + $current = $current->right(); + } + } else 
{ + if ($sample[$current->column()] < $value) { + $current = $current->left(); + } else { + $current = $current->right(); + } + } + + continue 1; + } + + if ($current instanceof LeafNode) { + break 1; + } + } + + return $current; + } + + abstract protected function split(array $samples, array $targets): DecisionNode; + + abstract protected function terminate(array $targets): BinaryNode; +} diff --git a/src/Tree/Node.php b/src/Tree/Node.php new file mode 100644 index 0000000..3176b62 --- /dev/null +++ b/src/Tree/Node.php @@ -0,0 +1,9 @@ +outcome = $outcome; + $this->impurity = $impurity; + $this->samplesCount = $samplesCount; + } + + public function outcome(): float + { + return $this->outcome; + } + + public function impurity(): float + { + return $this->impurity; + } + + public function samplesCount(): int + { + return $this->samplesCount; + } +} diff --git a/src/Tree/Node/BinaryNode.php b/src/Tree/Node/BinaryNode.php new file mode 100644 index 0000000..c6797b5 --- /dev/null +++ b/src/Tree/Node/BinaryNode.php @@ -0,0 +1,83 @@ +parent; + } + + public function left(): ?self + { + return $this->left; + } + + public function right(): ?self + { + return $this->right; + } + + public function height(): int + { + return 1 + max($this->left !== null ? $this->left->height() : 0, $this->right !== null ? $this->right->height() : 0); + } + + public function balance(): int + { + return ($this->right !== null ? $this->right->height() : 0) - ($this->left !== null ? 
$this->left->height() : 0); + } + + public function setParent(?self $node = null): void + { + $this->parent = $node; + } + + public function attachLeft(self $node): void + { + $node->setParent($this); + $this->left = $node; + } + + public function detachLeft(): void + { + if ($this->left !== null) { + $this->left->setParent(); + $this->left = null; + } + } + + public function attachRight(self $node): void + { + $node->setParent($this); + $this->right = $node; + } + + public function detachRight(): void + { + if ($this->right !== null) { + $this->right->setParent(); + $this->right = null; + } + } +} diff --git a/src/Tree/Node/DecisionNode.php b/src/Tree/Node/DecisionNode.php new file mode 100644 index 0000000..311e0e7 --- /dev/null +++ b/src/Tree/Node/DecisionNode.php @@ -0,0 +1,107 @@ +column = $column; + $this->value = $value; + $this->groups = $groups; + $this->impurity = $impurity; + $this->samplesCount = (int) array_sum(array_map(static function (array $group): int { + return count($group[0]); + }, $groups)); + } + + public function column(): int + { + return $this->column; + } + + /** + * @return mixed + */ + public function value() + { + return $this->value; + } + + public function groups(): array + { + return $this->groups; + } + + public function impurity(): float + { + return $this->impurity; + } + + public function samplesCount(): int + { + return $this->samplesCount; + } + + public function purityIncrease(): float + { + $impurity = $this->impurity; + + if ($this->left() instanceof PurityNode) { + $impurity -= $this->left()->impurity() + * ($this->left()->samplesCount() / $this->samplesCount); + } + + if ($this->right() instanceof PurityNode) { + $impurity -= $this->right()->impurity() + * ($this->right()->samplesCount() / $this->samplesCount); + } + + return $impurity; + } + + public function cleanup(): void + { + $this->groups = [[], []]; + } +} diff --git a/src/Tree/Node/LeafNode.php b/src/Tree/Node/LeafNode.php new file mode 100644 index 
0000000..ebb848e --- /dev/null +++ b/src/Tree/Node/LeafNode.php @@ -0,0 +1,9 @@ +train($this->sampleGreek, []); + + $predicted = $apriori->predict([['alpha', 'epsilon'], ['beta', 'theta']]); + + self::assertCount(2, $predicted); + self::assertEquals([['beta']], $predicted[0]); + self::assertEquals([['alpha']], $predicted[1]); + } + + public function testPowerSet(): void + { + $apriori = new Apriori(); + + self::assertCount(8, self::invoke($apriori, 'powerSet', [['a', 'b', 'c']])); + } + + public function testApriori(): void + { + $apriori = new Apriori(3 / 7); + $apriori->train($this->sampleBasket, []); + + $L = $apriori->apriori(); + + self::assertCount(4, $L[2]); + self::assertTrue(self::invoke($apriori, 'contains', [$L[2], [1, 2]])); + self::assertFalse(self::invoke($apriori, 'contains', [$L[2], [1, 3]])); + self::assertFalse(self::invoke($apriori, 'contains', [$L[2], [1, 4]])); + self::assertTrue(self::invoke($apriori, 'contains', [$L[2], [2, 3]])); + self::assertTrue(self::invoke($apriori, 'contains', [$L[2], [2, 4]])); + self::assertTrue(self::invoke($apriori, 'contains', [$L[2], [3, 4]])); + } + + public function testAprioriEmpty(): void + { + $sample = []; + + $apriori = new Apriori(0, 0); + $apriori->train($sample, []); + + $L = $apriori->apriori(); + + self::assertEmpty($L); + } + + public function testAprioriSingleItem(): void + { + $sample = [['a']]; + + $apriori = new Apriori(0, 0); + $apriori->train($sample, []); + + $L = $apriori->apriori(); + + self::assertEquals([1], array_keys($L)); + self::assertEquals([['a']], $L[1]); + } + + public function testAprioriL3(): void + { + $sample = [['a', 'b', 'c']]; + + $apriori = new Apriori(0, 0); + $apriori->train($sample, []); + + $L = $apriori->apriori(); + + self::assertEquals([['a', 'b', 'c']], $L[3]); + } + + public function testGetRules(): void + { + $apriori = new Apriori(0.4, 0.8); + $apriori->train($this->sampleChars, []); + + self::assertCount(19, $apriori->getRules()); + } + + public function 
testGetRulesSupportAndConfidence(): void + { + $sample = [['a', 'b'], ['a', 'c']]; + + $apriori = new Apriori(0, 0); + $apriori->train($sample, []); + + $rules = $apriori->getRules(); + + self::assertCount(4, $rules); + self::assertContains([ + Apriori::ARRAY_KEY_ANTECEDENT => ['a'], + Apriori::ARRAY_KEY_CONSEQUENT => ['b'], + Apriori::ARRAY_KEY_SUPPORT => 0.5, + Apriori::ARRAY_KEY_CONFIDENCE => 0.5, + ], $rules); + self::assertContains([ + Apriori::ARRAY_KEY_ANTECEDENT => ['b'], + Apriori::ARRAY_KEY_CONSEQUENT => ['a'], + Apriori::ARRAY_KEY_SUPPORT => 0.5, + Apriori::ARRAY_KEY_CONFIDENCE => 1.0, + ], $rules); + } + + public function testAntecedents(): void + { + $apriori = new Apriori(); + + self::assertCount(6, self::invoke($apriori, 'antecedents', [['a', 'b', 'c']])); + } + + public function testItems(): void + { + $apriori = new Apriori(); + $apriori->train($this->sampleGreek, []); + self::assertCount(4, self::invoke($apriori, 'items', [])); + } + + public function testFrequent(): void + { + $apriori = new Apriori(0.51); + $apriori->train($this->sampleGreek, []); + + self::assertCount(0, self::invoke($apriori, 'frequent', [[['epsilon'], ['theta']]])); + self::assertCount(2, self::invoke($apriori, 'frequent', [[['alpha'], ['beta']]])); + } + + public function testCandidates(): void + { + $apriori = new Apriori(); + $apriori->train($this->sampleGreek, []); + + $candidates = self::invoke($apriori, 'candidates', [[['alpha'], ['beta'], ['theta']]]); + + self::assertCount(3, $candidates); + self::assertEquals(['alpha', 'beta'], $candidates[0]); + self::assertEquals(['alpha', 'theta'], $candidates[1]); + self::assertEquals(['beta', 'theta'], $candidates[2]); + } + + public function testConfidence(): void + { + $apriori = new Apriori(); + $apriori->train($this->sampleGreek, []); + + self::assertEquals(0.5, self::invoke($apriori, 'confidence', [['alpha', 'beta', 'theta'], ['alpha', 'beta']])); + self::assertEquals(1, self::invoke($apriori, 'confidence', [['alpha', 
'beta'], ['alpha']])); + } + + public function testSupport(): void + { + $apriori = new Apriori(); + $apriori->train($this->sampleGreek, []); + + self::assertEquals(1.0, self::invoke($apriori, 'support', [['alpha', 'beta']])); + self::assertEquals(0.5, self::invoke($apriori, 'support', [['epsilon']])); + } + + public function testFrequency(): void + { + $apriori = new Apriori(); + $apriori->train($this->sampleGreek, []); + + self::assertEquals(4, self::invoke($apriori, 'frequency', [['alpha', 'beta']])); + self::assertEquals(2, self::invoke($apriori, 'frequency', [['epsilon']])); + } + + public function testContains(): void + { + $apriori = new Apriori(); + + self::assertTrue(self::invoke($apriori, 'contains', [[['a'], ['b']], ['a']])); + self::assertTrue(self::invoke($apriori, 'contains', [[[1, 2]], [1, 2]])); + self::assertFalse(self::invoke($apriori, 'contains', [[['a'], ['b']], ['c']])); + } + + public function testSubset(): void + { + $apriori = new Apriori(); + + self::assertTrue(self::invoke($apriori, 'subset', [['a', 'b'], ['a']])); + self::assertTrue(self::invoke($apriori, 'subset', [['a'], ['a']])); + self::assertFalse(self::invoke($apriori, 'subset', [['a'], ['a', 'b']])); + } + + public function testEquals(): void + { + $apriori = new Apriori(); + + self::assertTrue(self::invoke($apriori, 'equals', [['a'], ['a']])); + self::assertFalse(self::invoke($apriori, 'equals', [['a'], []])); + self::assertFalse(self::invoke($apriori, 'equals', [['a'], ['b', 'a']])); + } + + public function testSaveAndRestore(): void + { + $classifier = new Apriori(0.5, 0.5); + $classifier->train($this->sampleGreek, []); + + $testSamples = [['alpha', 'epsilon'], ['beta', 'theta']]; + $predicted = $classifier->predict($testSamples); + + $filename = 'apriori-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + 
$restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } + + /** + * Invokes objects method. Private/protected will be set accessible. + * + * @param string $method Method name to be called + * @param array $params Array of params to be passed + * + * @return mixed + */ + private static function invoke(Apriori $object, string $method, array $params = []) + { + $reflection = new ReflectionClass(get_class($object)); + $method = $reflection->getMethod($method); + $method->setAccessible(true); + + return $method->invokeArgs($object, $params); + } +} diff --git a/tests/Classification/DecisionTree/DecisionTreeLeafTest.php b/tests/Classification/DecisionTree/DecisionTreeLeafTest.php new file mode 100644 index 0000000..05139ee --- /dev/null +++ b/tests/Classification/DecisionTree/DecisionTreeLeafTest.php @@ -0,0 +1,53 @@ +value = 1; + $leaf->columnIndex = 0; + + $rightLeaf = new DecisionTreeLeaf(); + $rightLeaf->value = '<= 2'; + $rightLeaf->columnIndex = 1; + + $leaf->rightLeaf = $rightLeaf; + + self::assertEquals( + '
col_0 =1
Gini: 0.00
 No |
col_1 <= 2
Gini: 0.00
', + $leaf->getHTML() + ); + } + + public function testNodeImpurityDecreaseShouldBeZeroWhenLeafIsTerminal(): void + { + $leaf = new DecisionTreeLeaf(); + $leaf->isTerminal = true; + + self::assertEquals(0.0, $leaf->getNodeImpurityDecrease(1)); + } + + public function testNodeImpurityDecrease(): void + { + $leaf = new DecisionTreeLeaf(); + $leaf->giniIndex = 0.5; + $leaf->records = [1, 2, 3]; + + $leaf->leftLeaf = new DecisionTreeLeaf(); + $leaf->leftLeaf->records = [5, 2]; + + $leaf->rightLeaf = new DecisionTreeLeaf(); + $leaf->rightLeaf->records = []; + $leaf->rightLeaf->giniIndex = 0.3; + + self::assertSame(0.75, $leaf->getNodeImpurityDecrease(2)); + } +} diff --git a/tests/Classification/DecisionTreeTest.php b/tests/Classification/DecisionTreeTest.php new file mode 100644 index 0000000..3f0a763 --- /dev/null +++ b/tests/Classification/DecisionTreeTest.php @@ -0,0 +1,92 @@ +getData($this->data); + $classifier = new DecisionTree(5); + $classifier->train($data, $targets); + self::assertEquals('Dont_play', $classifier->predict(['sunny', 78, 72, 'false'])); + self::assertEquals('Play', $classifier->predict(['overcast', 60, 60, 'false'])); + self::assertEquals('Dont_play', $classifier->predict(['rain', 60, 60, 'true'])); + + [$data, $targets] = $this->getData($this->extraData); + $classifier->train($data, $targets); + self::assertEquals('Dont_play', $classifier->predict(['scorching', 95, 90, 'true'])); + self::assertEquals('Play', $classifier->predict(['overcast', 60, 60, 'false'])); + } + + public function testSaveAndRestore(): void + { + [$data, $targets] = $this->getData($this->data); + $classifier = new DecisionTree(5); + $classifier->train($data, $targets); + + $testSamples = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false']]; + $predicted = $classifier->predict($testSamples); + + $filename = 'decision-tree-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new 
ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } + + public function testTreeDepth(): void + { + [$data, $targets] = $this->getData($this->data); + $classifier = new DecisionTree(5); + $classifier->train($data, $targets); + self::assertTrue($classifier->actualDepth <= 5); + } + + private function getData(array $input): array + { + $targets = array_column($input, 4); + array_walk($input, function (&$v): void { + array_splice($v, 4, 1); + }); + + return [$input, $targets]; + } +} diff --git a/tests/Classification/Ensemble/AdaBoostTest.php b/tests/Classification/Ensemble/AdaBoostTest.php new file mode 100644 index 0000000..173df6c --- /dev/null +++ b/tests/Classification/Ensemble/AdaBoostTest.php @@ -0,0 +1,82 @@ +expectException(InvalidArgumentException::class); + $classifier->train($samples, $targets); + } + + public function testPredictSingleSample(): void + { + // AND problem + $samples = [[0.1, 0.3], [1, 0], [0, 1], [1, 1], [0.9, 0.8], [1.1, 1.1]]; + $targets = [0, 0, 0, 1, 1, 1]; + $classifier = new AdaBoost(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.2])); + self::assertEquals(0, $classifier->predict([0.1, 0.99])); + self::assertEquals(1, $classifier->predict([1.1, 0.8])); + + // OR problem + $samples = [[0, 0], [0.1, 0.2], [0.2, 0.1], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 0, 0, 1, 1, 1]; + $classifier = new AdaBoost(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.2])); + self::assertEquals(1, $classifier->predict([0.1, 0.99])); + self::assertEquals(1, $classifier->predict([1.1, 0.8])); + + // XOR problem + $samples = [[0.1, 0.2], [1., 1.], [0.9, 0.8], [0., 1.], [1., 0.], [0.2, 0.8]]; + $targets = [0, 0, 0, 1, 1, 1]; + $classifier = 
new AdaBoost(5); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.1])); + self::assertEquals(1, $classifier->predict([0, 0.999])); + self::assertEquals(0, $classifier->predict([1.1, 0.8])); + } + + public function testSaveAndRestore(): void + { + // Instantinate new Percetron trained for OR problem + $samples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 1, 1, 1]; + $classifier = new AdaBoost(); + $classifier->train($samples, $targets); + $testSamples = [[0, 1], [1, 1], [0.2, 0.1]]; + $predicted = $classifier->predict($testSamples); + + $filename = 'adaboost-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } +} diff --git a/tests/Classification/Ensemble/BaggingTest.php b/tests/Classification/Ensemble/BaggingTest.php new file mode 100644 index 0000000..d879fba --- /dev/null +++ b/tests/Classification/Ensemble/BaggingTest.php @@ -0,0 +1,148 @@ +getClassifier(); + + $this->expectException(InvalidArgumentException::class); + $classifier->setSubsetRatio(0); + } + + public function testPredictSingleSample(): void + { + [$data, $targets] = $this->getData($this->data); + $classifier = $this->getClassifier(); + // Testing with default options + $classifier->train($data, $targets); + self::assertEquals('Dont_play', $classifier->predict(['sunny', 78, 72, 'false'])); + self::assertEquals('Play', $classifier->predict(['overcast', 60, 60, 'false'])); + self::assertEquals('Dont_play', $classifier->predict(['rain', 60, 60, 'true'])); + + [$data, $targets] = $this->getData($this->extraData); + $classifier->train($data, $targets); + self::assertEquals('Dont_play', 
$classifier->predict(['scorching', 95, 90, 'true'])); + self::assertEquals('Play', $classifier->predict(['overcast', 60, 60, 'false'])); + } + + public function testSaveAndRestore(): void + { + [$data, $targets] = $this->getData($this->data); + $classifier = $this->getClassifier(5); + $classifier->train($data, $targets); + + $testSamples = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false']]; + $predicted = $classifier->predict($testSamples); + + $filename = 'bagging-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } + + public function testBaseClassifiers(): void + { + [$data, $targets] = $this->getData($this->data); + $baseClassifiers = $this->getAvailableBaseClassifiers(); + + foreach ($baseClassifiers as $base => $params) { + $classifier = $this->getClassifier(); + $classifier->setClassifer($base, $params); + $classifier->train($data, $targets); + + $baseClassifier = new $base(...array_values($params)); + $baseClassifier->train($data, $targets); + $testData = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false'], ['rain', 60, 60, 'true']]; + foreach ($testData as $test) { + $result = $classifier->predict($test); + $baseResult = $classifier->predict($test); + self::assertEquals($result, $baseResult); + } + } + } + + /** + * @return Bagging + */ + protected function getClassifier(int $numBaseClassifiers = 50): Classifier + { + $classifier = new Bagging($numBaseClassifiers); + $classifier->setSubsetRatio(1.0); + $classifier->setClassifer(DecisionTree::class, ['depth' => 10]); + + return $classifier; + } + + protected function getAvailableBaseClassifiers(): array + { + return [ + DecisionTree::class 
=> [ + 'depth' => 5, + ], + NaiveBayes::class => [], + ]; + } + + private function getData(array $input): array + { + // Populating input data to a size large enough + // for base classifiers that they can work with a subset of it + $populated = []; + for ($i = 0; $i < 20; ++$i) { + $populated = array_merge($populated, $input); + } + + shuffle($populated); + $targets = array_column($populated, 4); + array_walk($populated, function (&$v): void { + array_splice($v, 4, 1); + }); + + return [$populated, $targets]; + } +} diff --git a/tests/Classification/Ensemble/RandomForestTest.php b/tests/Classification/Ensemble/RandomForestTest.php new file mode 100644 index 0000000..abff973 --- /dev/null +++ b/tests/Classification/Ensemble/RandomForestTest.php @@ -0,0 +1,70 @@ +expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('RandomForest can only use DecisionTree as base classifier'); + + $classifier = new RandomForest(); + $classifier->setClassifer(NaiveBayes::class); + } + + public function testThrowExceptionWithInvalidFeatureSubsetRatioType(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Feature subset ratio must be a string or a float'); + + $classifier = new RandomForest(); + $classifier->setFeatureSubsetRatio(1); + } + + public function testThrowExceptionWithInvalidFeatureSubsetRatioFloat(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('When a float is given, feature subset ratio should be between 0.1 and 1.0'); + + $classifier = new RandomForest(); + $classifier->setFeatureSubsetRatio(1.1); + } + + public function testThrowExceptionWithInvalidFeatureSubsetRatioString(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage("When a string is given, feature subset ratio can only be 'sqrt' or 'log'"); + + $classifier = new RandomForest(); + $classifier->setFeatureSubsetRatio('pow'); + } + 
+ /** + * @return RandomForest + */ + protected function getClassifier(int $numBaseClassifiers = 50): Classifier + { + $classifier = new RandomForest($numBaseClassifiers); + $classifier->setFeatureSubsetRatio('log'); + + return $classifier; + } + + protected function getAvailableBaseClassifiers(): array + { + return [ + DecisionTree::class => [ + 'depth' => 5, + ], + ]; + } +} diff --git a/tests/Classification/KNearestNeighborsTest.php b/tests/Classification/KNearestNeighborsTest.php new file mode 100644 index 0000000..5be9a3d --- /dev/null +++ b/tests/Classification/KNearestNeighborsTest.php @@ -0,0 +1,84 @@ +train($samples, $labels); + + self::assertEquals('b', $classifier->predict([3, 2])); + self::assertEquals('b', $classifier->predict([5, 1])); + self::assertEquals('b', $classifier->predict([4, 3])); + self::assertEquals('b', $classifier->predict([4, -5])); + + self::assertEquals('a', $classifier->predict([2, 3])); + self::assertEquals('a', $classifier->predict([1, 2])); + self::assertEquals('a', $classifier->predict([1, 5])); + self::assertEquals('a', $classifier->predict([3, 10])); + } + + public function testPredictArrayOfSamples(): void + { + $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]; + $testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a']; + + $classifier = new KNearestNeighbors(); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + self::assertEquals($testLabels, $predicted); + } + + public function testPredictArrayOfSamplesUsingChebyshevDistanceMetric(): void + { + $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]; + $testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a']; + + $classifier = new 
KNearestNeighbors(3, new Chebyshev()); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + self::assertEquals($testLabels, $predicted); + } + + public function testSaveAndRestore(): void + { + $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]; + + // Using non-default constructor parameters to check that their values are restored. + $classifier = new KNearestNeighbors(3, new Chebyshev()); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + $filename = 'knearest-neighbors-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } +} diff --git a/tests/Classification/Linear/AdalineTest.php b/tests/Classification/Linear/AdalineTest.php new file mode 100644 index 0000000..7bc8f9d --- /dev/null +++ b/tests/Classification/Linear/AdalineTest.php @@ -0,0 +1,100 @@ +expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Adaline can only be trained with batch and online/stochastic gradient descent algorithm'); + + new Adaline( + 0.001, + 1000, + true, + 0 + ); + } + + public function testPredictSingleSample(): void + { + // AND problem + $samples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 0, 0, 1]; + $classifier = new Adaline(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.2])); + self::assertEquals(0, $classifier->predict([0.1, 0.99])); + self::assertEquals(1, $classifier->predict([1.1, 
0.8])); + + // OR problem + $samples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 1, 1, 1]; + $classifier = new Adaline(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.2])); + self::assertEquals(1, $classifier->predict([0.1, 0.99])); + self::assertEquals(1, $classifier->predict([1.1, 0.8])); + + // By use of One-v-Rest, Adaline can perform multi-class classification + // The samples should be separable by lines perpendicular to the dimensions + $samples = [ + [0, 0], [0, 1], [1, 0], [1, 1], // First group : a cluster at bottom-left corner in 2D + [5, 5], [6, 5], [5, 6], [7, 5], // Second group: another cluster at the middle-right + [3, 10], [3, 10], [3, 8], [3, 9], // Third group : cluster at the top-middle + ]; + $targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]; + + $classifier = new Adaline(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.5, 0.5])); + self::assertEquals(1, $classifier->predict([6.0, 5.0])); + self::assertEquals(2, $classifier->predict([3.0, 9.5])); + + // Extra partial training should lead to the same results. + $classifier->partialTrain([[0, 1], [1, 0]], [0, 0], [0, 1, 2]); + self::assertEquals(0, $classifier->predict([0.5, 0.5])); + self::assertEquals(1, $classifier->predict([6.0, 5.0])); + self::assertEquals(2, $classifier->predict([3.0, 9.5])); + + // Train should clear previous data. 
+ $samples = [ + [0, 0], [0, 1], [1, 0], [1, 1], // First group : a cluster at bottom-left corner in 2D + [5, 5], [6, 5], [5, 6], [7, 5], // Second group: another cluster at the middle-right + [3, 10], [3, 10], [3, 8], [3, 9], // Third group : cluster at the top-middle + ]; + $targets = [2, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1]; + $classifier->train($samples, $targets); + self::assertEquals(2, $classifier->predict([0.5, 0.5])); + self::assertEquals(0, $classifier->predict([6.0, 5.0])); + self::assertEquals(1, $classifier->predict([3.0, 9.5])); + } + + public function testSaveAndRestore(): void + { + // Instantinate new Percetron trained for OR problem + $samples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 1, 1, 1]; + $classifier = new Adaline(); + $classifier->train($samples, $targets); + $testSamples = [[0, 1], [1, 1], [0.2, 0.1]]; + $predicted = $classifier->predict($testSamples); + + $filename = 'adaline-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } +} diff --git a/tests/Classification/Linear/DecisionStumpTest.php b/tests/Classification/Linear/DecisionStumpTest.php new file mode 100644 index 0000000..7cd8250 --- /dev/null +++ b/tests/Classification/Linear/DecisionStumpTest.php @@ -0,0 +1,86 @@ +setSampleWeights([0.1, 0.1, 0.1]); + + $this->expectException(InvalidArgumentException::class); + $classifier->train($samples, $targets); + } + + public function testPredictSingleSample(): void + { + // Samples should be separable with a line perpendicular + // to any dimension given in the dataset + // + // First: horizontal test + $samples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 0, 1, 1]; + 
$classifier = new DecisionStump(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.2])); + self::assertEquals(0, $classifier->predict([1.1, 0.2])); + self::assertEquals(1, $classifier->predict([0.1, 0.99])); + self::assertEquals(1, $classifier->predict([1.1, 0.8])); + + // Then: vertical test + $samples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 1, 0, 1]; + $classifier = new DecisionStump(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.2])); + self::assertEquals(0, $classifier->predict([0.1, 1.1])); + self::assertEquals(1, $classifier->predict([1.0, 0.99])); + self::assertEquals(1, $classifier->predict([1.1, 0.1])); + + // By use of One-v-Rest, DecisionStump can perform multi-class classification + // The samples should be separable by lines perpendicular to the dimensions + $samples = [ + [0, 0], [0, 1], [1, 0], [1, 1], // First group : a cluster at bottom-left corner in 2D + [5, 5], [6, 5], [5, 6], [7, 5], // Second group: another cluster at the middle-right + [3, 10], [3, 10], [3, 8], [3, 9], // Third group : cluster at the top-middle + ]; + $targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]; + + $classifier = new DecisionStump(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.5, 0.5])); + self::assertEquals(1, $classifier->predict([6.0, 5.0])); + self::assertEquals(2, $classifier->predict([3.5, 9.5])); + } + + public function testSaveAndRestore(): void + { + // Instantinate new Percetron trained for OR problem + $samples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 1, 1, 1]; + $classifier = new DecisionStump(); + $classifier->train($samples, $targets); + $testSamples = [[0, 1], [1, 1], [0.2, 0.1]]; + $predicted = $classifier->predict($testSamples); + + $filename = 'dstump-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new 
ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } +} diff --git a/tests/Classification/Linear/LogisticRegressionTest.php b/tests/Classification/Linear/LogisticRegressionTest.php new file mode 100644 index 0000000..812870c --- /dev/null +++ b/tests/Classification/Linear/LogisticRegressionTest.php @@ -0,0 +1,222 @@ +expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Logistic regression can only be trained with '. + 'batch (gradient descent), online (stochastic gradient descent) '. + 'or conjugate batch (conjugate gradients) algorithms'); + + new LogisticRegression( + 500, + true, + -1, + 'log', + 'L2' + ); + } + + public function testConstructorThrowWhenInvalidCost(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage("Logistic regression cost function can be one of the following: \n". 
+ "'log' for log-likelihood and 'sse' for sum of squared errors"); + + new LogisticRegression( + 500, + true, + LogisticRegression::CONJUGATE_GRAD_TRAINING, + 'invalid', + 'L2' + ); + } + + public function testConstructorThrowWhenInvalidPenalty(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Logistic regression supports only \'L2\' regularization'); + + new LogisticRegression( + 500, + true, + LogisticRegression::CONJUGATE_GRAD_TRAINING, + 'log', + 'invalid' + ); + } + + public function testPredictSingleSample(): void + { + // AND problem + $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.4, 0.4], [0.6, 0.6]]; + $targets = [0, 0, 0, 1, 0, 1]; + $classifier = new LogisticRegression(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.1])); + self::assertEquals(1, $classifier->predict([0.9, 0.9])); + } + + public function testPredictSingleSampleWithBatchTraining(): void + { + $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.4, 0.4], [0.6, 0.6]]; + $targets = [0, 0, 0, 1, 0, 1]; + + // $maxIterations is set to 10000 as batch training needs more + // iteration to converge than CG method in general. + $classifier = new LogisticRegression( + 10000, + true, + LogisticRegression::BATCH_TRAINING, + 'log', + 'L2' + ); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.1])); + self::assertEquals(1, $classifier->predict([0.9, 0.9])); + } + + public function testPredictSingleSampleWithOnlineTraining(): void + { + $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.4, 0.4], [0.6, 0.6]]; + $targets = [0, 0, 0, 1, 0, 1]; + + // $penalty is set to empty (no penalty) because L2 penalty seems to + // prevent convergence in online training for this dataset. 
+ $classifier = new LogisticRegression( + 10000, + true, + LogisticRegression::ONLINE_TRAINING, + 'log', + '' + ); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.1])); + self::assertEquals(1, $classifier->predict([0.9, 0.9])); + } + + public function testPredictSingleSampleWithSSECost(): void + { + $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.4, 0.4], [0.6, 0.6]]; + $targets = [0, 0, 0, 1, 0, 1]; + $classifier = new LogisticRegression( + 500, + true, + LogisticRegression::CONJUGATE_GRAD_TRAINING, + 'sse', + 'L2' + ); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.1])); + self::assertEquals(1, $classifier->predict([0.9, 0.9])); + } + + public function testPredictSingleSampleWithoutPenalty(): void + { + $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.4, 0.4], [0.6, 0.6]]; + $targets = [0, 0, 0, 1, 0, 1]; + $classifier = new LogisticRegression( + 500, + true, + LogisticRegression::CONJUGATE_GRAD_TRAINING, + 'log', + '' + ); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.1])); + self::assertEquals(1, $classifier->predict([0.9, 0.9])); + } + + public function testPredictMultiClassSample(): void + { + // By use of One-v-Rest, Perceptron can perform multi-class classification + // The samples should be separable by lines perpendicular to the dimensions + $samples = [ + [0, 0], [0, 1], [1, 0], [1, 1], // First group : a cluster at bottom-left corner in 2D + [5, 5], [6, 5], [5, 6], [7, 5], // Second group: another cluster at the middle-right + [3, 10], [3, 10], [3, 8], [3, 9], // Third group : cluster at the top-middle + ]; + $targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]; + + $classifier = new LogisticRegression(); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.5, 0.5])); + self::assertEquals(1, $classifier->predict([6.0, 5.0])); + self::assertEquals(2, $classifier->predict([3.0, 9.5])); 
+ } + + public function testPredictProbabilitySingleSample(): void + { + $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.4, 0.4], [0.6, 0.6]]; + $targets = [0, 0, 0, 1, 0, 1]; + $classifier = new LogisticRegression(); + $classifier->train($samples, $targets); + + $property = new ReflectionProperty($classifier, 'classifiers'); + $property->setAccessible(true); + $predictor = $property->getValue($classifier)[0]; + $method = new ReflectionMethod($predictor, 'predictProbability'); + $method->setAccessible(true); + + $zero = $method->invoke($predictor, [0.1, 0.1], 0); + $one = $method->invoke($predictor, [0.1, 0.1], 1); + self::assertEqualsWithDelta(1, $zero + $one, 1e-6); + self::assertTrue($zero > $one); + + $zero = $method->invoke($predictor, [0.9, 0.9], 0); + $one = $method->invoke($predictor, [0.9, 0.9], 1); + self::assertEqualsWithDelta(1, $zero + $one, 1e-6); + self::assertTrue($zero < $one); + } + + public function testPredictProbabilityMultiClassSample(): void + { + $samples = [ + [0, 0], [0, 1], [1, 0], [1, 1], + [5, 5], [6, 5], [5, 6], [6, 6], + [3, 10], [3, 10], [3, 8], [3, 9], + ]; + $targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]; + + $classifier = new LogisticRegression(); + $classifier->train($samples, $targets); + + $property = new ReflectionProperty($classifier, 'classifiers'); + $property->setAccessible(true); + + $predictor = $property->getValue($classifier)[0]; + $method = new ReflectionMethod($predictor, 'predictProbability'); + $method->setAccessible(true); + $zero = $method->invoke($predictor, [3.0, 9.5], 0); + $not_zero = $method->invoke($predictor, [3.0, 9.5], 'not_0'); + + $predictor = $property->getValue($classifier)[1]; + $method = new ReflectionMethod($predictor, 'predictProbability'); + $method->setAccessible(true); + $one = $method->invoke($predictor, [3.0, 9.5], 1); + $not_one = $method->invoke($predictor, [3.0, 9.5], 'not_1'); + + $predictor = $property->getValue($classifier)[2]; + $method = new ReflectionMethod($predictor, 
'predictProbability'); + $method->setAccessible(true); + $two = $method->invoke($predictor, [3.0, 9.5], 2); + $not_two = $method->invoke($predictor, [3.0, 9.5], 'not_2'); + + self::assertEqualsWithDelta(1, $zero + $not_zero, 1e-6); + self::assertEqualsWithDelta(1, $one + $not_one, 1e-6); + self::assertEqualsWithDelta(1, $two + $not_two, 1e-6); + self::assertTrue($zero < $two); + self::assertTrue($one < $two); + } +} diff --git a/tests/Classification/Linear/PerceptronTest.php b/tests/Classification/Linear/PerceptronTest.php new file mode 100644 index 0000000..fa118f3 --- /dev/null +++ b/tests/Classification/Linear/PerceptronTest.php @@ -0,0 +1,104 @@ +expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Learning rate should be a float value between 0.0(exclusive) and 1.0(inclusive)'); + new Perceptron(0, 5000); + } + + public function testPerceptronThrowWhenMaxIterationsOutOfRange(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Maximum number of iterations must be an integer greater than 0'); + new Perceptron(0.001, 0); + } + + public function testPredictSingleSample(): void + { + // AND problem + $samples = [[0, 0], [1, 0], [0, 1], [1, 1], [0.6, 0.6]]; + $targets = [0, 0, 0, 1, 1]; + $classifier = new Perceptron(0.001, 5000); + $classifier->setEarlyStop(false); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.1, 0.2])); + self::assertEquals(0, $classifier->predict([0, 1])); + self::assertEquals(1, $classifier->predict([1.1, 0.8])); + + // OR problem + $samples = [[0.1, 0.1], [0.4, 0.], [0., 0.3], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 0, 0, 1, 1, 1]; + $classifier = new Perceptron(0.001, 5000, false); + $classifier->setEarlyStop(false); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0., 0.])); + self::assertEquals(1, $classifier->predict([0.1, 0.99])); + self::assertEquals(1, 
$classifier->predict([1.1, 0.8])); + + // By use of One-v-Rest, Perceptron can perform multi-class classification + // The samples should be separable by lines perpendicular to the dimensions + $samples = [ + [0, 0], [0, 1], [1, 0], [1, 1], // First group : a cluster at bottom-left corner in 2D + [5, 5], [6, 5], [5, 6], [7, 5], // Second group: another cluster at the middle-right + [3, 10], [3, 10], [3, 8], [3, 9], // Third group : cluster at the top-middle + ]; + $targets = [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]; + + $classifier = new Perceptron(); + $classifier->setEarlyStop(false); + $classifier->train($samples, $targets); + self::assertEquals(0, $classifier->predict([0.5, 0.5])); + self::assertEquals(1, $classifier->predict([6.0, 5.0])); + self::assertEquals(2, $classifier->predict([3.0, 9.5])); + + // Extra partial training should lead to the same results. + $classifier->partialTrain([[0, 1], [1, 0]], [0, 0], [0, 1, 2]); + self::assertEquals(0, $classifier->predict([0.5, 0.5])); + self::assertEquals(1, $classifier->predict([6.0, 5.0])); + self::assertEquals(2, $classifier->predict([3.0, 9.5])); + + // Train should clear previous data. 
+ $samples = [ + [0, 0], [0, 1], [1, 0], [1, 1], // First group : a cluster at bottom-left corner in 2D + [5, 5], [6, 5], [5, 6], [7, 5], // Second group: another cluster at the middle-right + [3, 10], [3, 10], [3, 8], [3, 9], // Third group : cluster at the top-middle + ]; + $targets = [2, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 1]; + $classifier->train($samples, $targets); + self::assertEquals(2, $classifier->predict([0.5, 0.5])); + self::assertEquals(0, $classifier->predict([6.0, 5.0])); + self::assertEquals(1, $classifier->predict([3.0, 9.5])); + } + + public function testSaveAndRestore(): void + { + // Instantinate new Percetron trained for OR problem + $samples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 1, 1, 1]; + $classifier = new Perceptron(); + $classifier->train($samples, $targets); + $testSamples = [[0, 1], [1, 1], [0.2, 0.1]]; + $predicted = $classifier->predict($testSamples); + + $filename = 'perceptron-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } +} diff --git a/tests/Classification/MLPClassifierTest.php b/tests/Classification/MLPClassifierTest.php new file mode 100644 index 0000000..2998ac9 --- /dev/null +++ b/tests/Classification/MLPClassifierTest.php @@ -0,0 +1,264 @@ +getLayers()); + + $layers = $mlp->getLayers(); + + // input layer + self::assertCount(3, $layers[0]->getNodes()); + self::assertNotContainsOnly(Neuron::class, $layers[0]->getNodes()); + + // hidden layer + self::assertCount(3, $layers[1]->getNodes()); + self::assertNotContainsOnly(Neuron::class, $layers[1]->getNodes()); + + // output layer + self::assertCount(2, $layers[2]->getNodes()); + 
self::assertContainsOnly(Neuron::class, $layers[2]->getNodes()); + } + + public function testSynapsesGeneration(): void + { + $mlp = new MLPClassifier(2, [2], [0, 1]); + $layers = $mlp->getLayers(); + + foreach ($layers[1]->getNodes() as $node) { + if ($node instanceof Neuron) { + $synapses = $node->getSynapses(); + self::assertCount(3, $synapses); + + $synapsesNodes = $this->getSynapsesNodes($synapses); + foreach ($layers[0]->getNodes() as $prevNode) { + self::assertContains($prevNode, $synapsesNodes); + } + } + } + } + + public function testBackpropagationLearning(): void + { + // Single layer 2 classes. + $network = new MLPClassifier(2, [2], ['a', 'b'], 1000); + $network->train( + [[1, 0], [0, 1], [1, 1], [0, 0]], + ['a', 'b', 'a', 'b'] + ); + + self::assertEquals('a', $network->predict([1, 0])); + self::assertEquals('b', $network->predict([0, 1])); + self::assertEquals('a', $network->predict([1, 1])); + self::assertEquals('b', $network->predict([0, 0])); + } + + public function testBackpropagationTrainingReset(): void + { + // Single layer 2 classes. + $network = new MLPClassifier(2, [2], ['a', 'b'], 1000); + $network->train( + [[1, 0], [0, 1]], + ['a', 'b'] + ); + + self::assertEquals('a', $network->predict([1, 0])); + self::assertEquals('b', $network->predict([0, 1])); + + $network->train( + [[1, 0], [0, 1]], + ['b', 'a'] + ); + + self::assertEquals('b', $network->predict([1, 0])); + self::assertEquals('a', $network->predict([0, 1])); + } + + public function testBackpropagationPartialTraining(): void + { + // Single layer 2 classes. 
+ $network = new MLPClassifier(2, [2], ['a', 'b'], 1000); + $network->partialTrain( + [[1, 0], [0, 1]], + ['a', 'b'] + ); + + self::assertEquals('a', $network->predict([1, 0])); + self::assertEquals('b', $network->predict([0, 1])); + + $network->partialTrain( + [[1, 1], [0, 0]], + ['a', 'b'] + ); + + self::assertEquals('a', $network->predict([1, 0])); + self::assertEquals('b', $network->predict([0, 1])); + self::assertEquals('a', $network->predict([1, 1])); + self::assertEquals('b', $network->predict([0, 0])); + } + + public function testBackpropagationLearningMultilayer(): void + { + // Multi-layer 2 classes. + $network = new MLPClassifier(5, [3, 2], ['a', 'b', 'c'], 2000); + $network->train( + [[1, 0, 0, 0, 0], [0, 1, 1, 0, 0], [1, 1, 1, 1, 1], [0, 0, 0, 0, 0]], + ['a', 'b', 'a', 'c'] + ); + + self::assertEquals('a', $network->predict([1, 0, 0, 0, 0])); + self::assertEquals('b', $network->predict([0, 1, 1, 0, 0])); + self::assertEquals('a', $network->predict([1, 1, 1, 1, 1])); + self::assertEquals('c', $network->predict([0, 0, 0, 0, 0])); + } + + public function testBackpropagationLearningMulticlass(): void + { + // Multi-layer more than 2 classes. 
+ $network = new MLPClassifier(5, [3, 2], ['a', 'b', 4], 1000); + $network->train( + [[1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 1, 0], [1, 1, 1, 1, 1], [0, 0, 0, 0, 0]], + ['a', 'b', 'a', 'a', 4] + ); + + self::assertEquals('a', $network->predict([1, 0, 0, 0, 0])); + self::assertEquals('b', $network->predict([0, 1, 0, 0, 0])); + self::assertEquals('a', $network->predict([0, 0, 1, 1, 0])); + self::assertEquals('a', $network->predict([1, 1, 1, 1, 1])); + self::assertEquals(4, $network->predict([0, 0, 0, 0, 0])); + } + + /** + * @dataProvider activationFunctionsProvider + */ + public function testBackpropagationActivationFunctions(ActivationFunction $activationFunction): void + { + $network = new MLPClassifier(5, [3], ['a', 'b'], 1000, $activationFunction); + $network->train( + [[1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 1, 0], [1, 1, 1, 1, 1]], + ['a', 'b', 'a', 'a'] + ); + + self::assertEquals('a', $network->predict([1, 0, 0, 0, 0])); + self::assertEquals('b', $network->predict([0, 1, 0, 0, 0])); + self::assertEquals('a', $network->predict([0, 0, 1, 1, 0])); + self::assertEquals('a', $network->predict([1, 1, 1, 1, 1])); + } + + public function activationFunctionsProvider(): array + { + return [ + [new Sigmoid()], + [new HyperbolicTangent()], + [new PReLU()], + [new ThresholdedReLU()], + ]; + } + + public function testSaveAndRestore(): void + { + // Instantinate new Percetron trained for OR problem + $samples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $targets = [0, 1, 1, 1]; + $classifier = new MLPClassifier(2, [2], [0, 1], 1000); + $classifier->train($samples, $targets); + $testSamples = [[0, 0], [1, 0], [0, 1], [1, 1]]; + $predicted = $classifier->predict($testSamples); + + $filename = 'perceptron-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); 
+ self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } + + public function testSaveAndRestoreWithPartialTraining(): void + { + $network = new MLPClassifier(2, [2], ['a', 'b'], 1000); + $network->partialTrain( + [[1, 0], [0, 1]], + ['a', 'b'] + ); + + self::assertEquals('a', $network->predict([1, 0])); + self::assertEquals('b', $network->predict([0, 1])); + + $filename = 'perceptron-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($network, $filepath); + + /** @var MLPClassifier $restoredNetwork */ + $restoredNetwork = $modelManager->restoreFromFile($filepath); + $restoredNetwork->partialTrain( + [[1, 1], [0, 0]], + ['a', 'b'] + ); + + self::assertEquals('a', $restoredNetwork->predict([1, 0])); + self::assertEquals('b', $restoredNetwork->predict([0, 1])); + self::assertEquals('a', $restoredNetwork->predict([1, 1])); + self::assertEquals('b', $restoredNetwork->predict([0, 0])); + } + + public function testThrowExceptionOnInvalidLayersNumber(): void + { + $this->expectException(InvalidArgumentException::class); + new MLPClassifier(2, [], [0, 1]); + } + + public function testThrowExceptionOnInvalidPartialTrainingClasses(): void + { + $this->expectException(InvalidArgumentException::class); + $classifier = new MLPClassifier(2, [2], [0, 1]); + $classifier->partialTrain( + [[0, 1], [1, 0]], + [0, 2], + [0, 1, 2] + ); + } + + public function testThrowExceptionOnInvalidClassesNumber(): void + { + $this->expectException(InvalidArgumentException::class); + new MLPClassifier(2, [2], [0]); + } + + public function testOutputWithLabels(): void + { + $output = (new MLPClassifier(2, [2, 2], ['T', 'F']))->getOutput(); + + self::assertEquals(['T', 'F'], array_keys($output)); + } + + private function getSynapsesNodes(array $synapses): array + { + $nodes = []; + foreach 
($synapses as $synapse) { + $nodes[] = $synapse->getNode(); + } + + return $nodes; + } +} diff --git a/tests/Classification/NaiveBayesTest.php b/tests/Classification/NaiveBayesTest.php new file mode 100644 index 0000000..076a70d --- /dev/null +++ b/tests/Classification/NaiveBayesTest.php @@ -0,0 +1,144 @@ +train($samples, $labels); + + self::assertEquals('a', $classifier->predict([3, 1, 1])); + self::assertEquals('b', $classifier->predict([1, 4, 1])); + self::assertEquals('c', $classifier->predict([1, 1, 6])); + } + + public function testPredictArrayOfSamples(): void + { + $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; + $trainLabels = ['a', 'b', 'c']; + + $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8], [1, 1, 2], [2, 3, 2], [1, 2, 1], [9, 5, 1], [3, 1, 2]]; + $testLabels = ['a', 'a', 'c', 'c', 'b', 'b', 'a', 'a']; + + $classifier = new NaiveBayes(); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + self::assertEquals($testLabels, $predicted); + + // Feed an extra set of training data. 
+ $samples = [[1, 1, 6]]; + $labels = ['d']; + $classifier->train($samples, $labels); + + $testSamples = [[1, 1, 6], [5, 1, 1]]; + $testLabels = ['d', 'a']; + self::assertEquals($testLabels, $classifier->predict($testSamples)); + } + + public function testSaveAndRestore(): void + { + $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; + $trainLabels = ['a', 'b', 'c']; + + $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8]]; + + $classifier = new NaiveBayes(); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + $filename = 'naive-bayes-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } + + public function testPredictSimpleNumericLabels(): void + { + $samples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; + $labels = ['1996', '1997', '1998']; + + $classifier = new NaiveBayes(); + $classifier->train($samples, $labels); + + self::assertEquals('1996', $classifier->predict([3, 1, 1])); + self::assertEquals('1997', $classifier->predict([1, 4, 1])); + self::assertEquals('1998', $classifier->predict([1, 1, 6])); + } + + public function testPredictArrayOfSamplesNumericalLabels(): void + { + $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; + $trainLabels = ['1996', '1997', '1998']; + + $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8], [1, 1, 2], [2, 3, 2], [1, 2, 1], [9, 5, 1], [3, 1, 2]]; + $testLabels = ['1996', '1996', '1998', '1998', '1997', '1997', '1996', '1996']; + + $classifier = new NaiveBayes(); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + self::assertEquals($testLabels, $predicted); + + // Feed an extra set of 
training data. + $samples = [[1, 1, 6]]; + $labels = ['1999']; + $classifier->train($samples, $labels); + + $testSamples = [[1, 1, 6], [5, 1, 1]]; + $testLabels = ['1999', '1996']; + self::assertEquals($testLabels, $classifier->predict($testSamples)); + } + + public function testSaveAndRestoreNumericLabels(): void + { + $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; + $trainLabels = ['1996', '1997', '1998']; + + $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8]]; + + $classifier = new NaiveBayes(); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + $filename = 'naive-bayes-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + } + + public function testInconsistentFeaturesInSamples(): void + { + $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; + $trainLabels = ['1996', '1997', '1998']; + + $testSamples = [[3, 1, 1], [5, 1], [4, 3, 8]]; + + $classifier = new NaiveBayes(); + $classifier->train($trainSamples, $trainLabels); + + $this->expectException(InvalidArgumentException::class); + + $classifier->predict($testSamples); + } +} diff --git a/tests/Classification/SVCTest.php b/tests/Classification/SVCTest.php new file mode 100644 index 0000000..5fbcff8 --- /dev/null +++ b/tests/Classification/SVCTest.php @@ -0,0 +1,88 @@ +train($samples, $labels); + + self::assertEquals('b', $classifier->predict([3, 2])); + self::assertEquals('b', $classifier->predict([5, 1])); + self::assertEquals('b', $classifier->predict([4, 3])); + self::assertEquals('b', $classifier->predict([4, -5])); + + self::assertEquals('a', $classifier->predict([2, 3])); + self::assertEquals('a', 
$classifier->predict([1, 2])); + self::assertEquals('a', $classifier->predict([1, 5])); + self::assertEquals('a', $classifier->predict([3, 10])); + } + + public function testPredictArrayOfSamplesWithLinearKernel(): void + { + $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]; + $testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a']; + + $classifier = new SVC(Kernel::LINEAR, $cost = 1000); + $classifier->train($trainSamples, $trainLabels); + $predictions = $classifier->predict($testSamples); + + self::assertEquals($testLabels, $predictions); + } + + public function testSaveAndRestore(): void + { + $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $testSamples = [[3, 2], [5, 1], [4, 3]]; + $testLabels = ['b', 'b', 'b']; + + $classifier = new SVC(Kernel::LINEAR, $cost = 1000); + $classifier->train($trainSamples, $trainLabels); + $predicted = $classifier->predict($testSamples); + + $filepath = (string) tempnam(sys_get_temp_dir(), uniqid('svc-test', true)); + $modelManager = new ModelManager(); + $modelManager->saveToFile($classifier, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($classifier, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + self::assertEquals($predicted, $testLabels); + } + + public function testWithNonDotDecimalLocale(): void + { + $currentLocale = setlocale(LC_NUMERIC, '0'); + setlocale(LC_NUMERIC, 'pl_PL.utf8'); + + $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $testSamples = [[3, 2], [5, 1], [4, 3]]; + $testLabels = ['b', 'b', 'b']; + + $classifier = new SVC(Kernel::LINEAR, $cost = 1000); + $classifier->train($trainSamples, $trainLabels); + + 
self::assertEquals($classifier->predict($testSamples), $testLabels); + + setlocale(LC_NUMERIC, (string) $currentLocale); + } +} diff --git a/tests/Clustering/DBSCANTest.php b/tests/Clustering/DBSCANTest.php new file mode 100644 index 0000000..1132203 --- /dev/null +++ b/tests/Clustering/DBSCANTest.php @@ -0,0 +1,96 @@ +cluster($samples)); + + $samples = [[1, 1], [6, 6], [1, -1], [5, 6], [-1, -1], [7, 8], [-1, 1], [7, 7]]; + $clustered = [ + [[1, 1], [1, -1], [-1, -1], [-1, 1]], + [[6, 6], [5, 6], [7, 8], [7, 7]], + ]; + + $dbscan = new DBSCAN($epsilon = 3, $minSamples = 4); + + self::assertEquals($clustered, $dbscan->cluster($samples)); + } + + public function testDBSCANSamplesClusteringAssociative(): void + { + $samples = [ + 'a' => [1, 1], + 'b' => [9, 9], + 'c' => [1, 2], + 'd' => [9, 8], + 'e' => [7, 7], + 'f' => [8, 7], + ]; + $clustered = [ + [ + 'a' => [1, 1], + 'c' => [1, 2], + ], + [ + 'b' => [9, 9], + 'd' => [9, 8], + 'e' => [7, 7], + 'f' => [8, 7], + ], + ]; + + $dbscan = new DBSCAN($epsilon = 3, $minSamples = 2); + + self::assertEquals($clustered, $dbscan->cluster($samples)); + } + + public function testClusterEpsilonSmall(): void + { + $samples = [[0], [1], [2]]; + $clustered = [ + ]; + + $dbscan = new DBSCAN($epsilon = 0.5, $minSamples = 2); + + self::assertEquals($clustered, $dbscan->cluster($samples)); + } + + public function testClusterEpsilonBoundary(): void + { + $samples = [[0], [1], [2]]; + $clustered = [ + ]; + + $dbscan = new DBSCAN($epsilon = 1.0, $minSamples = 2); + + self::assertEquals($clustered, $dbscan->cluster($samples)); + } + + public function testClusterEpsilonLarge(): void + { + $samples = [[0], [1], [2]]; + $clustered = [ + [[0], [1], [2]], + ]; + + $dbscan = new DBSCAN($epsilon = 1.5, $minSamples = 2); + + self::assertEquals($clustered, $dbscan->cluster($samples)); + } +} diff --git a/tests/Clustering/FuzzyCMeansTest.php b/tests/Clustering/FuzzyCMeansTest.php new file mode 100644 index 0000000..638e99c --- /dev/null +++ 
b/tests/Clustering/FuzzyCMeansTest.php @@ -0,0 +1,64 @@ +cluster($samples); + self::assertCount(2, $clusters); + foreach ($samples as $index => $sample) { + if (in_array($sample, $clusters[0], true) || in_array($sample, $clusters[1], true)) { + unset($samples[$index]); + } + } + + self::assertCount(0, $samples); + } + + public function testMembershipMatrix(): void + { + $fcm = new FuzzyCMeans(2); + $fcm->cluster([[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]]); + + $clusterCount = 2; + $sampleCount = 6; + $matrix = $fcm->getMembershipMatrix(); + self::assertCount($clusterCount, $matrix); + foreach ($matrix as $row) { + self::assertCount($sampleCount, $row); + } + + // Transpose of the matrix + array_unshift($matrix, null); + $matrix = array_map(...$matrix); + // All column totals should be equal to 1 (100% membership) + foreach ($matrix as $col) { + self::assertEquals(1, array_sum($col)); + } + } + + /** + * @dataProvider invalidClusterNumberProvider + */ + public function testInvalidClusterNumber(int $clusters): void + { + $this->expectException(InvalidArgumentException::class); + + new FuzzyCMeans($clusters); + } + + public function invalidClusterNumberProvider(): array + { + return [[0], [-1]]; + } +} diff --git a/tests/Clustering/KMeans/ClusterTest.php b/tests/Clustering/KMeans/ClusterTest.php new file mode 100644 index 0000000..2b57d0b --- /dev/null +++ b/tests/Clustering/KMeans/ClusterTest.php @@ -0,0 +1,49 @@ +expectException(LogicException::class); + $this->expectExceptionMessage('Cannot attach a cluster to another'); + + $cluster = new Cluster(new Space(1), []); + $cluster->attach(clone $cluster); + } + + public function testToArray(): void + { + $cluster = new Cluster(new Space(2), [1, 2]); + $cluster->attach(new Point([1, 1])); + + self::assertSame([ + 'centroid' => [1, 2], + 'points' => [ + [1, 1], + ], + ], $cluster->toArray()); + } + + public function testDetach(): void + { + $cluster = new Cluster(new Space(2), []); + $cluster->attach(new Point([1, 
2])); + $cluster->attach($point = new Point([1, 1])); + + $detachedPoint = $cluster->detach($point); + + self::assertSame($detachedPoint, $point); + self::assertNotContains($point, $cluster->getPoints()); + self::assertCount(1, $cluster); + } +} diff --git a/tests/Clustering/KMeansTest.php b/tests/Clustering/KMeansTest.php new file mode 100644 index 0000000..0265f7d --- /dev/null +++ b/tests/Clustering/KMeansTest.php @@ -0,0 +1,86 @@ +cluster($samples); + + self::assertCount(2, $clusters); + + foreach ($samples as $index => $sample) { + if (in_array($sample, $clusters[0], true) || in_array($sample, $clusters[1], true)) { + unset($samples[$index]); + } + } + + self::assertCount(0, $samples); + } + + public function testKMeansSamplesLabeledClustering(): void + { + $samples = [ + '555' => [1, 1], + '666' => [8, 7], + 'ABC' => [1, 2], + 'DEF' => [7, 8], + 668 => [2, 1], + [8, 9], + ]; + + $kmeans = new KMeans(2); + $clusters = $kmeans->cluster($samples); + + self::assertCount(2, $clusters); + + foreach ($samples as $index => $sample) { + if (in_array($sample, $clusters[0], true) || in_array($sample, $clusters[1], true)) { + self::assertArrayHasKey($index, $clusters[0] + $clusters[1]); + unset($samples[$index]); + } + } + + self::assertCount(0, $samples); + } + + public function testKMeansInitializationMethods(): void + { + $samples = [ + [180, 155], [186, 159], [119, 185], [141, 147], [157, 158], + [176, 122], [194, 160], [113, 193], [190, 148], [152, 154], + [162, 146], [188, 144], [185, 124], [163, 114], [151, 140], + [175, 131], [186, 162], [181, 195], [147, 122], [143, 195], + [171, 119], [117, 165], [169, 121], [159, 160], [159, 112], + [115, 122], [149, 193], [156, 135], [118, 120], [139, 159], + [150, 115], [181, 136], [167, 162], [132, 115], [175, 165], + [110, 147], [175, 118], [113, 145], [130, 162], [195, 179], + [164, 111], [192, 114], [194, 149], [139, 113], [160, 168], + [162, 110], [174, 144], [137, 142], [197, 160], [147, 173], + ]; + + $kmeans = new 
KMeans(4, KMeans::INIT_KMEANS_PLUS_PLUS); + $clusters = $kmeans->cluster($samples); + self::assertCount(4, $clusters); + + $kmeans = new KMeans(4, KMeans::INIT_RANDOM); + $clusters = $kmeans->cluster($samples); + self::assertCount(4, $clusters); + } + + public function testThrowExceptionOnInvalidClusterNumber(): void + { + $this->expectException(InvalidArgumentException::class); + new KMeans(0); + } +} diff --git a/tests/CrossValidation/RandomSplitTest.php b/tests/CrossValidation/RandomSplitTest.php new file mode 100644 index 0000000..88928cc --- /dev/null +++ b/tests/CrossValidation/RandomSplitTest.php @@ -0,0 +1,92 @@ +expectException(InvalidArgumentException::class); + new RandomSplit(new ArrayDataset([], []), 0); + } + + public function testThrowExceptionOnToBigTestSize(): void + { + $this->expectException(InvalidArgumentException::class); + new RandomSplit(new ArrayDataset([], []), 1); + } + + public function testDatasetRandomSplitWithoutSeed(): void + { + $dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4]], + $labels = ['a', 'a', 'b', 'b'] + ); + + $randomSplit = new RandomSplit($dataset, 0.5); + + self::assertCount(2, $randomSplit->getTestSamples()); + self::assertCount(2, $randomSplit->getTrainSamples()); + + $randomSplit2 = new RandomSplit($dataset, 0.25); + + self::assertCount(1, $randomSplit2->getTestSamples()); + self::assertCount(3, $randomSplit2->getTrainSamples()); + } + + public function testDatasetRandomSplitWithSameSeed(): void + { + $dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4], [5], [6], [7], [8]], + $labels = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'] + ); + + $seed = 123; + + $randomSplit1 = new RandomSplit($dataset, 0.5, $seed); + $randomSplit2 = new RandomSplit($dataset, 0.5, $seed); + + self::assertEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels()); + self::assertEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples()); + self::assertEquals($randomSplit1->getTrainLabels(), 
$randomSplit2->getTrainLabels()); + self::assertEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples()); + } + + public function testDatasetRandomSplitWithDifferentSeed(): void + { + $dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4], [5], [6], [7], [8]], + $labels = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'] + ); + + $randomSplit1 = new RandomSplit($dataset, 0.5, 4321); + $randomSplit2 = new RandomSplit($dataset, 0.5, 1234); + + self::assertNotEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels()); + self::assertNotEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples()); + self::assertNotEquals($randomSplit1->getTrainLabels(), $randomSplit2->getTrainLabels()); + self::assertNotEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples()); + } + + public function testRandomSplitCorrectSampleAndLabelPosition(): void + { + $dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4]], + $labels = [1, 2, 3, 4] + ); + + $randomSplit = new RandomSplit($dataset, 0.5); + + self::assertEquals($randomSplit->getTestSamples()[0][0], $randomSplit->getTestLabels()[0]); + self::assertEquals($randomSplit->getTestSamples()[1][0], $randomSplit->getTestLabels()[1]); + self::assertEquals($randomSplit->getTrainSamples()[0][0], $randomSplit->getTrainLabels()[0]); + self::assertEquals($randomSplit->getTrainSamples()[1][0], $randomSplit->getTrainLabels()[1]); + } +} diff --git a/tests/CrossValidation/StratifiedRandomSplitTest.php b/tests/CrossValidation/StratifiedRandomSplitTest.php new file mode 100644 index 0000000..909f15f --- /dev/null +++ b/tests/CrossValidation/StratifiedRandomSplitTest.php @@ -0,0 +1,63 @@ +countSamplesByTarget($split->getTestLabels(), 'a')); + self::assertEquals(2, $this->countSamplesByTarget($split->getTestLabels(), 'b')); + + $split = new StratifiedRandomSplit($dataset, 0.25); + + self::assertEquals(1, $this->countSamplesByTarget($split->getTestLabels(), 'a')); + self::assertEquals(1, 
$this->countSamplesByTarget($split->getTestLabels(), 'b')); + } + + public function testDatasetStratifiedRandomSplitWithEvenDistributionAndNumericTargets(): void + { + $dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4], [5], [6], [7], [8]], + $labels = [1, 2, 1, 2, 1, 2, 1, 2] + ); + + $split = new StratifiedRandomSplit($dataset, 0.5); + + self::assertEquals(2, $this->countSamplesByTarget($split->getTestLabels(), 1)); + self::assertEquals(2, $this->countSamplesByTarget($split->getTestLabels(), 2)); + + $split = new StratifiedRandomSplit($dataset, 0.25); + + self::assertEquals(1, $this->countSamplesByTarget($split->getTestLabels(), 1)); + self::assertEquals(1, $this->countSamplesByTarget($split->getTestLabels(), 2)); + } + + /** + * @param string|int $countTarget + */ + private function countSamplesByTarget(array $splitTargets, $countTarget): int + { + $count = 0; + foreach ($splitTargets as $target) { + if ($target === $countTarget) { + ++$count; + } + } + + return $count; + } +} diff --git a/tests/Dataset/ArrayDatasetTest.php b/tests/Dataset/ArrayDatasetTest.php new file mode 100644 index 0000000..98792c7 --- /dev/null +++ b/tests/Dataset/ArrayDatasetTest.php @@ -0,0 +1,40 @@ +expectException(InvalidArgumentException::class); + new ArrayDataset([0, 1], [0]); + } + + public function testArrayDataset(): void + { + $dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4]], + $labels = ['a', 'a', 'b', 'b'] + ); + + self::assertEquals($samples, $dataset->getSamples()); + self::assertEquals($labels, $dataset->getTargets()); + } + + public function testRemoveColumns(): void + { + $dataset = new ArrayDataset( + [[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6], [4, 5, 6, 7]], + ['a', 'a', 'b', 'b'] + ); + $dataset->removeColumns([0, 2]); + + self::assertEquals([[2, 4], [3, 5], [4, 6], [5, 7]], $dataset->getSamples()); + } +} diff --git a/tests/Dataset/CsvDatasetTest.php b/tests/Dataset/CsvDatasetTest.php new file mode 100644 index 0000000..a178726 --- /dev/null 
+++ b/tests/Dataset/CsvDatasetTest.php @@ -0,0 +1,48 @@ +expectException(FileException::class); + new CsvDataset('missingFile', 3); + } + + public function testSampleCsvDatasetWithHeaderRow(): void + { + $filePath = dirname(__FILE__).'/Resources/dataset.csv'; + + $dataset = new CsvDataset($filePath, 2, true); + + self::assertCount(10, $dataset->getSamples()); + self::assertCount(10, $dataset->getTargets()); + } + + public function testSampleCsvDatasetWithoutHeaderRow(): void + { + $filePath = dirname(__FILE__).'/Resources/dataset.csv'; + + $dataset = new CsvDataset($filePath, 2, false); + + self::assertCount(11, $dataset->getSamples()); + self::assertCount(11, $dataset->getTargets()); + } + + public function testLongCsvDataset(): void + { + $filePath = dirname(__FILE__).'/Resources/longdataset.csv'; + + $dataset = new CsvDataset($filePath, 1000, false); + + self::assertCount(1000, $dataset->getSamples()[0]); + self::assertEquals('label', $dataset->getTargets()[0]); + } +} diff --git a/tests/Dataset/Demo/GlassDatasetTest.php b/tests/Dataset/Demo/GlassDatasetTest.php new file mode 100644 index 0000000..3ef182a --- /dev/null +++ b/tests/Dataset/Demo/GlassDatasetTest.php @@ -0,0 +1,23 @@ +getSamples()); + self::assertCount(214, $glass->getTargets()); + + // one sample features count + self::assertCount(9, $glass->getSamples()[0]); + } +} diff --git a/tests/Dataset/Demo/IrisDatasetTest.php b/tests/Dataset/Demo/IrisDatasetTest.php new file mode 100644 index 0000000..171bc38 --- /dev/null +++ b/tests/Dataset/Demo/IrisDatasetTest.php @@ -0,0 +1,23 @@ +getSamples()); + self::assertCount(150, $iris->getTargets()); + + // one sample features count + self::assertCount(4, $iris->getSamples()[0]); + } +} diff --git a/tests/Dataset/Demo/WineDatasetTest.php b/tests/Dataset/Demo/WineDatasetTest.php new file mode 100644 index 0000000..0119294 --- /dev/null +++ b/tests/Dataset/Demo/WineDatasetTest.php @@ -0,0 +1,23 @@ +getSamples()); + self::assertCount(178, $wine->getTargets()); + + 
// one sample features count + self::assertCount(13, $wine->getSamples()[0]); + } +} diff --git a/tests/Dataset/FilesDatasetTest.php b/tests/Dataset/FilesDatasetTest.php new file mode 100644 index 0000000..a7ecd97 --- /dev/null +++ b/tests/Dataset/FilesDatasetTest.php @@ -0,0 +1,43 @@ +expectException(DatasetException::class); + new FilesDataset('some/not/existed/path'); + } + + public function testLoadFilesDatasetWithBBCData(): void + { + $rootPath = dirname(__FILE__).'/Resources/bbc'; + + $dataset = new FilesDataset($rootPath); + + self::assertCount(50, $dataset->getSamples()); + self::assertCount(50, $dataset->getTargets()); + + $targets = ['business', 'entertainment', 'politics', 'sport', 'tech']; + self::assertEquals($targets, array_values(array_unique($dataset->getTargets()))); + + $firstSample = file_get_contents($rootPath.'/business/001.txt'); + self::assertEquals($firstSample, $dataset->getSamples()[0]); + + $firstTarget = 'business'; + self::assertEquals($firstTarget, $dataset->getTargets()[0]); + + $lastSample = file_get_contents($rootPath.'/tech/010.txt'); + self::assertEquals($lastSample, $dataset->getSamples()[49]); + + $lastTarget = 'tech'; + self::assertEquals($lastTarget, $dataset->getTargets()[49]); + } +} diff --git a/tests/Dataset/MnistDatasetTest.php b/tests/Dataset/MnistDatasetTest.php new file mode 100644 index 0000000..5fc7374 --- /dev/null +++ b/tests/Dataset/MnistDatasetTest.php @@ -0,0 +1,33 @@ +getSamples()); + self::assertCount(10, $dataset->getTargets()); + } + + public function testCheckSamplesAndTargetsCountMatch(): void + { + $this->expectException(InvalidArgumentException::class); + + new MnistDataset( + __DIR__.'/Resources/mnist/images-idx-ubyte', + __DIR__.'/Resources/mnist/labels-11-idx-ubyte' + ); + } +} diff --git a/tests/Dataset/Resources/bbc/business/001.txt b/tests/Dataset/Resources/bbc/business/001.txt new file mode 100644 index 0000000..f4e2242 --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/001.txt @@ -0,0 +1,11 
@@ +Ad sales boost Time Warner profit + +Quarterly profits at US media giant TimeWarner jumped 76% to $1.13bn (£600m) for the three months to December, from $639m year-earlier. + +The firm, which is now one of the biggest investors in Google, benefited from sales of high-speed internet connections and higher advert sales. TimeWarner said fourth quarter sales rose 2% to $11.1bn from $10.9bn. Its profits were buoyed by one-off gains which offset a profit dip at Warner Bros, and less users for AOL. + +Time Warner said on Friday that it now owns 8% of search-engine Google. But its own internet business, AOL, had has mixed fortunes. It lost 464,000 subscribers in the fourth quarter profits were lower than in the preceding three quarters. However, the company said AOL's underlying profit before exceptional items rose 8% on the back of stronger internet advertising revenues. It hopes to increase subscribers by offering the online service free to TimeWarner internet customers and will try to sign up AOL's existing customers for high-speed broadband. TimeWarner also has to restate 2000 and 2003 results following a probe by the US Securities Exchange Commission (SEC), which is close to concluding. + +Time Warner's fourth quarter profits were slightly better than analysts' expectations. But its film division saw profits slump 27% to $284m, helped by box-office flops Alexander and Catwoman, a sharp contrast to year-earlier, when the third and final film in the Lord of the Rings trilogy boosted results. For the full-year, TimeWarner posted a profit of $3.36bn, up 27% from its 2003 performance, while revenues grew 6.4% to $42.09bn. "Our financial performance was strong, meeting or exceeding all of our full-year objectives and greatly enhancing our flexibility," chairman and chief executive Richard Parsons said. For 2005, TimeWarner is projecting operating earnings growth of around 5%, and also expects higher revenue and wider profit margins. 
+ +TimeWarner is to restate its accounts as part of efforts to resolve an inquiry into AOL by US market regulators. It has already offered to pay $300m to settle charges, in a deal that is under review by the SEC. The company said it was unable to estimate the amount it needed to set aside for legal reserves, which it previously set at $500m. It intends to adjust the way it accounts for a deal with German music publisher Bertelsmann's purchase of a stake in AOL Europe, which it had reported as advertising revenue. It will now book the sale of its stake in AOL Europe as a loss on the value of that stake. diff --git a/tests/Dataset/Resources/bbc/business/002.txt b/tests/Dataset/Resources/bbc/business/002.txt new file mode 100644 index 0000000..0aa9c6f --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/002.txt @@ -0,0 +1,7 @@ +Dollar gains on Greenspan speech + +The dollar has hit its highest level against the euro in almost three months after the Federal Reserve head said the US trade deficit is set to stabilise. + +And Alan Greenspan highlighted the US government's willingness to curb spending and rising household savings as factors which may help to reduce it. In late trading in New York, the dollar reached $1.2871 against the euro, from $1.2974 on Thursday. Market concerns about the deficit has hit the greenback in recent months. On Friday, Federal Reserve chairman Mr Greenspan's speech in London ahead of the meeting of G7 finance ministers sent the dollar higher after it had earlier tumbled on the back of worse-than-expected US jobs data. "I think the chairman's taking a much more sanguine view on the current account deficit than he's taken for some time," said Robert Sinche, head of currency strategy at Bank of America in New York. "He's taking a longer-term view, laying out a set of conditions under which the current account deficit can improve this year and next." + +Worries about the deficit concerns about China do, however, remain. 
China's currency remains pegged to the dollar and the US currency's sharp falls in recent months have therefore made Chinese export prices highly competitive. But calls for a shift in Beijing's policy have fallen on deaf ears, despite recent comments in a major Chinese newspaper that the "time is ripe" for a loosening of the peg. The G7 meeting is thought unlikely to produce any meaningful movement in Chinese policy. In the meantime, the US Federal Reserve's decision on 2 February to boost interest rates by a quarter of a point - the sixth such move in as many months - has opened up a differential with European rates. The half-point window, some believe, could be enough to keep US assets looking more attractive, and could help prop up the dollar. The recent falls have partly been the result of big budget deficits, as well as the US's yawning current account gap, both of which need to be funded by the buying of US bonds and assets by foreign firms and governments. The White House will announce its budget on Monday, and many commentators believe the deficit will remain at close to half a trillion dollars. diff --git a/tests/Dataset/Resources/bbc/business/003.txt b/tests/Dataset/Resources/bbc/business/003.txt new file mode 100644 index 0000000..dd69655 --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/003.txt @@ -0,0 +1,7 @@ +Yukos unit buyer faces loan claim + +The owners of embattled Russian oil giant Yukos are to ask the buyer of its former production unit to pay back a $900m (£479m) loan. + +State-owned Rosneft bought the Yugansk unit for $9.3bn in a sale forced by Russia to part settle a $27.5bn tax claim against Yukos. Yukos' owner Menatep Group says it will ask Rosneft to repay a loan that Yugansk had secured on its assets. Rosneft already faces a similar $540m repayment demand from foreign banks. Legal experts said Rosneft's purchase of Yugansk would include such obligations. 
"The pledged assets are with Rosneft, so it will have to pay real money to the creditors to avoid seizure of Yugansk assets," said Moscow-based US lawyer Jamie Firestone, who is not connected to the case. Menatep Group's managing director Tim Osborne told the Reuters news agency: "If they default, we will fight them where the rule of law exists under the international arbitration clauses of the credit." + +Rosneft officials were unavailable for comment. But the company has said it intends to take action against Menatep to recover some of the tax claims and debts owed by Yugansk. Yukos had filed for bankruptcy protection in a US court in an attempt to prevent the forced sale of its main production arm. The sale went ahead in December and Yugansk was sold to a little-known shell company which in turn was bought by Rosneft. Yukos claims its downfall was punishment for the political ambitions of its founder Mikhail Khodorkovsky and has vowed to sue any participant in the sale. diff --git a/tests/Dataset/Resources/bbc/business/004.txt b/tests/Dataset/Resources/bbc/business/004.txt new file mode 100644 index 0000000..f03a2c1 --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/004.txt @@ -0,0 +1,11 @@ +High fuel prices hit BA's profits + +British Airways has blamed high fuel prices for a 40% drop in profits. + +Reporting its results for the three months to 31 December 2004, the airline made a pre-tax profit of £75m ($141m) compared with £125m a year earlier. Rod Eddington, BA's chief executive, said the results were "respectable" in a third quarter when fuel costs rose by £106m or 47.3%. BA's profits were still better than market expectation of £59m, and it expects a rise in full-year revenues. + +To help offset the increased price of aviation fuel, BA last year introduced a fuel surcharge for passengers. + +In October, it increased this from £6 to £10 one-way for all long-haul flights, while the short-haul surcharge was raised from £2.50 to £4 a leg. 
Yet aviation analyst Mike Powell of Dresdner Kleinwort Wasserstein says BA's estimated annual surcharge revenues - £160m - will still be way short of its additional fuel costs - a predicted extra £250m. Turnover for the quarter was up 4.3% to £1.97bn, further benefiting from a rise in cargo revenue. Looking ahead to its full year results to March 2005, BA warned that yields - average revenues per passenger - were expected to decline as it continues to lower prices in the face of competition from low-cost carriers. However, it said sales would be better than previously forecast. "For the year to March 2005, the total revenue outlook is slightly better than previous guidance with a 3% to 3.5% improvement anticipated," BA chairman Martin Broughton said. BA had previously forecast a 2% to 3% rise in full-year revenue. + +It also reported on Friday that passenger numbers rose 8.1% in January. Aviation analyst Nick Van den Brul of BNP Paribas described BA's latest quarterly results as "pretty modest". "It is quite good on the revenue side and it shows the impact of fuel surcharges and a positive cargo development, however, operating margins down and cost impact of fuel are very strong," he said. Since the 11 September 2001 attacks in the United States, BA has cut 13,000 jobs as part of a major cost-cutting drive. "Our focus remains on reducing controllable costs and debt whilst continuing to invest in our products," Mr Eddington said. "For example, we have taken delivery of six Airbus A321 aircraft and next month we will start further improvements to our Club World flat beds." BA's shares closed up four pence at 274.5 pence. 
diff --git a/tests/Dataset/Resources/bbc/business/005.txt b/tests/Dataset/Resources/bbc/business/005.txt new file mode 100644 index 0000000..ac7bf0b --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/005.txt @@ -0,0 +1,7 @@ +Pernod takeover talk lifts Domecq + +Shares in UK drinks and food firm Allied Domecq have risen on speculation that it could be the target of a takeover by France's Pernod Ricard. + +Reports in the Wall Street Journal and the Financial Times suggested that the French spirits firm is considering a bid, but has yet to contact its target. Allied Domecq shares in London rose 4% by 1200 GMT, while Pernod shares in Paris slipped 1.2%. Pernod said it was seeking acquisitions but refused to comment on specifics. + +Pernod's last major purchase was a third of US giant Seagram in 2000, the move which propelled it into the global top three of drinks firms. The other two-thirds of Seagram was bought by market leader Diageo. In terms of market value, Pernod - at 7.5bn euros ($9.7bn) - is about 9% smaller than Allied Domecq, which has a capitalisation of £5.7bn ($10.7bn; 8.2bn euros). Last year Pernod tried to buy Glenmorangie, one of Scotland's premier whisky firms, but lost out to luxury goods firm LVMH. Pernod is home to brands including Chivas Regal Scotch whisky, Havana Club rum and Jacob's Creek wine. Allied Domecq's big names include Malibu rum, Courvoisier brandy, Stolichnaya vodka and Ballantine's whisky - as well as snack food chains such as Dunkin' Donuts and Baskin-Robbins ice cream. The WSJ said that the two were ripe for consolidation, having each dealt with problematic parts of their portfolio. Pernod has reduced the debt it took on to fund the Seagram purchase to just 1.8bn euros, while Allied has improved the performance of its fast-food chains. 
diff --git a/tests/Dataset/Resources/bbc/business/006.txt b/tests/Dataset/Resources/bbc/business/006.txt new file mode 100644 index 0000000..fa78492 --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/006.txt @@ -0,0 +1,7 @@ +Japan narrowly escapes recession + +Japan's economy teetered on the brink of a technical recession in the three months to September, figures show. + +Revised figures indicated growth of just 0.1% - and a similar-sized contraction in the previous quarter. On an annual basis, the data suggests annual growth of just 0.2%, suggesting a much more hesitant recovery than had previously been thought. A common technical definition of a recession is two successive quarters of negative growth. + +The government was keen to play down the worrying implications of the data. "I maintain the view that Japan's economy remains in a minor adjustment phase in an upward climb, and we will monitor developments carefully," said economy minister Heizo Takenaka. But in the face of the strengthening yen making exports less competitive and indications of weakening economic conditions ahead, observers were less sanguine. "It's painting a picture of a recovery... much patchier than previously thought," said Paul Sheard, economist at Lehman Brothers in Tokyo. Improvements in the job market apparently have yet to feed through to domestic demand, with private consumption up just 0.2% in the third quarter. diff --git a/tests/Dataset/Resources/bbc/business/007.txt b/tests/Dataset/Resources/bbc/business/007.txt new file mode 100644 index 0000000..4147eb0 --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/007.txt @@ -0,0 +1,9 @@ +Jobs growth still slow in the US + +The US created fewer jobs than expected in January, but a fall in jobseekers pushed the unemployment rate to its lowest level in three years. + +According to Labor Department figures, US firms added only 146,000 jobs in January. 
The gain in non-farm payrolls was below market expectations of 190,000 new jobs. Nevertheless it was enough to push down the unemployment rate to 5.2%, its lowest level since September 2001. The job gains mean that President Bush can celebrate - albeit by a very fine margin - a net growth in jobs in the US economy in his first term in office. He presided over a net fall in jobs up to last November's Presidential election - the first President to do so since Herbert Hoover. As a result, job creation became a key issue in last year's election. However, when adding December and January's figures, the administration's first term jobs record ended in positive territory. + +The Labor Department also said it had revised down the jobs gains in December 2004, from 157,000 to 133,000. + +Analysts said the growth in new jobs was not as strong as could be expected given the favourable economic conditions. "It suggests that employment is continuing to expand at a moderate pace," said Rick Egelton, deputy chief economist at BMO Financial Group. "We are not getting the boost to employment that we would have got given the low value of the dollar and the still relatively low interest rate environment." "The economy is producing a moderate but not a satisfying amount of job growth," said Ken Mayland, president of ClearView Economics. "That means there are a limited number of new opportunities for workers." diff --git a/tests/Dataset/Resources/bbc/business/008.txt b/tests/Dataset/Resources/bbc/business/008.txt new file mode 100644 index 0000000..6657036 --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/008.txt @@ -0,0 +1,7 @@ +India calls for fair trade rules + +India, which attends the G7 meeting of seven leading industrialised nations on Friday, is unlikely to be cowed by its newcomer status. + +In London on Thursday ahead of the meeting, India's finance minister, lashed out at the restrictive trade policies of the G7 nations. 
He objected to subsidies on agriculture that make it hard for developing nations like India to compete. He also called for reform of the United Nations, the World Bank and the IMF. + +Palaniappan Chidambaram, India's finance minister, argued that these organisations need to take into account the changing world order, given India and China's integration into the global economy. He said the issue is not globalisation but "the terms of engagement in globalisation." Mr Chidambaram is attending the G7 meeting as part of the G20 group of nations, which account for two thirds of the world's population. At a conference on developing enterprise hosted by UK finance minister Gordon Brown on Friday, he said that he was in favour of floating exchange rates because they help countries cope with economic shocks. "A flexible exchange rate is one more channel for absorbing both positive and negative shocks," he told the conference. India, along with China, Brazil, South Africa and Russia, has been invited to take part in the G7 meeting taking place in London on Friday and Saturday. China is expected to face renewed pressure to abandon its fixed exchange rate, which G7 nations, in particular the US, have blamed for a surge in cheap Chinese exports. "Some countries have tried to use fixed exchange rates. I do not wish to make any judgements," Mr Chidambaram said. Separately, the IMF warned on Thursday that India's budget deficit was too large and would hamper the country's economic growth, which it forecast to be around 6.5% in the year to March 2005. In the year to March 2004, the Indian economy grew by 8.5%. 
diff --git a/tests/Dataset/Resources/bbc/business/009.txt b/tests/Dataset/Resources/bbc/business/009.txt new file mode 100644 index 0000000..7345c8d --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/009.txt @@ -0,0 +1,9 @@ +Ethiopia's crop production up 24% + +Ethiopia produced 14.27 million tonnes of crops in 2004, 24% higher than in 2003 and 21% more than the average of the past five years, a report says. + +In 2003, crop production totalled 11.49 million tonnes, the joint report from the Food and Agriculture Organisation and the World Food Programme said. Good rains, increased use of fertilizers and improved seeds contributed to the rise in production. Nevertheless, 2.2 million Ethiopians will still need emergency assistance. + +The report calculated emergency food requirements for 2005 to be 387,500 tonnes. On top of that, 89,000 tonnes of fortified blended food and vegetable oil for "targeted supplementary food distributions for a survival programme for children under five and pregnant and lactating women" will be needed. + +In eastern and southern Ethiopia, a prolonged drought has killed crops and drained wells. Last year, a total of 965,000 tonnes of food assistance was needed to help seven million Ethiopians. The Food and Agriculture Organisation (FAO) recommend that the food assistance is bought locally. "Local purchase of cereals for food assistance programmes is recommended as far as possible, so as to assist domestic markets and farmers," said Henri Josserand, chief of FAO's Global Information and Early Warning System. Agriculture is the main economic activity in Ethiopia, representing 45% of gross domestic product. About 80% of Ethiopians depend directly or indirectly on agriculture. 
diff --git a/tests/Dataset/Resources/bbc/business/010.txt b/tests/Dataset/Resources/bbc/business/010.txt new file mode 100644 index 0000000..078b6e9 --- /dev/null +++ b/tests/Dataset/Resources/bbc/business/010.txt @@ -0,0 +1,7 @@ +Court rejects $280bn tobacco case + +A US government claim accusing the country's biggest tobacco companies of covering up the effects of smoking has been thrown out by an appeal court. + +The demand for $280bn (£155bn) - filed by the Clinton administration in 1999 - was rejected in a 2-1 decision. The court in Washington found that the case could not be brought under federal anti-racketeering laws. Among the accused were Altria Group, RJ Reynolds Tobacco, Lorillard Tobacco, Liggett Group and Brown and Williamson. In its case, the government claimed tobacco firms manipulated nicotine levels to increase addiction, targeted teenagers with multi-billion dollar advertising campaigns, lied about the dangers of smoking and ignored research to the contrary. + +Prosecutors wanted the cigarette firms to surrender $280bn in profits accumulated over the past 50 years and impose tougher rules on marketing their products. But the Court of Appeals for the District of Columbia ruled that the US government could not sue the firms under legislation drawn up to counteract Mafia infiltration of business. The tobacco companies deny that they illegally conspired to promote smoking and defraud the public. They also say they have already met many of the government's demands in a landmark $206bn settlement reached with 46 states in 1998. Shares of tobacco companies closed higher after the ruling, with Altria rising 5% and Reynolds showing gains of 4.5%. 
diff --git a/tests/Dataset/Resources/bbc/entertainment/001.txt b/tests/Dataset/Resources/bbc/entertainment/001.txt new file mode 100644 index 0000000..aa8cee0 --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/001.txt @@ -0,0 +1,7 @@ +Gallery unveils interactive tree + +A Christmas tree that can receive text messages has been unveiled at London's Tate Britain art gallery. + +The spruce has an antenna which can receive Bluetooth texts sent by visitors to the Tate. The messages will be "unwrapped" by sculptor Richard Wentworth, who is responsible for decorating the tree with broken plates and light bulbs. It is the 17th year that the gallery has invited an artist to dress their Christmas tree. Artists who have decorated the Tate tree in previous years include Tracey Emin in 2002. + +The plain green Norway spruce is displayed in the gallery's foyer. Its light bulb adornments are dimmed, ordinary domestic ones joined together with string. The plates decorating the branches will be auctioned off for the children's charity ArtWorks. Wentworth worked as an assistant to sculptor Henry Moore in the late 1960s. His reputation as a sculptor grew in the 1980s, while he has been one of the most influential teachers during the last two decades. Wentworth is also known for his photography of mundane, everyday subjects such as a cigarette packet jammed under the wonky leg of a table. diff --git a/tests/Dataset/Resources/bbc/entertainment/002.txt b/tests/Dataset/Resources/bbc/entertainment/002.txt new file mode 100644 index 0000000..b79825f --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/002.txt @@ -0,0 +1,7 @@ +Jarre joins fairytale celebration + +French musician Jean-Michel Jarre is to perform at a concert in Copenhagen to mark the bicentennial of the birth of writer Hans Christian Andersen. + +Denmark is holding a three-day celebration of the life of the fairy-tale author, with a concert at Parken stadium on 2 April. 
Other stars are expected to join the line-up in the coming months, and the Danish royal family will attend. "Christian Andersen's fairy tales are timeless and universal," said Jarre. "For all of us, at any age there is always - beyond the pure enjoyment of the tale - a message to learn." There are year-long celebrations planned across the world to celebrate Andersen and his work, which includes The Emperor's New Clothes and The Little Mermaid. Denmark's Crown Prince Frederik and Crown Princess Mary visited New York on Monday to help promote the festivities. The pair were at a Manhattan library to honour US literary critic Harold Bloom "the international icon we thought we knew so well". + +"Bloom recognizes the darker aspects of Andersen's authorship," Prince Frederik said. Bloom is to be formally presented with the Hans Christian Andersen Award this spring in Anderson's hometown of Odense. The royal couple also visited the Hans Christian Anderson School complex, where Queen Mary read The Ugly Duckling to the young audience. Later at a gala dinner, Danish supermodel Helena Christensen was named a Hans Christian Andersen ambassador. Other ambassadors include actors Harvey Keitel and Sir Roger Moore, athlete Cathy Freeman and Brazilian soccer legend Pele. diff --git a/tests/Dataset/Resources/bbc/entertainment/003.txt b/tests/Dataset/Resources/bbc/entertainment/003.txt new file mode 100644 index 0000000..92bb95a --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/003.txt @@ -0,0 +1,7 @@ +Musical treatment for Capra film + +The classic film It's A Wonderful Life is to be turned into a musical by the producer of the controversial hit show Jerry Springer - The Opera. + +Frank Capra's 1946 movie starring James Stewart, is being turned into a £7m musical by producer Jon Thoday. He is working with Steve Brown, who wrote the award-winning musical Spend Spend Spend. 
A spokeswoman said the plans were in the "very early stages", with no cast, opening date or theatre announced. + +A series of workshops have been held in London, and on Wednesday a cast of singers unveiled the musical to a select group of potential investors. Mr Thoday said the idea of turning the film into a musical had been an ambition of his for almost 20 years. It's a Wonderful Life was based on a short story, The Greatest Gift, by Philip van Doren Stern. Mr Thoday managed to buy the rights to the story from Van Doren Stern's family in 1999, following Mr Brown's success with Spend Spend Spend. He later secured the film rights from Paramount, enabling them to use the title It's A Wonderful Life. diff --git a/tests/Dataset/Resources/bbc/entertainment/004.txt b/tests/Dataset/Resources/bbc/entertainment/004.txt new file mode 100644 index 0000000..8a4b657 --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/004.txt @@ -0,0 +1,7 @@ +Richard and Judy choose top books + +The 10 authors shortlisted for a Richard and Judy book award in 2005 are hoping for a boost in sales following the success of this year's winner. + +The TV couple's interest in the book world coined the term "the Richard & Judy effect" and created the top two best-selling paperbacks of 2004 so far. The finalists for 2005 include Andrew Taylor's The American Boy and Robbie Williams' autobiography Feel. This year's winner, Alice Sebold's The Lovely Bones, sold over one million. Joseph O'Connor's Star of the Sea came second and saw sales increase by 350%. The best read award, on Richard Madeley and Judy Finnigan's Channel 4 show, is part of the British Book Awards. David Mitchell's Booker-shortlisted novel, Cloud Atlas, makes it into this year's top 10 along with several lesser known works. + +"There's no doubt that this year's selection of book club entries is the best yet. If anything, the choice is even wider than last time," said Madeley. 
"It was very hard to follow last year's extremely successful list, but we think this year's books will do even better," said Richard and Judy executive producer Amanda Ross. "We were spoiled for choice and it was tough getting down to only 10 from the 301 submitted." diff --git a/tests/Dataset/Resources/bbc/entertainment/005.txt b/tests/Dataset/Resources/bbc/entertainment/005.txt new file mode 100644 index 0000000..e7bc04e --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/005.txt @@ -0,0 +1,7 @@ +Poppins musical gets flying start + +The stage adaptation of children's film Mary Poppins has had its opening night in London's West End. + +Sir Cameron Mackintosh's lavish production, which has cost £9m to bring to the stage, was given a 10-minute standing ovation. Lead actress Laura Michelle Kelly soared over the heads of the audience holding the nanny's trademark umbrella. Technical hitches had prevented Mary Poppins' flight into the auditorium during preview performances. A number of celebrities turned out for the musical's premiere, including actress Barbara Windsor, comic Graham Norton and Sir Richard Attenborough. + +The show's director Richard Eyre issued a warning earlier in the week that the show was unsuitable for children under seven, while under-threes are barred. Mary Poppins was originally created by author Pamela Travers, who is said to have cried when she saw Disney's 1964 film starring Julie Andrews. Travers had intended the story to be a lot darker than the perennial family favourite. Theatre impresario Sir Cameron Mackintosh has said he hopes the musical is a blend of the sweet-natured film and the original book. 
diff --git a/tests/Dataset/Resources/bbc/entertainment/006.txt b/tests/Dataset/Resources/bbc/entertainment/006.txt new file mode 100644 index 0000000..03dd264 --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/006.txt @@ -0,0 +1,9 @@ +Bennett play takes theatre prizes + +The History Boys by Alan Bennett has been named best new play in the Critics' Circle Theatre Awards. + +Set in a grammar school, the play also earned a best actor prize for star Richard Griffiths as teacher Hector. The Producers was named best musical, Victoria Hamilton was best actress for Suddenly Last Summer and Festen's Rufus Norris was named best director. The History Boys also won the best new comedy title at the Theatregoers' Choice Awards. + +Partly based upon Alan Bennett's experience as a teacher, The History Boys has been at London's National Theatre since last May. The Critics' Circle named Rebecca Lenkiewicz its most promising playwright for The Night Season, and Eddie Redmayne most promising newcomer for The Goat or, Who is Sylvia? + +Paul Rhys was its best Shakespearean performer for Measure for Measure at the National Theatre and Christopher Oram won the design award for Suddenly Last Summer. Both the Critics' Circle and Whatsonstage.com Theatregoers' Choice award winners were announced on Tuesday. Chosen by more than 11,000 theatre fans, the Theatregoers' Choice Awards named US actor Christian Slater best actor for One Flew Over the Cuckoo's Nest. Diana Rigg was best actress for Suddenly Last Summer, Dame Judi Dench was best supporting actress for the RSC's All's Well That Ends Well and The History Boys' Samuel Barnett was best supporting actor. 
diff --git a/tests/Dataset/Resources/bbc/entertainment/007.txt b/tests/Dataset/Resources/bbc/entertainment/007.txt new file mode 100644 index 0000000..64efbe1 --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/007.txt @@ -0,0 +1,9 @@ +Levy tipped for Whitbread prize + +Novelist Andrea Levy is favourite to win the main Whitbread Prize book of the year award, after winning novel of the year with her book Small Island. + +The book has already won the Orange Prize for fiction, and is now 5/4 favourite for the £25,000 Whitbread. Second favourite is a biography of Mary Queen of Scots, by John Guy. A panel of judges including Sir Trevor McDonald, actor Hugh Grant and writer Joanne Harris will decide the overall winner on Tuesday. + +The five writers in line for the award won their respective categories - first novel, novel, biography, poetry and children's book - on 6 January. Small Island, Levy's fourth novel, is set in post-war London and centres on a landlady and her lodgers. One is a Jamaican who joined British troops to fight Hitler but finds life difficult out of uniform when he settles in the UK. "What could have been a didactic or preachy prospect turns out to hilarious, moving humane and eye-popping. It's hard to think of anybody not enjoying it," wrote the judges. The judges called Guy's My Heart is My Own: The Life of Mary Queen of Scots "an impressive and readable piece of scholarship, which cannot fail but leave the reader moved and intrigued by this most tragic and likeable of queens". Guy has published many histories, including one of Tudor England. He is a fellow at Clare College, Cambridge and became a honorary research professor of the University of St Andrews in 2003. + +The other contenders include Susan Fletcher for Eve Green, which won the first novel prize. Fletcher has recently graduated from the University of East Anglia's creative writing course. 
The fourth book in the running is Corpus, Michael Symmons Roberts' fourth collection of poems. As well as writing poetry, Symmons Roberts also makes documentary films. Geraldine McCaughrean is the final contender, having won the children's fiction category for the third time for Not the End of the World. McCaughrean, who went into magazine publishing after studying teaching, previously won the category in 1987 with A Little Lower than Angels and in 1994 with Gold Dust. diff --git a/tests/Dataset/Resources/bbc/entertainment/008.txt b/tests/Dataset/Resources/bbc/entertainment/008.txt new file mode 100644 index 0000000..c1dcf1b --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/008.txt @@ -0,0 +1,9 @@ +West End to honour finest shows + +The West End is honouring its finest stars and shows at the Evening Standard Theatre Awards in London on Monday. + +The Producers, starring Nathan Lane and Lee Evans, is up for best musical at the ceremony at the National Theatre. It is competing against Sweeney Todd and A Funny Thing Happened on the Way to the Forum for the award. The Goat or Who is Sylvia? by Edward Albee, The Pillowman by Martin McDonagh and Alan Bennett's The History Boys are shortlisted in the best play category. + +Pam Ferris, Victoria Hamilton and Kelly Reilly are nominated for best actress. Ferris - best known for her television roles in programmes such as The Darling Buds of May - has made the shortlist for her role in Notes on Falling Leaves, at the Royal Court Theatre. Meanwhile, Richard Griffiths, who plays Hector in The History Boys at the National Theatre, will battle it out for the best actor award with Douglas Hodge (Dumb Show) and Stanley Townsend (Shining City). The best director shortlist includes Luc Bondy for Cruel and Tender, Simon McBurney for Measure for Measure, and Rufus Norris for Festen. 
+ +Festen is also shortlisted in the best designer category where Ian MacNeil, Jean Kalman and Paul Arditti will be up against Hildegard Bechtler, for Iphigenia at Aulis, and Paul Brown, for False Servant. The Milton Shulman Award for outstanding newcomer will be presented to Dominic Cooper (His Dark Materials and The History Boys), Romola Garai (Calico), Eddie Redmayne (The Goat, or Who is Sylvia?) or Ben Wishaw (Hamlet). And playwrights David Eldridge, Rebecca Lenkiewicz and Owen McCafferty will fight it out for The Charles Wintour Award and a £30,000 bursary. Three 50th Anniversary Special Awards will also be presented to an institution, a playwright and an individual. diff --git a/tests/Dataset/Resources/bbc/entertainment/009.txt b/tests/Dataset/Resources/bbc/entertainment/009.txt new file mode 100644 index 0000000..7862fc2 --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/009.txt @@ -0,0 +1,9 @@ +Da Vinci Code is 'lousy history' + +The plot of an international bestseller that thousands of readers are likely to receive as a Christmas present is 'laughable', a clergyman has said. + +The Da Vinci Code claims Jesus was not crucified, but married Mary Magdalene and died a normal death. It claims this was later covered up by the Church. The Bishop of Durham, the Rt Rev Dr Tom Wright, described the novel as a "great thriller" but "lousy history". The book has sold more than seven million copies worldwide. Despite enjoying Dan Brown's conspiracy theory, the Bishop said there was a lack of evidence to back up its claims. + +Writing his Christmas message in the Northern Echo, the Bishop said: "Conspiracy theories are always fun - fun to invent, fun to read, fun to fantasise about. "Dan Brown is the best writer I've come across in the genre, but anyone who knows anything about 1st century history will see that this underlying material is laughable." A great deal of credible evidence proves the Biblical version of Jesus' life was true, according to the Bishop. 
"The evidence for Jesus and the origins of Christianity is astonishingly good," he said. "We have literally a hundred times more early manuscripts for the gospels and letters in the New Testament than we have for the main classical authors like Cicero, Virgil and Tacitus. + +"Historical research shows that they present a coherent and thoroughly credible picture of Jesus, with all sorts of incidental details that fit the time when he lived, and don't fit the world of later legend." Brown's book has become a publishing phenomenon, consistently topping book charts in the UK and US. The Da Vinci Code has been translated into 42 languages and has spawned its own cottage industry of publications, including guides on to how to read the book, rebuttals and counter claims. The book, which has become an international best-seller in little over two years, is set to be made into a film starring Tom Hanks. diff --git a/tests/Dataset/Resources/bbc/entertainment/010.txt b/tests/Dataset/Resources/bbc/entertainment/010.txt new file mode 100644 index 0000000..037c155 --- /dev/null +++ b/tests/Dataset/Resources/bbc/entertainment/010.txt @@ -0,0 +1,9 @@ +Uganda bans Vagina Monologues + +Uganda's authorities have banned the play The Vagina Monologues, due to open in the capital, Kampala this weekend. + +The Ugandan Media Council said the performance would not be put on as it promoted and glorified acts such as lesbianism and homosexuality. It said the production could go ahead if the organisers "expunge all the offending parts". But the organisers of the play say it raises awareness of sexual abuse against women. "The play promotes illegal, unnatural sexual acts, homosexuality and prostitution, it should be and is hereby banned," the council's ruling said. + +The show, which has been a controversial sell-out around the world, explores female sexuality and strength through individual women telling their stories through monologues. 
Some parliamentarians and church leaders are also siding with the Media Council, Uganda's New Vision newspaper reports. "The play is obscene and pornographic although it was under the guise of women's liberation," MP Kefa Ssempgani told parliament. + +But the work's author, US playwright Eve Ensler, says it is all about women's empowerment. "There is obviously some fear of the vagina and saying the word vagina," Ms Ensler told the BBC. "It's not a slang word or dirty word it's a biological, anatomical word." She said the play is being produced and performed by Ugandan women and it is not being forced on them. The four Ugandan NGOs organising the play intended to raise money to campaign to stop violence against women and to raise funds for the war-torn north of the country. "I'm extremely outraged at the hypocrisy," the play's organiser in Uganda, Sarah Mukasa, told the BBC's Focus on Africa programme. "I'm amazed that this country Uganda gives the impression that it is progressive and supports women's rights and the notions of free speech; yet when women want to share their stories the government uses the apparatus of state to shut us up." diff --git a/tests/Dataset/Resources/bbc/politics/001.txt b/tests/Dataset/Resources/bbc/politics/001.txt new file mode 100644 index 0000000..285893a --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/001.txt @@ -0,0 +1,11 @@ +Labour plans maternity pay rise + +Maternity pay for new mothers is to rise by £1,400 as part of new proposals announced by the Trade and Industry Secretary Patricia Hewitt. + +It would mean paid leave would be increased to nine months by 2007, Ms Hewitt told GMTV's Sunday programme. Other plans include letting maternity pay be given to fathers and extending rights to parents of older children. The Tories dismissed the maternity pay plan as "desperate", while the Liberal Democrats said it was misdirected. 
+ +Ms Hewitt said: "We have already doubled the length of maternity pay, it was 13 weeks when we were elected, we have already taken it up to 26 weeks. "We are going to extend the pay to nine months by 2007 and the aim is to get it right up to the full 12 months by the end of the next Parliament." She said new mothers were already entitled to 12 months leave, but that many women could not take it as only six of those months were paid. "We have made a firm commitment. We will definitely extend the maternity pay, from the six months where it now is to nine months, that's the extra £1,400." She said ministers would consult on other proposals that could see fathers being allowed to take some of their partner's maternity pay or leave period, or extending the rights of flexible working to carers or parents of older children. The Shadow Secretary of State for the Family, Theresa May, said: "These plans were announced by Gordon Brown in his pre-budget review in December and Tony Blair is now recycling it in his desperate bid to win back women voters." + +She said the Conservatives would announce their proposals closer to the General Election. Liberal Democrat spokeswoman for women Sandra Gidley said: "While mothers would welcome any extra maternity pay the Liberal Democrats feel this money is being misdirected." She said her party would boost maternity pay in the first six months to allow more women to stay at home in that time. + +Ms Hewitt also stressed the plans would be paid for by taxpayers, not employers. But David Frost, director general of the British Chambers of Commerce, warned that many small firms could be "crippled" by the move. "While the majority of any salary costs may be covered by the government's statutory pay, recruitment costs, advertising costs, retraining costs and the strain on the company will not be," he said. Further details of the government's plans will be outlined on Monday. 
New mothers are currently entitled to 90% of average earnings for the first six weeks after giving birth, followed by £102.80 a week until the baby is six months old. diff --git a/tests/Dataset/Resources/bbc/politics/002.txt b/tests/Dataset/Resources/bbc/politics/002.txt new file mode 100644 index 0000000..5468695 --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/002.txt @@ -0,0 +1,11 @@ +Watchdog probes e-mail deletions + +The information commissioner says he is urgently asking for details of Cabinet Office orders telling staff to delete e-mails more than three months old. + +Richard Thomas "totally condemned" the deletion of e-mails to prevent their disclosure under freedom of information laws coming into force on 1 January. Government guidance said e-mails should only be deleted if they served "no current purpose", Mr Thomas said. The Tories and the Lib Dems have questioned the timing of the new rules. + +Tory leader Michael Howard has written to Tony Blair demanding an explanation of the new rules on e-mail retention. On Monday Lib Dem constitutional affairs committee chairman Alan Beith warned that the deletion of millions of government e-mails could harm the ability of key probes like the Hutton Inquiry. The timing of the new rules just before the Freedom of Information Act comes into forces was "too unlikely to have been a coincidence", Mr Beith said. But a Cabinet Office spokeswoman said the move was not about the new laws or "the destruction of important records". Mr Beith urged the information commissioner to look at how the "e-mail regime" could "support the freedom of information regime". + +Mr Thomas said: "The new Act of Parliament makes it very clear that to destroy records in order to prevent their disclosure becomes a criminal offence." He said there was already clear guidance on the retention of e-mails contained in a code of practice from the lord chancellor. 
All e-mails are subject to the freedom of information laws, but the important thing was the content of the e-mail, said Mr Thomas. + +"If in doubt retain, that has been the long-standing principle of the civil service and public authorities. It's only when you've got no further use for the particular record that it may be legitimate to destroy it. "But any deliberate destruction to avoid the possibility of later disclosure is to be totally condemned." The Freedom of Information Act will cover England, Wales and Northern Ireland from next year. Similar measures are being brought in at the same time in Scotland. It provides the public with a right of access to information held by about 100,000 public bodies, subject to various exemptions. Its implementation will be monitored by the information commissioner. diff --git a/tests/Dataset/Resources/bbc/politics/003.txt b/tests/Dataset/Resources/bbc/politics/003.txt new file mode 100644 index 0000000..d8e1bce --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/003.txt @@ -0,0 +1,15 @@ +Hewitt decries 'career sexism' + +Plans to extend paid maternity leave beyond six months should be prominent in Labour's election manifesto, the Trade and Industry Secretary has said. + +Patricia Hewitt said the cost of the proposals was being evaluated, but it was an "increasingly high priority" and a "shared goal across government". Ms Hewitt was speaking at a gender and productivity seminar organised by the Equal Opportunities Commission (EOC). Mothers can currently take up to six months' paid leave - and six unpaid. Ms Hewitt told the seminar: "Clearly, one of the things we need to do in the future is to extend the period of payment for maternity leave beyond the first six months into the second six months. "We are looking at how quickly we can do that, because obviously there are cost implications because the taxpayer reimburses the employers for the cost of that." 
+ +Ms Hewitt also announced a new drive to help women who want to work in male dominated sectors, saying sexism at work was still preventing women reaching their full potential. Plans include funding for universities to help female science and engineering graduates find jobs and "taster courses" for men and women in non-traditional jobs. Women in full-time work earn 19% less than men, according to the Equal Opportunities Commission (EOC). + +The minister told delegates that getting rid of "career sexism" was vital to closing the gender pay gap. + +"Career sexism limits opportunities for women of all ages and prevents them from achieving their full potential. "It is simply wrong to assume someone cannot do a job on the grounds of their sex," she said. Earlier, she told BBC Radio 4's Today programme: "What we are talking about here is the fact that about six out of 20 women work in jobs that are low-paid and typically dominated by women, so we have got very segregated employment. "Unfortunately, in some cases, this reflects very old-fashioned and stereotypical ideas about the appropriate jobs for women, or indeed for men. "Career sexism is about saying that engineering, for instance, where only 10% of employees are women, is really a male-dominated industry. Construction is even worse. "But it is also about saying childcare jobs are really there for women and not suitable for men. Career sexism goes both ways." + +She added that while progress had been made, there was still a gap in pay figures. "The average woman working full-time is being paid about 80p for every pound a man is earning. For women working part-time it is 60p." The Department for Trade and Industry will also provide funding to help a new pay experts panel run by the TUC. + +It has been set up to advise hundreds of companies on equal wage policies. 
Research conducted by the EOC last year revealed that many Britons believe the pay gap between men and women is the result of "natural differences" between the sexes. Women hold less than 10% of the top positions in FTSE 100 companies, the police, the judiciary and trade unions, according to their figures. And retired women have just over half the income of their male counterparts on average. diff --git a/tests/Dataset/Resources/bbc/politics/004.txt b/tests/Dataset/Resources/bbc/politics/004.txt new file mode 100644 index 0000000..e192dc5 --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/004.txt @@ -0,0 +1,9 @@ +Labour chooses Manchester + +The Labour Party will hold its 2006 autumn conference in Manchester and not Blackpool, it has been confirmed. + +The much trailed decision was ratified by Labour's ruling National Executive Committee in a break with the traditional choice of a seaside venue. It will be the first time since 1917 that the party has chosen Manchester to host the annual event. Blackpool will get the much smaller February spring conference instead in what will be seen as a placatory move. + +For years the main political parties have rotated between Blackpool, Bournemouth and Brighton. And the news the much larger annual conference is not to gather in Blackpool will be seen as a blow in the coastal resort. In 1998 the party said it would not return to Blackpool but did so in 2002. The following year Bournemouth hosted the event before the party signed a two year deal for Brighton to host the autumn conference. + +Colin Asplin, Blackpool Hotel Association said: "We have tried very hard to make sure they come back to Blackpool. "Obviously we have failed in that. I just hope Manchester can handle the crowds. "It amazes me that the Labour Party, which is a working class party, doesn't want to come to the main working class resort in the country." 
The exact cost to Blackpool in terms of lost revenue for hotel accommodation is not yet known but it is thought that block bookings will be taken at the major Manchester hotels after the official announcement. diff --git a/tests/Dataset/Resources/bbc/politics/005.txt b/tests/Dataset/Resources/bbc/politics/005.txt new file mode 100644 index 0000000..17748d8 --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/005.txt @@ -0,0 +1,15 @@ +Brown ally rejects Budget spree + +Chancellor Gordon Brown's closest ally has denied suggestions there will be a Budget giveaway on 16 March. + +Ed Balls, ex-chief economic adviser to the Treasury, said there would be no spending spree before polling day. But Mr Balls, a prospective Labour MP, said he was confident the chancellor would meet his fiscal rules. He was speaking as Sir Digby Jones, CBI director general, warned Mr Brown not to be tempted to use any extra cash on pre-election bribes. + +Mr Balls, who stepped down from his Treasury post to stand as a Labour candidate in the election, had suggested that Mr Brown would meet his golden economic rule - "with a margin to spare". He said he hoped more would be done to build on current tax credit rules. + +He also stressed rise in interest rates ahead of an expected May election would not affect the Labour Party's chances of winning. Expectations of a rate rise have gathered pace after figures showed house prices are still rising. Consumer borrowing rose at a near-record pace in January. "If the MPC (the Bank of England's Monetary Policy Committee) were to judge that a rate rise was justified before the election because of the strength of the economy - and I'm not predicting that they will - I do not believe that this will be a big election issue in Britain for Labour," he told a Parliamentary lunch. "This is a big change in our political culture." 
+ +During an interview with BBC Radio 4's Today programme, Mr Balls said he was sure Mr Brown's Budget would not put at risk the stability of the economy. "I don't think we'll see a pre-election spending spree - we certainly did not see that before 2001," he said. + +His assurances came after Sir Digby Jones said stability was all important and any extra cash should be spent on improving workers' skills. His message to the chancellor was: "Please don't give it away in any form of electioneering." Sir Digby added: "I don't think he will. I have to say he has been a prudent chancellor right the way through. Stability is the key word - British business needs boring stability more than anything. "We would say to him 'don't increase your public spending, don't give it away. But if you are going to anywhere, just add something to the competitiveness of Britain, put it into skilling our people'. "That would be a good way to spend any excess." + +Mr Balls refused to say whether Mr Brown would remain as chancellor after the election, amid speculation he will be offered the job of Foreign Secretary. "I think that Gordon Brown wants to be part of the successful Labour government which delivers in the third term for the priorities of the people and sees off a Conservative Party that will take Britain backwards," Mr Balls told Today. Prime Minister Tony Blair has yet to name the date of the election, but most pundits are betting on 5 May. diff --git a/tests/Dataset/Resources/bbc/politics/006.txt b/tests/Dataset/Resources/bbc/politics/006.txt new file mode 100644 index 0000000..9dc8640 --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/006.txt @@ -0,0 +1,7 @@ +'Errors' doomed first Dome sale + +The initial attempt to sell the Millennium Dome failed due to a catalogue of errors, a report by the government's finance watchdog says. + +The report said too many parties were involved in decision-making when the attraction first went on sale after the Millennium exhibition ended. 
The National Audit Office said the Dome cost taxpayers £28.7m to maintain and sell in the four years after it closed. Finally, a deal to turn it into a sport and entertainment venue was struck. More than £550m could now be returned to the public sector in the wake of the deal to regenerate the site in Greenwich, London. + +The NAO report said that this sale went through because it avoided many of the problems of the previous attempt to sell the Dome. Deputy Prime Minister John Prescott said a good deal had been secured. "Delivery of the many benefits secured through this deal will continue the substantial progress already made at the Millennium Village and elsewhere on the peninsula," he said. But Edward Leigh, who is chairman of the Commons public accounts committee, warned the government would have to work hard to ensure taxpayers would get full benefit from the Dome deal. He said: "This report also shows that the first attempt to sell the Dome proved a complete fiasco. Every arm of government seems to have had a finger in the pie. The process was confused and muddled." He added: "Four years after the Millennium Exhibition closed, the Government finally has a deal to find a use for what has been a white elephant since it closed in a deal that, incredible as it may seem, should bring in some money and provide a benefit for the local area and the country as whole. However, it was more a question of luck that a strong bid turned up after thefirst abortive attempt." NAO head Sir John Bourn said: "In difficult circumstances following the failure of the first competition, English Partnerships and the office of the deputy prime minister have worked hard to get a deal." 
diff --git a/tests/Dataset/Resources/bbc/politics/007.txt b/tests/Dataset/Resources/bbc/politics/007.txt new file mode 100644 index 0000000..e17e192 --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/007.txt @@ -0,0 +1,13 @@ +Fox attacks Blair's Tory 'lies' + +Tony Blair lied when he took the UK to war so has no qualms about lying in the election campaign, say the Tories. + +Tory co-chairman Liam Fox was speaking after Mr Blair told Labour members the Tories offered a "hard right agenda". Dr Fox told BBC Radio: "If you are willing to lie about the reasons for going to war, I guess you are going to lie about anything at all." He would not discuss reports the party repaid £500,000 to Lord Ashcroft after he predicted an election defeat. + +The prime minister ratcheted up Labour's pre-election campaigning at the weekend with a helicopter tour of the country and his speech at the party's spring conference. He insisted he did not know the poll date, but it is widely expected to be 5 May. + +In what was seen as a highly personal speech in Gateshead on Sunday, Mr Blair said: "I have the same passion and hunger as when I first walked through the door of 10 Downing Street." He described his relationship with the public as starting euphoric, then struggling to live up to the expectations, and reaching the point of raised voices and "throwing crockery". He warned his supporters against complacency, saying: "It's a fight for the future of our country, it's a fight that for Britain and the people of Britain we have to win." + +Mr Blair said that whether the public chose Michael Howard or Mr Kennedy, it would result in "a Tory government not a Labour government and a country that goes back and does not move forward". Dr Fox accused Mr Blair and other Cabinet ministers of telling lies about their opponents' policies and then attacking the lies. "What we learned at the weekend is what Labour tactics are going to be and it's going to be fear and smear," he told BBC News. 
The Tory co-chairman attacked Labour's six new pledges as "vacuous" and said Mr Blair was very worried voters would take revenge for his failure to deliver. Dr Fox refused to discuss weekend newspaper reports that the party had repaid £500,000 to former Tory Treasurer Lord Ashcroft after he said the party could not win the election. "We repay loans when they are due but do not comment to individual financial matters," he said, insisting he enjoyed a "warm and constructive" relationship to Lord Ashcroft. + +Meanwhile Lib Dem leader Charles Kennedy is expected to attack Mr Blair's words as he begins a nationwide tour on Monday. Mr Kennedy is accelerating Lib Dem election preparations this week as he visits Manchester, Liverpool, Leicester, Somerset, Basingstoke, Shrewsbury, Dorset and Torbay. He said: "This is three-party politics. In the northern cities, the contest is between Labour and the Liberal Democrats. "In southern and rural seats - especially in the South West - the principal contenders are the Liberal Democrats and the Conservatives, who are out of the running in Scotland and Wales." The Lib Dems accuse Mr Blair of making a "touchy-feely" speech to Labour delegates which will not help him regain public trust. diff --git a/tests/Dataset/Resources/bbc/politics/008.txt b/tests/Dataset/Resources/bbc/politics/008.txt new file mode 100644 index 0000000..8da6426 --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/008.txt @@ -0,0 +1,11 @@ +Women MPs reveal sexist taunts + +Women MPs endure "shocking" levels of sexist abuse at the hands of their male counterparts, a new study shows. + +Male MPs pretended to juggle imaginary breasts and jeered "melons" as women made Commons speeches, researchers from Birkbeck College were told. Labour's Yvette Cooper said she found it hard to persuade Commons officials she was a minister and not a secretary. Some 83 MPs gave their answers in 100 hours of taped interviews for the study "Whose Secretary are You, minister". 
+ +The research team, under Professor Joni Lovenduski, had set out to look at the achievements and experiences of women at Westminster. But what emerged was complaints from MPs of all parties of sexist barracking in the Chamber, sexist insults and patronising assumptions about their abilities. Barbara Follet, one of the so-called "Blair Babes" elected in 1997, told researchers: "I remember some Conservatives - whenever a Labour woman got up to speak they would take their breasts - imaginary breasts - in their hands and wiggle them and say 'melons' as we spoke." Former Liberal Democrat MP Jackie Ballard recalled a stream of remarks from a leading MP on topics such as women's legs or their sexual persuasion. And ex-Tory education secretary Gillian Shepherd remembered how one of her male colleagues called all women "Betty". + +"When I said, 'Look you know my name isn't Betty', he said, 'ah but you're all the same, so I call you all Betty'." Harriet Harman told researchers of the sheer hostility prompted by her advancement to the Cabinet: "Well, you've only succeeded because you're a woman." Another current member of the Cabinet says she was told: "Oh, you've had a very fast rise, who have you been sleeping with?" Even after the great influx of women MPs at the 1997 general election, and greater numbers of women in the Cabinet, female MPs often say they feel stuck on the edge of a male world. + +Liberal Democrat Sarah Teather, the most recent female MP to be elected, told researchers: "Lots of people say it's like an old boys club. "I've always said to me it feels more like a teenage public school - you know a public school full of teenagers." Prof Joni Lovenduski, who conducted the study with the help of Margaret Moran MP and a team of journalists, said she was shocked at the findings. "We expected a bit of this but nothing like this extent. We expected to find a couple of shocking episodes." 
But she said there was a difference between the experiences of women before the 1997 intake and afterwards. This was mainly because there were more women present in Parliament who were not prepared to "put up with" the sexist attitudes they came across, Prof Lovenduski said. But she added: "Some women, including the women who came in 1997, received extraordinary treatment and I am not convinced that if the number of women changed back to what it was before 1997 that things would not change back. "What I think is shocking to the general public is that these things go on in the House of Commons." The interviews are to be placed in the British Library as a historical record. diff --git a/tests/Dataset/Resources/bbc/politics/009.txt b/tests/Dataset/Resources/bbc/politics/009.txt new file mode 100644 index 0000000..ef07bba --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/009.txt @@ -0,0 +1,13 @@ +Campbell: E-mail row 'silly fuss' + +Ex-No 10 media chief Alastair Campbell is at the centre of a new political row over an e-mail containing a four-letter outburst aimed at BBC journalists. + +Mr Campbell sent the missive by mistake to BBC2's Newsnight after it sought to question his role in Labour's controversial poster campaign. He later contacted the show saying the original e-mail had been sent in error and that it was all a "silly fuss". Mr Campbell has recently re-joined Labour's election campaign. + +The e-mail was revealed the day after Peter Mandelson, former Labour minister and now a European Commissioner, warned the BBC to steer away from "demonising" Mr Campbell. Mr Campbell messaged Newsnight after the programme investigated claims that Labour's advertising agency TBWA was blaming him for controversy over its campaign posters. The images, including one of flying pigs and another of what critics claim depicted Tory leader Michael Howard as Fagin, prompted accusations of anti-Semitism, claims denied by Labour. 
+ +Mr Campbell's e-mail, which was apparently intended for a party official, suggested they should get Trevor Beattie, TBWA's boss, to issue a statement. In it, he said: "Just spoke to trev. think tbwa shd give statement to newsnight saying party and agency work together well and nobody here has spoken to standard. Posters done by by tbwa according to political brief. Now fuck off and cover something important you twats!" The e-mail was sent by mistake to Newsnight journalist Andrew McFadyen. Realising his error, Mr Campbell then e-mailed Mr McFadyen pointing out the mistake, but suggesting presenter Jeremy Paxman would have seen the funny side. + +He said: "Not very good at this e-mail Blackberry malarkey. Just looked at log of sent messages, have realised e-mail meant for colleagues at TBWA has gone to you. For the record, first three sentences of email spot on. No row between me and trevor. "Posters done by them according to our brief. I dreamt up flying pigs. Pigs not great but okay in the circs of Tories promising tax cuts and spending rises with the same money. TBWA made production. "Campbell swears shock. Final sentence of earlier e-mail probably a bit colourful and personal considering we have never actually met but I'm sure you share the same sense of humour as your star presenter Mr P. "Never known such a silly fuss since the last silly fuss but there we go. Must look forward not back." + +Later the prime minister's spokesman was asked by journalists about his view on Mr Campbell's use of abusive language. The spokesman said: "The person you are referring to is capable of speaking for himself and he no longer works in government." Foreign Secretary Jack Straw said he had always had "very good and polite relations" with Mr Campbell, who he described as "very talented". 
But on the former spin doctor's use of language, Mr Straw said: "I do know the odd journalist who has occasionally used the odd word that would probably be inappropriate in some circumstances. Maybe I mix with the wrong kind of journalists." Liam Fox, Tory co-chairman, said the return of Mr Campbell was a sign of new "sinister and underhand tactics" by Labour. diff --git a/tests/Dataset/Resources/bbc/politics/010.txt b/tests/Dataset/Resources/bbc/politics/010.txt new file mode 100644 index 0000000..8bcedaa --- /dev/null +++ b/tests/Dataset/Resources/bbc/politics/010.txt @@ -0,0 +1,7 @@ +Crucial decision on super-casinos + +A decision on whether to allow Westminster to legislate on super-casinos is set to be made by the Scottish Parliament. + +The government has plans for up to eight Las Vegas style resorts in the UK, one of which is likely to be in Glasgow. Scottish ministers insist they will still have the final say on whether a super-casino will be built in Scotland. But opposition parties say that will not happen in practice. The vote is due to be taken on Wednesday and is expected to be close. + +The Scottish Executive believes that the legislation should be handled by Westminster. The new law will control internet gambling for the first time and is aimed at preventing children from becoming involved. A super-casino in Glasgow could be located at Ibrox or the Scottish Exhibition and Conference Centre. The new gambling bill going through Westminster will allow casino complexes to open to the public, have live entertainment and large numbers of fruit machines with unlimited prizes. But the Scottish National Party and the Tories say the issue of super-casinos should be decided in Scotland and believe the executive is shirking its responsibility. 
diff --git a/tests/Dataset/Resources/bbc/sport/001.txt b/tests/Dataset/Resources/bbc/sport/001.txt new file mode 100644 index 0000000..0233bf6 --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/001.txt @@ -0,0 +1,7 @@ +Claxton hunting first major medal + +British hurdler Sarah Claxton is confident she can win her first major medal at next month's European Indoor Championships in Madrid. + +The 25-year-old has already smashed the British record over 60m hurdles twice this season, setting a new mark of 7.96 seconds to win the AAAs title. "I am quite confident," said Claxton. "But I take each race as it comes. "As long as I keep up my training but not do too much I think there is a chance of a medal." Claxton has won the national 60m hurdles title for the past three years but has struggled to translate her domestic success to the international stage. Now, the Scotland-born athlete owns the equal fifth-fastest time in the world this year. And at last week's Birmingham Grand Prix, Claxton left European medal favourite Russian Irina Shevchenko trailing in sixth spot. + +For the first time, Claxton has only been preparing for a campaign over the hurdles - which could explain her leap in form. In previous seasons, the 25-year-old also contested the long jump but since moving from Colchester to London she has re-focused her attentions. Claxton will see if her new training regime pays dividends at the European Indoors which take place on 5-6 March. diff --git a/tests/Dataset/Resources/bbc/sport/002.txt b/tests/Dataset/Resources/bbc/sport/002.txt new file mode 100644 index 0000000..0102893 --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/002.txt @@ -0,0 +1,5 @@ +O'Sullivan could run in Worlds + +Sonia O'Sullivan has indicated that she would like to participate in next month's World Cross Country Championships in St Etienne. + +Athletics Ireland have hinted that the 35-year-old Cobh runner may be included in the official line-up for the event in France on 19-20 March. 
Provincial teams were selected after last Saturday's Nationals in Santry and will be officially announced this week. O'Sullivan is at present preparing for the London marathon on 17 April. The participation of O'Sullivan, currently training at her base in Australia, would boost the Ireland team who won the bronze three years ago. The first three at Santry last Saturday, Jolene Byrne, Maria McCambridge and Fionnualla Britton, are automatic selections and will most likely form part of the long-course team. O'Sullivan will also take part in the Bupa Great Ireland Run on 9 April in Dublin. diff --git a/tests/Dataset/Resources/bbc/sport/003.txt b/tests/Dataset/Resources/bbc/sport/003.txt new file mode 100644 index 0000000..9dcc752 --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/003.txt @@ -0,0 +1,7 @@ +Greene sets sights on world title + +Maurice Greene aims to wipe out the pain of losing his Olympic 100m title in Athens by winning a fourth World Championship crown this summer. + +He had to settle for bronze in Greece behind fellow American Justin Gatlin and Francis Obikwelu of Portugal. "It really hurts to look at that medal. It was my mistake. I lost because of the things I did," said Greene, who races in Birmingham on Friday. "It's never going to happen again. My goal - I'm going to win the worlds." Greene crossed the line just 0.02 seconds behind Gatlin, who won in 9.87 seconds in one of the closest and fastest sprints of all time. But Greene believes he lost the race and his title in the semi-finals. "In my semi-final race, I should have won the race but I was conserving energy. "That's when Francis Obikwelu came up and I took third because I didn't know he was there. "I believe that's what put me in lane seven in the final and, while I was in lane seven, I couldn't feel anything in the race. + +"I just felt like I was running all alone. "I believe if I was in the middle of the race I would have been able to react to people that came ahead of me."
Greene was also denied Olympic gold in the 4x100m men's relay when he could not catch Britain's Mark Lewis-Francis on the final leg. The Kansas star is set to go head-to-head with Lewis-Francis again at Friday's Norwich Union Grand Prix. The pair contest the 60m, the distance over which Greene currently holds the world record of 6.39 seconds. He then has another indoor meeting in France before resuming training for the outdoor season and the task of recapturing his world title in Helsinki in August. Greene believes Gatlin will again prove the biggest threat to his ambitions in Finland. But he also admits he faces more than one rival for the world crown. "There's always someone else coming. I think when I was coming up I would say there was me and Ato (Boldon) in the young crowd," Greene said. "Now you've got about five or six young guys coming up at the same time." diff --git a/tests/Dataset/Resources/bbc/sport/004.txt b/tests/Dataset/Resources/bbc/sport/004.txt new file mode 100644 index 0000000..59195b6 --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/004.txt @@ -0,0 +1,5 @@ +IAAF launches fight against drugs + +The IAAF - athletics' world governing body - has met anti-doping officials, coaches and athletes to co-ordinate the fight against drugs in sport. + +Two task forces have been set up to examine doping and nutrition issues. It was also agreed that a programme to "de-mystify" the issue to athletes, the public and the media was a priority. "Nothing was decided to change things - it was more to have a forum of the stakeholders allowing them to express themselves," said an IAAF spokesman. "Getting everyone together gave us a lot of food for thought." About 60 people attended Sunday's meeting in Monaco, including IAAF chief Lamine Diack and Namibian athlete Frankie Fredericks, now a member of the Athletes' Commission. 
"I am very happy to see you all, members of the athletics family, respond positively to the IAAF call to sit together and discuss what more we can do in the fight against doping," said Diack. "We are the leading Federation in this field and it is our duty to keep our sport clean." The two task forces will report back to the IAAF Council, at its April meeting in Qatar. diff --git a/tests/Dataset/Resources/bbc/sport/005.txt b/tests/Dataset/Resources/bbc/sport/005.txt new file mode 100644 index 0000000..f3d9dd4 --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/005.txt @@ -0,0 +1,5 @@ +Dibaba breaks 5,000m world record + +Ethiopia's Tirunesh Dibaba set a new world record in winning the women's 5,000m at the Boston Indoor Games. + +Dibaba won in 14 minutes 32.93 seconds to erase the previous world indoor mark of 14:39.29 set by another Ethiopian, Berhane Adera, in Stuttgart last year. But compatriot Kenenisa Bekele's record hopes were dashed when he miscounted his laps in the men's 3,000m and staged his sprint finish a lap too soon. Ireland's Alistair Cragg won in 7:39.89 as Bekele battled to second in 7:41.42. "I didn't want to sit back and get out-kicked," said Cragg. "So I kept on the pace. The plan was to go with 500m to go no matter what, but when Bekele made the mistake that was it. The race was mine." Sweden's Carolina Kluft, the Olympic heptathlon champion, and Slovenia's Jolanda Ceplak had winning performances, too. Kluft took the long jump at 6.63m, while Ceplak easily won the women's 800m in 2:01.52. diff --git a/tests/Dataset/Resources/bbc/sport/006.txt b/tests/Dataset/Resources/bbc/sport/006.txt new file mode 100644 index 0000000..98d645f --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/006.txt @@ -0,0 +1,5 @@ +Isinbayeva claims new world best + +Pole vaulter Yelena Isinbayeva broke her own indoor world record by clearing 4.89 metres in Lievin on Saturday. 
+ +It was the Russian's 12th world record of her career and came just a few days after she cleared 4.88m at the Norwich Union Grand Prix in Birmingham. The Olympic champion went on to attempt 5.05m at the meeting in France but failed to clear that height. In the men's 60m, former Olympic 100m champion Maurice Greene could only finish second to Leonard Scott. It was Greene's second consecutive defeat at the hands of his fellow American, who also won in Birmingham last week. "I ran my race perfectly," said Scott, who won in 6.46secs, his best time indoors. "I am happy even if I know that Maurice is a long way from being at his peak at the start of the season." diff --git a/tests/Dataset/Resources/bbc/sport/007.txt b/tests/Dataset/Resources/bbc/sport/007.txt new file mode 100644 index 0000000..1047180 --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/007.txt @@ -0,0 +1,5 @@ +O'Sullivan commits to Dublin race + +Sonia O'Sullivan will seek to regain her title at the Bupa Great Ireland Run on 9 April in Dublin. + +The 35-year-old was beaten into fourth at last year's event, having won it a year earlier. "I understand she's had a solid winter's training down in Australia after recovering from a minor injury," said race director Matthew Turnbull. Mark Carroll, Irish record holder at 3km, 5km and 10km, will make his debut in the mass participation 10km race. Carroll has stepped up his form in recent weeks and in late January scored an impressive 3,000m victory over leading American Alan Webb in Boston. Carroll will be facing stiff competition from Australian Craig Mottram, winner in Dublin for the last two years.
diff --git a/tests/Dataset/Resources/bbc/sport/008.txt b/tests/Dataset/Resources/bbc/sport/008.txt new file mode 100644 index 0000000..b2b47ae --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/008.txt @@ -0,0 +1,5 @@ +Hansen 'delays return until 2006' + +British triple jumper Ashia Hansen has ruled out a comeback this year after a setback in her recovery from a bad knee injury, according to reports. + +Hansen, the Commonwealth and European champion, has been sidelined since the European Cup in Poland in June 2004. It was hoped she would be able to return this summer, but the wound from the injury has been very slow to heal. Her coach Aston Moore told the Times: "We're not looking at any sooner than 2006, not as a triple jumper." Moore said Hansen may be able to return to sprinting and long jumping sooner, but there is no short-term prospect of her being involved again in her specialist event. "There was a problem with the wound healing and it set back her rehabilitation by about two months, but that has been solved and we can push ahead now," he said. "The aim is for her to get fit as an athlete - then we will start looking at sprinting and the long jump as an introduction back to the competitive arena." Moore said he is confident Hansen can make it back to top-level competition, though it is unclear if that will be in time for the Commonwealth Games in Melbourne next March, when she will be 34. "It's been a frustrating time for her, but it has not fazed her determination," he added. diff --git a/tests/Dataset/Resources/bbc/sport/009.txt b/tests/Dataset/Resources/bbc/sport/009.txt new file mode 100644 index 0000000..5123235 --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/009.txt @@ -0,0 +1,15 @@ +Off-colour Gardener storms to win + +Britain's Jason Gardener shook off an upset stomach to win the 60m at Sunday's Leipzig International meeting. 
+ +Gardener clocked 6.56 seconds to equal the meeting record and finished well ahead of Germany's Marc Blume, who crossed the line in 6.67 secs. The world indoor champion said: "I got to the airport and my stomach was upset and I was vomiting. I almost went home. "I felt a little better Sunday morning but decided I'd only run in the main race. Then everything went perfectly." Gardener, part of the Great Britain 4x100m quartet that won gold at the Athens Olympics, will now turn his attention to next weekend's Norwich Union European Indoor trials in Sheffield. + +"Given I am still off-colour I know there is plenty more in the tank and I expect to get faster in the next few weeks," he said. "It's just a case of chipping away as I have done in previous years and the results will come." Scotland's Ian Mackie was also in action in Leipzig. He stepped down from his favoured 400m to 200m to finish third in 21.72 secs. Germany's Alexander Kosenkow won the race in 21.07 secs with Dutchman Patrick van Balkom second in 21.58 secs. There were plenty of other senior British athletes showing their indoor form over the weekend. Promising 60m hurdler + +clocked a new UK record of 7.98 seconds at a meeting in Norway. The 24-year-old reached the mark in her heat but had to settle for joint first place with former AAA champion Diane Allahgreen in the final. + +, who broke onto the international scene at the Olympic Games last season, set an indoor personal best of 16.50m in the triple jump at a meeting in Ghent. That leap - 37cm short of Brazilian winner Jadel Gregorio's effort - was good enough to qualify for the European Indoor Championships. At the same meeting, + +finished third in 7.27 seconds in a high-class women's 60m. The event was won by European medal favourite Christine Arron of France while Belgium rival Kim Gevaert was second. Britain's Joice Maduaka finished fifth in 7.35. 
Olympic bronze heptathlon medallist + +made a low-key return to action at an indoor meeting in Birmingham. The 28-year-old cleared 1.76m to win the high jump and threw 13.86m in the women's shot put. diff --git a/tests/Dataset/Resources/bbc/sport/010.txt b/tests/Dataset/Resources/bbc/sport/010.txt new file mode 100644 index 0000000..b9fd654 --- /dev/null +++ b/tests/Dataset/Resources/bbc/sport/010.txt @@ -0,0 +1,5 @@ +Collins to compete in Birmingham + +World and Commonwealth 100m champion Kim Collins will compete in the 60m at the Norwich Union Grand Prix in Birmingham on 18 February. + +The St Kitts and Nevis star joins British Olympic relay gold medallists Jason Gardener and Mark Lewis-Francis. Sydney Olympic 100m champion and world indoor record holder Maurice Greene and Athens Olympic 100m silver medallist Francis Obikwelu will also take part. Collins ran in Birmingham at the 2003 World Indoor Championships. "I'm looking forward to competing against such a strong field," he said. "I got a great reception form the crowd at the NIA when I won my 60m world indoor silver medal in 2003 and it will be really exciting to return to this venue." The world champion says he's in good shape but he isn't underestimating the home competition. "Jason Gardener and Mark Lewis-Francis are Olympic gold medallists now and I'm sure they'll be aiming to win in front of their home supporters. "I'm looking forward to competing against Britain's best sprinters and I'm sure the 60 metres will be one of the most exciting races of the evening." Collins was sixth in the Olympic final in Athens but is hoping for a better result at the World Championships in Finland this summer. "This will be a big year for me and I plan to defend my 100m world title in Helsinki in August. Before then I want to perform well over 60m indoors and start my year in winning form." 
diff --git a/tests/Dataset/Resources/bbc/tech/001.txt b/tests/Dataset/Resources/bbc/tech/001.txt new file mode 100644 index 0000000..acb7e7f --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/001.txt @@ -0,0 +1,19 @@ +Ink helps drive democracy in Asia + +The Kyrgyz Republic, a small, mountainous state of the former Soviet republic, is using invisible ink and ultraviolet readers in the country's elections as part of a drive to prevent multiple voting. + +This new technology is causing both worries and guarded optimism among different sectors of the population. In an effort to live up to its reputation in the 1990s as "an island of democracy", the Kyrgyz President, Askar Akaev, pushed through the law requiring the use of ink during the upcoming Parliamentary and Presidential elections. The US government agreed to fund all expenses associated with this decision. + +The Kyrgyz Republic is seen by many experts as backsliding from the high point it reached in the mid-1990s with a hastily pushed through referendum in 2003, reducing the legislative branch to one chamber with 75 deputies. The use of ink is only one part of a general effort to show commitment towards more open elections - the German Embassy, the Soros Foundation and the Kyrgyz government have all contributed to purchase transparent ballot boxes. + +The actual technology behind the ink is not that complicated. The ink is sprayed on a person's left thumb. It dries and is not visible under normal light. + +However, the presence of ultraviolet light (of the kind used to verify money) causes the ink to glow with a neon yellow light. At the entrance to each polling station, one election official will scan voter's fingers with UV lamp before allowing them to enter, and every voter will have his/her left thumb sprayed with ink before receiving the ballot. If the ink shows under the UV light the voter will not be allowed to enter the polling station. 
Likewise, any voter who refuses to be inked will not receive the ballot. These elections are assuming even greater significance because of two large factors - the upcoming parliamentary elections are a prelude to a potentially regime changing presidential election in the Autumn as well as the echo of recent elections in other former Soviet Republics, notably Ukraine and Georgia. The use of ink has been controversial - especially among groups perceived to be pro-government. + +Widely circulated articles compared the use of ink to the rural practice of marking sheep - a still common metaphor in this primarily agricultural society. + +The author of one such article began a petition drive against the use of the ink. The greatest part of the opposition to ink has often been sheer ignorance. Local newspapers have carried stories that the ink is harmful, radioactive or even that the ultraviolet readers may cause health problems. Others, such as the aggressively middle of the road, Coalition of Non-governmental Organizations, have lauded the move as an important step forward. This type of ink has been used in many elections in the world, in countries as varied as Serbia, South Africa, Indonesia and Turkey. The other common type of ink in elections is indelible visible ink - but as the elections in Afghanistan showed, improper use of this type of ink can cause additional problems. The use of "invisible" ink is not without its own problems. In most elections, numerous rumors have spread about it. + +In Serbia, for example, both Christian and Islamic leaders assured their populations that its use was not contrary to religion. Other rumours are associated with how to remove the ink - various soft drinks, solvents and cleaning products are put forward. However, in reality, the ink is very effective at getting under the cuticle of the thumb and difficult to wash off. The ink stays on the finger for at least 72 hours and for up to a week. 
The use of ink and readers by itself is not a panacea for election ills. The passage of the inking law is, nevertheless, a clear step forward towards free and fair elections." The country's widely watched parliamentary elections are scheduled for 27 February. + +David Mikosz works for the IFES, an international, non-profit organisation that supports the building of democratic societies. diff --git a/tests/Dataset/Resources/bbc/tech/002.txt b/tests/Dataset/Resources/bbc/tech/002.txt new file mode 100644 index 0000000..4c5decd --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/002.txt @@ -0,0 +1,9 @@ +China net cafe culture crackdown + +Chinese authorities closed 12,575 net cafes in the closing months of 2004, the country's government said. + +According to the official news agency most of the net cafes were closed down because they were operating illegally. Chinese net cafes operate under a set of strict guidelines and many of those most recently closed broke rules that limit how close they can be to schools. The move is the latest in a series of steps the Chinese government has taken to crack down on what it considers to be immoral net use. + +The official Xinhua News Agency said the crackdown was carried out to create a "safer environment for young people in China". Rules introduced in 2002 demand that net cafes be at least 200 metres away from middle and elementary schools. The hours that children can use net cafes are also tightly regulated. China has long been worried that net cafes are an unhealthy influence on young people. The 12,575 cafes were shut in the three months from October to December. China also tries to dictate the types of computer games people can play to limit the amount of violence people are exposed to. + +Net cafes are hugely popular in China because the relatively high cost of computer hardware means that few people have PCs in their homes. 
This is not the first time that the Chinese government has moved against net cafes that are not operating within its strict guidelines. All the 100,000 or so net cafes in the country are required to use software that controls what websites users can see. Logs of sites people visit are also kept. Laws on net cafe opening hours and who can use them were introduced in 2002 following a fire at one cafe that killed 25 people. During the crackdown following the blaze authorities moved to clean up net cafes and demanded that all of them get permits to operate. In August 2004 Chinese authorities shut down 700 websites and arrested 224 people in a crackdown on net porn. At the same time it introduced new controls to block overseas sex sites. The Reporters Without Borders group said in a report that Chinese government technologies for e-mail interception and net censorship are among the most highly developed in the world. diff --git a/tests/Dataset/Resources/bbc/tech/003.txt b/tests/Dataset/Resources/bbc/tech/003.txt new file mode 100644 index 0000000..dbab557 --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/003.txt @@ -0,0 +1,9 @@ +Microsoft seeking spyware trojan + +Microsoft is investigating a trojan program that attempts to switch off the firm's anti-spyware software. + +The spyware tool was only released by Microsoft in the last few weeks and has been downloaded by six million people. Stephen Toulouse, a security manager at Microsoft, said the malicious program was called Bankash-A Trojan and was being sent as an e-mail attachment. Microsoft said it did not believe the program was widespread and recommended users to use an anti-virus program. The program attempts to disable or delete Microsoft's anti-spyware tool and suppress warning messages given to users. + +It may also try to steal online banking passwords or other personal information by tracking users' keystrokes. 
+ +Microsoft said in a statement it is investigating what it called a criminal attack on its software. Earlier this week, Microsoft said it would buy anti-virus software maker Sybari Software to improve its security in its Windows and e-mail software. Microsoft has said it plans to offer its own paid-for anti-virus software but it has not yet set a date for its release. The anti-spyware program being targeted is currently only in beta form and aims to help users find and remove spyware - programs which monitor internet use, causes advert pop-ups and slow a PC's performance. diff --git a/tests/Dataset/Resources/bbc/tech/004.txt b/tests/Dataset/Resources/bbc/tech/004.txt new file mode 100644 index 0000000..950dd0e --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/004.txt @@ -0,0 +1,9 @@ +Digital guru floats sub-$100 PC + +Nicholas Negroponte, chairman and founder of MIT's Media Labs, says he is developing a laptop PC that will go on sale for less than $100 (£53). + +He told the BBC World Service programme Go Digital he hoped it would become an education tool in developing countries. He said one laptop per child could be " very important to the development of not just that child but now the whole family, village and neighbourhood". He said the child could use the laptop like a text book. He described the device as a stripped down laptop, which would run a Linux-based operating system, "We have to get the display down to below $20, to do this we need to rear project the image rather than using an ordinary flat panel. + +"The second trick is to get rid of the fat , if you can skinny it down you can gain speed and the ability to use smaller processors and slower memory." The device will probably be exported as a kit of parts to be assembled locally to keep costs down. Mr Negroponte said this was a not for profit venture, though he recognised that the manufacturers of the components would be making money. 
In 1995 Mr Negroponte published the bestselling Being Digital, now widely seen as predicting the digital age. The concept is based on experiments in the US state of Maine, where children were given laptop computers to take home and do their work on. + +While the idea was popular amongst the children, it initially received some resistance from the teachers and there were problems with laptops getting broken. However, Mr Negroponte has adapted the idea to his own work in Cambodia where he set up two schools together with his wife and gave the children laptops. "We put in 25 laptops three years ago , only one has been broken, the kids cherish these things, it's also a TV a telephone and a games machine, not just a textbook." Mr Negroponte wants the laptops to become more common than mobile phones but conceded this was ambitious. "Nokia make 200 million cell phones a year, so for us to claim we're going to make 200 million laptops is a big number, but we're not talking about doing it in three or five years, we're talking about months." He plans to be distributing them by the end of 2006 and is already in discussion with the Chinese education ministry who are expected to make a large order. "In China they spend $17 per child per year on textbooks. That's for five or six years, so if we can distribute and sell laptops in quantities of one million or more to ministries of education that's cheaper and the marketing overheads go away." diff --git a/tests/Dataset/Resources/bbc/tech/005.txt b/tests/Dataset/Resources/bbc/tech/005.txt new file mode 100644 index 0000000..bd1caf7 --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/005.txt @@ -0,0 +1,13 @@ +Technology gets the creative bug + +The hi-tech and the arts worlds have for some time danced around each other and offered creative and technical help when required. + +Often this help has come in the form of corporate art sponsorship or infrastructure provision. 
But that dance is growing more intimate as hi-tech firms look to the creative industries for inspiration. And vice versa. UK telco BT is serious about the idea and has launched its Connected World initiative. The idea, says BT, is to shape a "21st Century model" which will help cement the art, technology, and business worlds together. "We are hoping to understand the creative industry that has a natural thirst for broadband technology," said Frank Stone, head of the BT's business sector programmes. He looks after several "centres of excellence" which the telco has set up with other institutions and organisations, one of which is focused on creative industries. + +To mark the initiative's launch, a major international art installation is to open on 15 April in Brussels, with a further exhibit in Madrid later in the summer. They have both been created using the telco's technology that it has been incubating at its research and development arm, including a sophisticated graphics rendering program. Using a 3D graphics engine, the type commonly used in gaming, Bafta-winning artists Langlands & Bell have created a virtual, story-based, 3D model of Brussels' Coudenberg Cellars. + +They have recently been excavated and are thought to be the remnants of Coudenberg Palace, an historical seat of European power. The 3D world can be navigated using a joystick and offers an immersive experience of a landscape that historically had a river running through it until it was bricked up in the 19th Century. "The river was integral to the city's survival for hundreds of years and it was equally essential to the city that it disappeared," said the artists. "We hope that by uncovering the river, we can greater understand the connections between the past and the present, and appreciate the flow of modernity, once concealing, but now revealing the River Senne." In their previous works they used the Quake game graphics engine. 
The game engine is the core component of a video game because it handles graphics rendering, game AI, and how objects behave and relate to each other in a game. They are so time-consuming and expensive to create, the engines can be licensed out to handle other graphics-intensive games. BT's own engine, Tara (Total Abstract Rendering Architecture) has been in development since 2001 and has been used to recreate virtual interactive models of buildings for planners. It was also used in 2003 in Encounter, an urban-based, pervasive game that combined both virtual play in conjunction with physical, on-the-street action. Because the artists wanted video and interactive elements in their worlds, new features were added to Tara in order to handle the complex data sets. But collaboration between art and digital technology is by no means new, and many keen coders, designers, games makers and animators argue that what they create is art itself. + +As more tools for self-expression are given to the person on the street, enabling people to take photos with a phone and upload them to the web for instance, creativity will become an integral part of technology. The Orange Expressionist exhibition last year, for example, displayed thousands of picture messages from people all over the UK to create an interactive installation. + +Technology as a way of unleashing creativity has massive potential, not least because it gives people something to do with their technology. Big businesses know it is good for them to get in on the creative vein too. The art world is "fantastically rich", said Mr Stone, with creative people and ideas which means traditional companies like BT want to get in with them. Between 1997 and 2002, the creative industry brought £21 billion to London alone. It is an industry that is growing by 6% a year too. 
The partnership between artists and technologists is part of trying to understand the creative potential of technologies like broadband net, according to Mr Stone. "This is not just about putting art galleries and museums online," he said. "It is about how can everyone have the best seat in house and asking if technology has a role in solving that problem." With broadband penetration reaching 100% in the UK, businesses with a stake in the technology want to give people reasons to want and use it. The creative drive is not purely altruistic obviously. It is about both industries borrowing strategies and creative ideas together which can result in better business practices for creative industries, or more patent ideas for tech companies. "What we are trying to do is have outside-in thinking. "We are creating a future cultural drive for the economy," said Mr Stone. diff --git a/tests/Dataset/Resources/bbc/tech/006.txt b/tests/Dataset/Resources/bbc/tech/006.txt new file mode 100644 index 0000000..4a3d70e --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/006.txt @@ -0,0 +1,15 @@ +Wi-fi web reaches farmers in Peru + +A network of community computer centres, linked by wireless technology, is providing a helping hand for poor farmers in Peru. + +The pilot scheme in the Huaral Valley, 80 kilometres north of the capital Lima, aims to offer the 6,000-strong community up-to-date information on agricultural market prices and trends. The Agricultural Information Project for Farmers of the Chancay-Huaral Valley also provides vital links between local organisations in charge of water irrigation, enabling them to coordinate their actions. More than 13,000 rural inhabitants, as well as 18,000 students in the region, will also benefit from the telecoms infrastructure. + +The 14 telecentres uses only free open source software and affordable computer equipment. The network has been three years in the making and was officially inaugurated in September. 
+ +The non-government organisation, Cepes (Peruvian Centre for Social Studies) led the $200,000 project, also backed by local institutions, the Education and Agriculture ministries, and European development organisations. "The plan includes training on computers and internet skills for both operators and users of the system," said Carlos Saldarriaga, technical coordinator at Cepes. Farmers are also taking extra lessons on how to apply the new information to make the most of their plots of land. The Board of Irrigation Users which runs the computer centres, aims to make the network self-sustainable within three years, through the cash generated by using the telecentres as internet cafes. + +One of the key elements of the project is the Agricultural Information System, with its flagship huaral.org website. There, farmers can find the prices for local produce, as well as information on topics ranging from plague prevention to the latest farming techniques. The system also helps the inhabitants of the Chancay-Huaral Valley to organise their vital irrigation systems. "Water is the main element that unites them all. It is a precious element in Peru's coastal areas, because it is so scarce, and therefore it is necessary to have proper irrigation systems to make the most of it," Mr Saldarriaga told the BBC News website. The information network also allows farmers to look beyond their own region, and share experiences with other colleagues from the rest of Peru and even around the world. + +Cepes says the involvement of the farmers has been key in the project's success. "Throughout the last three years, the people have provided a vital thrust to the project; they feel it belongs to them," said Mr Saldarriaga. The community training sessions, attended by an equal number of men and women, have been the perfect showcase for their enthusiasm. "We have had an excellent response, mainly from young people. 
But we have also had a great feedback when we trained 40 or 50-year old women, who were seeing a computer for the first time in their lives." So far, the Huaral programme promoters say the experience has been very positive, and are already planning on spreading the model among other farmers' organisations in Peru. "This is a pilot project, and we have been very keen on its cloning potential in other places," underlined Mr Saldarriaga. + +The Cepes researcher recalls what happened in Cuyo, a 50-family community with no electricity, during the construction of the local telecentre site. There it was necessary to build a mini-hydraulic dam in order to generate 2kW worth of power for the computers, the communications equipment and the cabin lights. "It was already dark when the technicians realised they didn't have any light bulbs to test the generator, so they turned up to the local store to buy light bulbs," recalls Carlos Saldarriaga. "The logical answer was 'we don't sell any', so they had to wait until the next morning to do the testing." Now, with the wireless network, Cuyo as well as the other communities is no longer isolated. diff --git a/tests/Dataset/Resources/bbc/tech/007.txt b/tests/Dataset/Resources/bbc/tech/007.txt new file mode 100644 index 0000000..1c9b89b --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/007.txt @@ -0,0 +1,7 @@ +Microsoft releases bumper patches + +Microsoft has warned PC users to update their systems with the latest security fixes for flaws in Windows programs. + +In its monthly security bulletin, it flagged up eight "critical" security holes which could leave PCs open to attack if left unpatched. The number of holes considered "critical" is more than usual. They affect Windows programs, including Internet Explorer (IE), media player and instant messaging. Four other important fixes were also released. These were considered to be less critical, however. 
If not updated, either automatically or manually, PC users running the programs could be vulnerable to viruses or other malicious attacks designed to exploit the holes. Many of the flaws could be used by virus writers to take over computers remotely, install programs, change, and delete or see data. + +One of the critical patches Microsoft has made available is an important one that fixes some IE flaws. Stephen Toulouse, a Microsoft security manager, said the flaws were known about, and although the firm had not seen any attacks exploiting the flaw, he did not rule them out. Often, when a critical flaw is announced, spates of viruses follow because home users and businesses leave the flaw unpatched. A further patch fixes a hole in Media Player, Windows Messenger and MSN Messenger which an attacker could use to take control of unprotected machines through .png files. Microsoft announces any vulnerabilities in its software every month. The most important ones are those which are classed as "critical". Its latest releases came the week that the company announced it was to buy security software maker Sybari Software as part of Microsoft's plans to make its own security programs. diff --git a/tests/Dataset/Resources/bbc/tech/008.txt b/tests/Dataset/Resources/bbc/tech/008.txt new file mode 100644 index 0000000..31359e3 --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/008.txt @@ -0,0 +1,9 @@ +Virus poses as Christmas e-mail + +Security firms are warning about a Windows virus disguising itself as an electronic Christmas card. + +The Zafi.D virus translates the Christmas greeting on its subject line into the language of the person receiving infected e-mail. Anti-virus firms speculate that this multilingual ability is helping the malicious program spread widely online. Anti-virus firm Sophos said that 10% of the e-mail currently on the net was infected with the Zafi virus. 
+ +Like many other Windows viruses, Zafi-D plunders Microsoft Outlook for e-mail addresses and then uses mail-sending software to despatch itself across the web to new victims. To be infected users must open up the attachment travelling with the message which bears the code for the malicious bug. The attachment on the e-mail poses as an electronic Christmas card but anyone opening it will simply get a crude image of two smiley faces. + +The virus' subject line says "Merry Christmas" and translates this into one of 15 languages depending of the final suffix of the e-mail address the infected message has been sent to. The message in the body of the e-mail reads: "Happy Holidays" and this too is translated. On infected machines the virus tries to disable anti-virus and firewall software and opens up a backdoor on the PC to hand over control to the writer of the virus. The virus is thought to have spread most widely in South America, Italy, Spain, Bulgaria and Hungary. The original Zafi virus appeared in April this year. "We have seen these hoaxes for several Christmases already, and personally I prefer traditional pen and paper cards, and we recommend this to all our clients too," said Mikko Hypponen, who heads F-Secure's anti-virus team. diff --git a/tests/Dataset/Resources/bbc/tech/009.txt b/tests/Dataset/Resources/bbc/tech/009.txt new file mode 100644 index 0000000..3af3f25 --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/009.txt @@ -0,0 +1,69 @@ +Apple laptop is 'greatest gadget' + +The Apple Powerbook 100 has been chosen as the greatest gadget of all time, by US magazine Mobile PC. + +The 1991 laptop was chosen because it was one of the first "lightweight" portable computers and helped define the layout of all future notebook PCs. The magazine has compiled an all-time top 100 list of gadgets, which includes the Sony Walkman at number three and the 1956 Zenith remote control at two. Gadgets needed moving parts and/or electronics to warrant inclusion. 
The magazine specified that gadgets also needed to be a "self-contained apparatus that can be used on its own, not a subset of another device". + +"In general we included only items that were potentially mobile," said the magazine. + +"In the end, we tried to get to the heart of what really makes a gadget a gadget," it concluded. The oldest "gadget" in the top 100 is the abacus, which the magazine dates at 190 A.D., and put in 60th place. Other pre-electronic gadgets in the top 100 include the sextant from 1731 (59th position), the marine chronometer from 1761 (42nd position) and the Kodak Brownie camera from 1900 (28th position). The Tivo personal video recorder is the newest device to make the top 10, which also includes the first flash mp3 player (Diamound Multimedia), as well as the first "successful" digital camera (Casio QV-10) and mobile phone (Motorola Startac). The most popular gadget of the moment, the Apple iPod, is at number 12 in the list while the first Sony transistor radio is at number 13. + +Sony's third entry in the top 20 is the CDP-101 CD player from 1983. "Who can forget the crystalline, hiss-free blast of Madonna's Like A Virgin emenating from their first CD player?" asked the magazine. Karl Elsener's knife, the Swiss Army Knife from 1891, is at number 20 in the list. Gadgets which could be said to feature surprisngly low down in the list include the original telephone (23rd), the Nintendo GameBoy (25th), and the Pulsar quartz digital watch (36th). The list also contains plenty of oddities: the Pez sweet dispenser (98th), 1980s toy Tamagotchi (86th) and the bizarre Ronco inside the shell egg scrambler (84th). + +Why worry about mobile phones. Soon they will be subsumed into the PDA's / laptops etc. + +What about the Marine Chronometer? Completely revolutionised navigation for boats and was in use for centuries. For it's time, a technological marvel! + +Sony Net Minidisc! It paved the way for more mp3 player to explode onto the market. 
I always used my NetMD, and could not go anywhere without it. + +A laptop computer is not a gadget! It's a working tool! + +The Sinclair Executive was the world's first pocket calculator. I think this should be there as well. + +How about the clockwork radio? Or GPS? Or a pocket calculator? All these things are useful to real people, not just PC magazine editors. + +Are the people who created this list insane ? Surely the most important gadget of the modern age is the mobile phone? It has revolutionalised communication, which is more than can be said for a niche market laptop. From outside the modern age, the marine chronometer is the single most important gadget, without which modern transportation systems would not have evolved so quickly. + +Has everyone forgot about the Breville pie maker?? + +An interesting list. Of the electronic gadgets, thousands of journalists in the early 1980s blessed the original noteboook pc - the Tandy 100. The size of A4 paper and light, three weeks on a set of batteries, an excellent keyboard, a modem. A pity Tandy did not make it DOS compatible. + +What's an Apple Powerbook 100 ? It's out of date - not much of a "gadget". Surely it has to be something simple / timeless - the tin opener, Swiss Army Knife, safety razor blade, wristwatch or the thing for taking stones out of horses hooves ? + +It has to be the mobile phone. No other single device has had such an effect on our way of living in such a short space of time. + +The ball point pen has got to be one of the most used and common gadgets ever. Also many might be grateful for the pocket calculator which was a great improvement over the slide rule. + +The Casio pocket calculator that played a simple game and made tinny noises was also a hot gadget in 1980. A true gadget, it could be carried around and shown off. + +All top 10 are electronic toys, so the list is probably a better reflection of the current high-tech obsession than anyhting else. 
I say this as the Swiss Army Knife only made No 20. + +Sinclair QL a machine far ahead of its time. The first home machine with a true multi-takings OS. Shame the marketing was so bad!!! + +Apple.. a triumph of fashion over... well everything else. + +Utter rubbish. Yes, the Apple laptop and Sony Walkman are classic gadgets. But to call the sextant and the marine chronometer 'gadgets' and rank them as less important than a TV remote control reveals a quite shocking lack of historical perspective. The former literally helped change the world by vastly improving navigation at see. The latter is the seed around which the couch potato culture has developed. No competition. + +I'd also put Apple's Newton and the first Palm Pilot there as the front runners for portable computing, and possibly the Toshiba Libretto for the same reason. I only wish that Vulcan Inc's Flipstart wasn't just vapourware otherwise it would be at the top. + +How did a laptop ever manage to beat off the challenge of the wristwatch or the telephone (mobile or otherwise)? What about radios and TVs? + +The swiss army knife. By far the most useful gadget. I got mine 12 years ago. Still wearing and using it a lot! It stood the test of time. + +Psion Organiser series 3, should be up there. Had a usable qwerty keyboard, removable storage, good set of apps and programmable. Case design was good (batteries in the hinge - a first, I think). Great product innovation. + +The first mobile PC was voted best gadget by readers of...err... mobile PC?! Why do you keep putting these obviously biased lists on your site? It's obviously the mobile phone or remote control, and readers of a less partisan publication would tell you that. + +The Motorola Startac should be Number One. Why? There will be mobile phones long after notebook computers and other gadgets are either gone or integrated in communications devices. + +The Psion series 3c! The first most practical way to carry all your info around... 
+ +I too would back the Sinclair Spectrum - without this little beauty I would never have moved into the world of IT and earn the living that I do now. + +I'd have put the mobile phone high up the list. Probably a Nokia model. + +Sinclair Spectrum - 16k. It plugged into the tv. Games were rubbish but it gave me a taste for programming and that's what I do for a living now. + +I wish more modern notebooks -- even Apple's newest offerings -- were more like the PB100. Particularly disheartening is the demise of the trackball, which has given way to the largely useless "trackpad" which every notebook on the market today uses. They're invariably inaccurate, uncomfortable, and cumbersome to use. + +Congratulations to Apple, a deserved win! diff --git a/tests/Dataset/Resources/bbc/tech/010.txt b/tests/Dataset/Resources/bbc/tech/010.txt new file mode 100644 index 0000000..3e4bd43 --- /dev/null +++ b/tests/Dataset/Resources/bbc/tech/010.txt @@ -0,0 +1,11 @@ +Google's toolbar sparks concern + +Search engine firm Google has released a trial tool which is concerning some net users because it directs people to pre-selected commercial websites. + +The AutoLink feature comes with Google's latest toolbar and provides links in a webpage to Amazon.com if it finds a book's ISBN number on the site. It also links to Google's map service, if there is an address, or to car firm Carfax, if there is a licence plate. Google said the feature, available only in the US, "adds useful links". But some users are concerned that Google's dominant position in the search engine market place could mean it would be giving a competitive edge to firms like Amazon. + +AutoLink works by creating a link to a website based on information contained in a webpage - even if there is no link specified and whether or not the publisher of the page has given permission. 
+ +If a user clicks the AutoLink feature in the Google toolbar then a webpage with a book's unique ISBN number would link directly to Amazon's website. It could mean online libraries that list ISBN book numbers find they are directing users to Amazon.com whether they like it or not. Websites which have paid for advertising on their pages may also be directing people to rival services. Dan Gillmor, founder of Grassroots Media, which supports citizen-based media, said the tool was a "bad idea, and an unfortunate move by a company that is looking to continue its hypergrowth". In a statement Google said the feature was still only in beta, ie trial, stage and that the company welcomed feedback from users. It said: "The user can choose never to click on the AutoLink button, and web pages she views will never be modified. "In addition, the user can choose to disable the AutoLink feature entirely at any time." + +The new tool has been compared to the Smart Tags feature from Microsoft by some users. It was widely criticised by net users and later dropped by Microsoft after concerns over trademark use were raised. Smart Tags allowed Microsoft to link any word on a web page to another site chosen by the company. Google said none of the companies which received AutoLinks had paid for the service. Some users said AutoLink would only be fair if websites had to sign up to allow the feature to work on their pages or if they received revenue for any "click through" to a commercial site. Cory Doctorow, European outreach coordinator for digital civil liberties group Electronic Fronter Foundation, said that Google should not be penalised for its market dominance. "Of course Google should be allowed to direct people to whatever proxies it chooses. "But as an end user I would want to know - 'Can I choose to use this service?, 'How much is Google being paid?', 'Can I substitute my own companies for the ones chosen by Google?'." 
Mr Doctorow said the only objection would be if users were forced into using AutoLink or "tricked into using the service". diff --git a/tests/Phpml/Dataset/Resources/dataset.csv b/tests/Dataset/Resources/dataset.csv similarity index 100% rename from tests/Phpml/Dataset/Resources/dataset.csv rename to tests/Dataset/Resources/dataset.csv diff --git a/tests/Dataset/Resources/longdataset.csv b/tests/Dataset/Resources/longdataset.csv new file mode 100644 index 0000000..f8f3c40 --- /dev/null +++ b/tests/Dataset/Resources/longdataset.csv @@ -0,0 +1 @@ +1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,
390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,542,543,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,887,888,889,
890,891,892,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,975,976,977,978,979,980,981,982,983,984,985,986,987,988,989,990,991,992,993,994,995,996,997,998,999,1000,label diff --git a/tests/Dataset/Resources/mnist/images-idx-ubyte b/tests/Dataset/Resources/mnist/images-idx-ubyte new file mode 100644 index 0000000..40b870a Binary files /dev/null and b/tests/Dataset/Resources/mnist/images-idx-ubyte differ diff --git a/tests/Dataset/Resources/mnist/labels-11-idx-ubyte b/tests/Dataset/Resources/mnist/labels-11-idx-ubyte new file mode 100644 index 0000000..db9362d Binary files /dev/null and b/tests/Dataset/Resources/mnist/labels-11-idx-ubyte differ diff --git a/tests/Dataset/Resources/mnist/labels-idx-ubyte b/tests/Dataset/Resources/mnist/labels-idx-ubyte new file mode 100644 index 0000000..eca5265 Binary files /dev/null and b/tests/Dataset/Resources/mnist/labels-idx-ubyte differ diff --git a/tests/Dataset/Resources/svm/1x1.svm b/tests/Dataset/Resources/svm/1x1.svm new file mode 100644 index 0000000..fdd6c1f --- /dev/null +++ b/tests/Dataset/Resources/svm/1x1.svm @@ -0,0 +1 @@ +0 1:2.3 diff --git a/tests/Dataset/Resources/svm/3x1.svm b/tests/Dataset/Resources/svm/3x1.svm new file mode 100644 index 0000000..d817c96 --- /dev/null +++ b/tests/Dataset/Resources/svm/3x1.svm @@ -0,0 +1,3 @@ +1 1:2.3 +0 1:4.56 +1 1:78.9 diff --git a/tests/Dataset/Resources/svm/3x4.svm b/tests/Dataset/Resources/svm/3x4.svm new file mode 100644 index 0000000..5f6d015 --- /dev/null +++ b/tests/Dataset/Resources/svm/3x4.svm @@ -0,0 +1,3 @@ +1 1:2 2:4 3:6 4:8 +2 1:3 2:5 3:7 4:9 +0 1:1.2 2:3.4 3:5.6 4:7.8 diff --git a/tests/Dataset/Resources/svm/comments.svm b/tests/Dataset/Resources/svm/comments.svm new file mode 100644 
index 0000000..7cf6fc4 --- /dev/null +++ b/tests/Dataset/Resources/svm/comments.svm @@ -0,0 +1,2 @@ +0 1:2 # This is a comment. +1 1:34 # This # is # : # also # a # comment # . diff --git a/tests/Dataset/Resources/svm/empty.svm b/tests/Dataset/Resources/svm/empty.svm new file mode 100644 index 0000000..e69de29 diff --git a/tests/Dataset/Resources/svm/err_empty_line.svm b/tests/Dataset/Resources/svm/err_empty_line.svm new file mode 100644 index 0000000..289e2b5 --- /dev/null +++ b/tests/Dataset/Resources/svm/err_empty_line.svm @@ -0,0 +1,3 @@ +1 1:2.3 + +0 1:4.56 diff --git a/tests/Dataset/Resources/svm/err_index_zero.svm b/tests/Dataset/Resources/svm/err_index_zero.svm new file mode 100644 index 0000000..56c20f8 --- /dev/null +++ b/tests/Dataset/Resources/svm/err_index_zero.svm @@ -0,0 +1 @@ +0 0:2.3 diff --git a/tests/Dataset/Resources/svm/err_invalid_feature.svm b/tests/Dataset/Resources/svm/err_invalid_feature.svm new file mode 100644 index 0000000..f57b6c5 --- /dev/null +++ b/tests/Dataset/Resources/svm/err_invalid_feature.svm @@ -0,0 +1 @@ +0 12345 diff --git a/tests/Dataset/Resources/svm/err_invalid_spaces.svm b/tests/Dataset/Resources/svm/err_invalid_spaces.svm new file mode 100644 index 0000000..77ff868 --- /dev/null +++ b/tests/Dataset/Resources/svm/err_invalid_spaces.svm @@ -0,0 +1 @@ + 0 1:2.3 diff --git a/tests/Dataset/Resources/svm/err_invalid_value.svm b/tests/Dataset/Resources/svm/err_invalid_value.svm new file mode 100644 index 0000000..b358890 --- /dev/null +++ b/tests/Dataset/Resources/svm/err_invalid_value.svm @@ -0,0 +1 @@ +0 1:xyz diff --git a/tests/Dataset/Resources/svm/err_no_labels.svm b/tests/Dataset/Resources/svm/err_no_labels.svm new file mode 100644 index 0000000..789be38 --- /dev/null +++ b/tests/Dataset/Resources/svm/err_no_labels.svm @@ -0,0 +1 @@ +1:2.3 diff --git a/tests/Dataset/Resources/svm/err_string_index.svm b/tests/Dataset/Resources/svm/err_string_index.svm new file mode 100644 index 0000000..25cb296 --- /dev/null +++ 
b/tests/Dataset/Resources/svm/err_string_index.svm @@ -0,0 +1 @@ +0 x:2.3 diff --git a/tests/Dataset/Resources/svm/err_string_labels.svm b/tests/Dataset/Resources/svm/err_string_labels.svm new file mode 100644 index 0000000..8cc16f7 --- /dev/null +++ b/tests/Dataset/Resources/svm/err_string_labels.svm @@ -0,0 +1 @@ +A 1:2.3 diff --git a/tests/Dataset/Resources/svm/sparse.svm b/tests/Dataset/Resources/svm/sparse.svm new file mode 100644 index 0000000..23d7485 --- /dev/null +++ b/tests/Dataset/Resources/svm/sparse.svm @@ -0,0 +1,2 @@ +0 2:3.45 +1 5:6.789 diff --git a/tests/Dataset/Resources/svm/tabs.svm b/tests/Dataset/Resources/svm/tabs.svm new file mode 100644 index 0000000..bf8757f --- /dev/null +++ b/tests/Dataset/Resources/svm/tabs.svm @@ -0,0 +1 @@ +1 1:23 2:45 # comments diff --git a/tests/Dataset/SvmDatasetTest.php b/tests/Dataset/SvmDatasetTest.php new file mode 100644 index 0000000..437e3d2 --- /dev/null +++ b/tests/Dataset/SvmDatasetTest.php @@ -0,0 +1,212 @@ +getSamples()); + self::assertEquals($expectedTargets, $dataset->getTargets()); + } + + public function testSvmDataset1x1(): void + { + $filePath = self::getFilePath('1x1.svm'); + $dataset = new SvmDataset($filePath); + + $expectedSamples = [ + [2.3], + ]; + $expectedTargets = [ + 0, + ]; + + self::assertEquals($expectedSamples, $dataset->getSamples()); + self::assertEquals($expectedTargets, $dataset->getTargets()); + } + + public function testSvmDataset3x1(): void + { + $filePath = self::getFilePath('3x1.svm'); + $dataset = new SvmDataset($filePath); + + $expectedSamples = [ + [2.3], + [4.56], + [78.9], + ]; + $expectedTargets = [ + 1, + 0, + 1, + ]; + + self::assertEquals($expectedSamples, $dataset->getSamples()); + self::assertEquals($expectedTargets, $dataset->getTargets()); + } + + public function testSvmDataset3x4(): void + { + $filePath = self::getFilePath('3x4.svm'); + $dataset = new SvmDataset($filePath); + + $expectedSamples = [ + [2, 4, 6, 8], + [3, 5, 7, 9], + [1.2, 3.4, 5.6, 7.8], + ]; + 
$expectedTargets = [ + 1, + 2, + 0, + ]; + + self::assertEquals($expectedSamples, $dataset->getSamples()); + self::assertEquals($expectedTargets, $dataset->getTargets()); + } + + public function testSvmDatasetSparse(): void + { + $filePath = self::getFilePath('sparse.svm'); + $dataset = new SvmDataset($filePath); + + $expectedSamples = [ + [0, 3.45, 0, 0, 0], + [0, 0, 0, 0, 6.789], + ]; + $expectedTargets = [ + 0, + 1, + ]; + + self::assertEquals($expectedSamples, $dataset->getSamples()); + self::assertEquals($expectedTargets, $dataset->getTargets()); + } + + public function testSvmDatasetComments(): void + { + $filePath = self::getFilePath('comments.svm'); + $dataset = new SvmDataset($filePath); + + $expectedSamples = [ + [2], + [34], + ]; + $expectedTargets = [ + 0, + 1, + ]; + + self::assertEquals($expectedSamples, $dataset->getSamples()); + self::assertEquals($expectedTargets, $dataset->getTargets()); + } + + public function testSvmDatasetTabs(): void + { + $filePath = self::getFilePath('tabs.svm'); + $dataset = new SvmDataset($filePath); + + $expectedSamples = [ + [23, 45], + ]; + $expectedTargets = [ + 1, + ]; + + self::assertEquals($expectedSamples, $dataset->getSamples()); + self::assertEquals($expectedTargets, $dataset->getTargets()); + } + + public function testSvmDatasetMissingFile(): void + { + $this->expectException(FileException::class); + $this->expectExceptionMessage('File "err_file_not_exists.svm" missing.'); + + new SvmDataset(self::getFilePath('err_file_not_exists.svm')); + } + + public function testSvmDatasetEmptyLine(): void + { + $this->expectException(DatasetException::class); + $this->expectExceptionMessage('Invalid target "".'); + + new SvmDataset(self::getFilePath('err_empty_line.svm')); + } + + public function testSvmDatasetNoLabels(): void + { + $this->expectException(DatasetException::class); + $this->expectExceptionMessage('Invalid target "1:2.3".'); + + new SvmDataset(self::getFilePath('err_no_labels.svm')); + } + + public function 
testSvmDatasetStringLabels(): void + { + $this->expectException(DatasetException::class); + $this->expectExceptionMessage('Invalid target "A".'); + + new SvmDataset(self::getFilePath('err_string_labels.svm')); + } + + public function testSvmDatasetInvalidSpaces(): void + { + $this->expectException(DatasetException::class); + $this->expectExceptionMessage('Invalid target "".'); + + new SvmDataset(self::getFilePath('err_invalid_spaces.svm')); + } + + public function testSvmDatasetStringIndex(): void + { + $this->expectException(DatasetException::class); + $this->expectExceptionMessage('Invalid index "x".'); + + new SvmDataset(self::getFilePath('err_string_index.svm')); + } + + public function testSvmDatasetIndexZero(): void + { + $this->expectException(DatasetException::class); + $this->expectExceptionMessage('Invalid index "0".'); + + new SvmDataset(self::getFilePath('err_index_zero.svm')); + } + + public function testSvmDatasetInvalidValue(): void + { + $this->expectException(DatasetException::class); + $this->expectExceptionMessage('Invalid value "xyz".'); + + new SvmDataset(self::getFilePath('err_invalid_value.svm')); + } + + public function testSvmDatasetInvalidFeature(): void + { + $this->expectException(DatasetException::class); + $this->expectExceptionMessage('Invalid value "12345".'); + + new SvmDataset(self::getFilePath('err_invalid_feature.svm')); + } + + private static function getFilePath(string $baseName): string + { + return __DIR__.'/Resources/svm/'.$baseName; + } +} diff --git a/tests/DimensionReduction/KernelPCATest.php b/tests/DimensionReduction/KernelPCATest.php new file mode 100644 index 0000000..da4e51b --- /dev/null +++ b/tests/DimensionReduction/KernelPCATest.php @@ -0,0 +1,86 @@ +fit($data); + + // Due to the fact that the sign of values can be flipped + // during the calculation of eigenValues, we have to compare + // absolute value of the values + array_map(function ($val1, $val2) use ($epsilon): void { + 
self::assertEqualsWithDelta(abs($val1[0]), abs($val2[0]), $epsilon); + }, $transformed, $reducedData); + + // Fitted KernelPCA object can also transform an arbitrary sample of the + // same dimensionality with the original dataset + $newData = [1.25, 2.25]; + $newTransformed = [0.18956227539216]; + $newTransformed2 = $kpca->transform($newData); + self::assertEqualsWithDelta(abs($newTransformed[0]), abs($newTransformed2[0]), $epsilon); + } + + public function testKernelPCAThrowWhenKernelInvalid(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian'); + new KernelPCA(0, null, 1, 15.); + } + + public function testTransformThrowWhenNotFitted(): void + { + $samples = [1, 0]; + + $kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 1, 15.); + + $this->expectException(InvalidOperationException::class); + $this->expectExceptionMessage('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first'); + $kpca->transform($samples); + } + + public function testTransformThrowWhenMultiDimensionalArrayGiven(): void + { + $samples = [ + [1, 0], + [1, 1], + ]; + + $kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 1, 15.); + $kpca->fit($samples); + + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('KernelPCA::transform() accepts only one-dimensional arrays'); + $kpca->transform($samples); + } +} diff --git a/tests/DimensionReduction/LDATest.php b/tests/DimensionReduction/LDATest.php new file mode 100644 index 0000000..b64a7f3 --- /dev/null +++ b/tests/DimensionReduction/LDATest.php @@ -0,0 +1,109 @@ +fit($dataset->getSamples(), $dataset->getTargets()); + + // Some samples of the Iris data will be checked manually + // First 3 and last 3 rows from the original dataset + $data = [ + [5.1, 3.5, 1.4, 0.2], + [4.9, 3.0, 1.4, 0.2], + [4.7, 3.2, 1.3, 0.2], + [6.5, 3.0, 
5.2, 2.0], + [6.2, 3.4, 5.4, 2.3], + [5.9, 3.0, 5.1, 1.8], + ]; + $transformed2 = [ + [-1.4922092756753, 1.9047102045574], + [-1.2576556684358, 1.608414450935], + [-1.3487505965419, 1.749846351699], + [1.7759343101456, 2.0371552314006], + [2.0059819019159, 2.4493123003226], + [1.701474913008, 1.9037880473772], + ]; + + $control = []; + $control = array_merge($control, array_slice($transformed, 0, 3)); + $control = array_merge($control, array_slice($transformed, -3)); + + $check = function ($row1, $row2) use ($epsilon): void { + // Due to the fact that the sign of values can be flipped + // during the calculation of eigenValues, we have to compare + // absolute value of the values + $row1 = array_map('abs', $row1); + $row2 = array_map('abs', $row2); + self::assertEqualsWithDelta($row1, $row2, $epsilon); + }; + array_map($check, $control, $transformed2); + + // Fitted LDA object should be able to return same values again + // for each projected row + foreach ($data as $i => $row) { + $newRow = [$transformed2[$i]]; + $newRow2 = $lda->transform($row); + + array_map($check, $newRow, $newRow2); + } + } + + public function testLDAThrowWhenTotalVarianceOutOfRange(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Total variance can be a value between 0.1 and 0.99'); + new LDA(0., null); + } + + public function testLDAThrowWhenNumFeaturesOutOfRange(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Number of features to be preserved should be greater than 0'); + new LDA(null, 0); + } + + public function testLDAThrowWhenParameterNotSpecified(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Either totalVariance or numFeatures should be specified in order to run the algorithm'); + new LDA(); + } + + public function testLDAThrowWhenBothParameterSpecified(): void + { + 
$this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Either totalVariance or numFeatures should be specified in order to run the algorithm'); + new LDA(0.9, 1); + } + + public function testTransformThrowWhenNotFitted(): void + { + $samples = [ + [1, 0], + [1, 1], + ]; + + $pca = new LDA(0.9); + + $this->expectException(InvalidOperationException::class); + $this->expectExceptionMessage('LDA has not been fitted with respect to original dataset, please run LDA::fit() first'); + $pca->transform($samples); + } +} diff --git a/tests/DimensionReduction/PCATest.php b/tests/DimensionReduction/PCATest.php new file mode 100644 index 0000000..3dbc5a6 --- /dev/null +++ b/tests/DimensionReduction/PCATest.php @@ -0,0 +1,101 @@ +fit($data); + + // Due to the fact that the sign of values can be flipped + // during the calculation of eigenValues, we have to compare + // absolute value of the values + array_map(function ($val1, $val2) use ($epsilon): void { + self::assertEqualsWithDelta(abs($val1[0]), abs($val2[0]), $epsilon); + }, $transformed, $reducedData); + + // Test fitted PCA object to transform an arbitrary sample of the + // same dimensionality with the original dataset + foreach ($data as $i => $row) { + $newRow = [[$transformed[$i]]]; + $newRow2 = $pca->transform($row); + + array_map(function ($val1, $val2) use ($epsilon): void { + self::assertEqualsWithDelta(abs($val1[0][0]), abs($val2[0]), $epsilon); + }, $newRow, $newRow2); + } + } + + public function testPCAThrowWhenTotalVarianceOutOfRange(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Total variance can be a value between 0.1 and 0.99'); + new PCA(0., null); + } + + public function testPCAThrowWhenNumFeaturesOutOfRange(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Number of features to be preserved should be greater than 0'); + new PCA(null, 0); + } + + public function 
testPCAThrowWhenParameterNotSpecified(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Either totalVariance or numFeatures should be specified in order to run the algorithm'); + new PCA(); + } + + public function testPCAThrowWhenBothParameterSpecified(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Either totalVariance or numFeatures should be specified in order to run the algorithm'); + new PCA(0.9, 1); + } + + public function testTransformThrowWhenNotFitted(): void + { + $samples = [ + [1, 0], + [1, 1], + ]; + + $pca = new PCA(0.9); + + $this->expectException(InvalidOperationException::class); + $this->expectExceptionMessage('PCA has not been fitted with respect to original dataset, please run PCA::fit() first'); + $pca->transform($samples); + } +} diff --git a/tests/FeatureExtraction/StopWordsTest.php b/tests/FeatureExtraction/StopWordsTest.php new file mode 100644 index 0000000..112fa0f --- /dev/null +++ b/tests/FeatureExtraction/StopWordsTest.php @@ -0,0 +1,55 @@ +isStopWord('lorem')); + self::assertTrue($stopWords->isStopWord('ipsum')); + self::assertTrue($stopWords->isStopWord('dolor')); + + self::assertFalse($stopWords->isStopWord('consectetur')); + self::assertFalse($stopWords->isStopWord('adipiscing')); + self::assertFalse($stopWords->isStopWord('amet')); + } + + public function testThrowExceptionOnInvalidLanguage(): void + { + $this->expectException(InvalidArgumentException::class); + StopWords::factory('Lorem'); + } + + public function testEnglishStopWords(): void + { + $stopWords = StopWords::factory('English'); + + self::assertTrue($stopWords->isStopWord('again')); + self::assertFalse($stopWords->isStopWord('strategy')); + } + + public function testPolishStopWords(): void + { + $stopWords = StopWords::factory('Polish'); + + self::assertTrue($stopWords->isStopWord('wam')); + self::assertFalse($stopWords->isStopWord('transhumanizm')); + } + + 
public function testFrenchStopWords(): void + { + $stopWords = StopWords::factory('French'); + + self::assertTrue($stopWords->isStopWord('alors')); + self::assertFalse($stopWords->isStopWord('carte')); + } +} diff --git a/tests/FeatureExtraction/TfIdfTransformerTest.php b/tests/FeatureExtraction/TfIdfTransformerTest.php new file mode 100644 index 0000000..acb0103 --- /dev/null +++ b/tests/FeatureExtraction/TfIdfTransformerTest.php @@ -0,0 +1,59 @@ + 1, + 1 => 1, + 2 => 2, + 3 => 1, + 4 => 0, + 5 => 0, + ], + [ + 0 => 1, + 1 => 1, + 2 => 0, + 3 => 0, + 4 => 2, + 5 => 3, + ], + ]; + + $tfIdfSamples = [ + [ + 0 => 0, + 1 => 0, + 2 => 0.602, + 3 => 0.301, + 4 => 0, + 5 => 0, + ], + [ + 0 => 0, + 1 => 0, + 2 => 0, + 3 => 0, + 4 => 0.602, + 5 => 0.903, + ], + ]; + + $transformer = new TfIdfTransformer($samples); + $transformer->transform($samples); + + self::assertEqualsWithDelta($tfIdfSamples, $samples, 0.001); + } +} diff --git a/tests/FeatureExtraction/TokenCountVectorizerTest.php b/tests/FeatureExtraction/TokenCountVectorizerTest.php new file mode 100644 index 0000000..1347915 --- /dev/null +++ b/tests/FeatureExtraction/TokenCountVectorizerTest.php @@ -0,0 +1,259 @@ + 'Lorem', + 1 => 'ipsum', + 2 => 'dolor', + 3 => 'sit', + 4 => 'amet', + 5 => 'Mauris', + 6 => 'placerat', + 7 => 'diam', + 8 => 'eros', + 9 => 'fringilla', + ]; + + $tokensCounts = [ + [ + 0 => 1, + 1 => 1, + 2 => 2, + 3 => 1, + 4 => 1, + 5 => 0, + 6 => 0, + 7 => 0, + 8 => 0, + 9 => 0, + ], + [ + 0 => 0, + 1 => 1, + 2 => 1, + 3 => 0, + 4 => 0, + 5 => 1, + 6 => 1, + 7 => 0, + 8 => 0, + 9 => 0, + ], + [ + 0 => 0, + 1 => 0, + 2 => 0, + 3 => 0, + 4 => 0, + 5 => 1, + 6 => 0, + 7 => 2, + 8 => 1, + 9 => 1, + ], + ]; + + $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer()); + + $vectorizer->fit($samples); + self::assertSame($vocabulary, $vectorizer->getVocabulary()); + + $vectorizer->transform($samples); + self::assertSame($tokensCounts, $samples); + } + + public function 
testTransformationWithMinimumDocumentTokenCountFrequency(): void + { + // word at least in half samples + $samples = [ + 'Lorem ipsum dolor sit amet 1550', + 'Lorem ipsum sit amet', + 'ipsum sit amet', + 'ipsum sit amet', + ]; + + $vocabulary = [ + 0 => 'Lorem', + 1 => 'ipsum', + 2 => 'dolor', + 3 => 'sit', + 4 => 'amet', + 5 => 1550, + ]; + + $tokensCounts = [ + [ + 0 => 1, + 1 => 1, + 2 => 0, + 3 => 1, + 4 => 1, + 5 => 0, + ], + [ + 0 => 1, + 1 => 1, + 2 => 0, + 3 => 1, + 4 => 1, + 5 => 0, + ], + [ + 0 => 0, + 1 => 1, + 2 => 0, + 3 => 1, + 4 => 1, + 5 => 0, + ], + [ + 0 => 0, + 1 => 1, + 2 => 0, + 3 => 1, + 4 => 1, + 5 => 0, + ], + ]; + + $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 0.5); + + $vectorizer->fit($samples); + self::assertSame($vocabulary, $vectorizer->getVocabulary()); + + $vectorizer->transform($samples); + self::assertSame($tokensCounts, $samples); + + // word at least once in all samples + $samples = [ + 'Lorem ipsum dolor sit amet', + 'Morbi quis sagittis Lorem', + 'eros Lorem', + ]; + + $tokensCounts = [ + [ + 0 => 1, + 1 => 0, + 2 => 0, + 3 => 0, + 4 => 0, + 5 => 0, + 6 => 0, + 7 => 0, + 8 => 0, + ], + [ + 0 => 1, + 1 => 0, + 2 => 0, + 3 => 0, + 4 => 0, + 5 => 0, + 6 => 0, + 7 => 0, + 8 => 0, + ], + [ + 0 => 1, + 1 => 0, + 2 => 0, + 3 => 0, + 4 => 0, + 5 => 0, + 6 => 0, + 7 => 0, + 8 => 0, + ], + ]; + + $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 1); + $vectorizer->fit($samples); + $vectorizer->transform($samples); + + self::assertSame($tokensCounts, $samples); + } + + public function testTransformationWithStopWords(): void + { + $samples = [ + 'Lorem ipsum dolor sit amet dolor', + 'Mauris placerat ipsum dolor', + 'Mauris diam eros fringilla diam', + ]; + + $stopWords = new StopWords(['dolor', 'diam']); + + $vocabulary = [ + 0 => 'Lorem', + 1 => 'ipsum', + //2 => 'dolor', + 2 => 'sit', + 3 => 'amet', + 4 => 'Mauris', + 5 => 'placerat', + //7 => 'diam', + 6 => 'eros', + 7 => 'fringilla', + 
]; + + $tokensCounts = [ + [ + 0 => 1, + 1 => 1, + 2 => 1, + 3 => 1, + 4 => 0, + 5 => 0, + 6 => 0, + 7 => 0, + ], + [ + 0 => 0, + 1 => 1, + 2 => 0, + 3 => 0, + 4 => 1, + 5 => 1, + 6 => 0, + 7 => 0, + ], + [ + 0 => 0, + 1 => 0, + 2 => 0, + 3 => 0, + 4 => 1, + 5 => 0, + 6 => 1, + 7 => 1, + ], + ]; + + $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), $stopWords); + + $vectorizer->fit($samples); + self::assertSame($vocabulary, $vectorizer->getVocabulary()); + + $vectorizer->transform($samples); + self::assertSame($tokensCounts, $samples); + } +} diff --git a/tests/FeatureSelection/ScoringFunction/ANOVAFValueTest.php b/tests/FeatureSelection/ScoringFunction/ANOVAFValueTest.php new file mode 100644 index 0000000..8954e32 --- /dev/null +++ b/tests/FeatureSelection/ScoringFunction/ANOVAFValueTest.php @@ -0,0 +1,24 @@ +score($dataset->getSamples(), $dataset->getTargets()), + 0.0001 + ); + } +} diff --git a/tests/FeatureSelection/ScoringFunction/UnivariateLinearRegressionTest.php b/tests/FeatureSelection/ScoringFunction/UnivariateLinearRegressionTest.php new file mode 100644 index 0000000..48d72d3 --- /dev/null +++ b/tests/FeatureSelection/ScoringFunction/UnivariateLinearRegressionTest.php @@ -0,0 +1,29 @@ +score($samples, $targets), 0.0001); + } + + public function testRegressionScoreWithoutCenter(): void + { + $samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]]; + $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; + + $function = new UnivariateLinearRegression(false); + self::assertEqualsWithDelta([1.74450, 18.08347], $function->score($samples, $targets), 0.0001); + } +} diff --git a/tests/FeatureSelection/SelectKBestTest.php b/tests/FeatureSelection/SelectKBestTest.php new file mode 100644 index 0000000..5239954 --- /dev/null +++ b/tests/FeatureSelection/SelectKBestTest.php @@ -0,0 
+1,119 @@ +fit($samples, $targets); + $selector->transform($samples); + + self::assertEquals([[2, 1], [3, 4], [2, 1], [3, 3], [3, 4], [3, 5]], $samples); + } + + public function testSelectKBestWithKBiggerThanFeatures(): void + { + $samples = [[1, 2, 1], [1, 3, 4], [5, 2, 1], [1, 3, 3], [1, 3, 4], [0, 3, 5]]; + $targets = ['a', 'a', 'a', 'b', 'b', 'b']; + $selector = new SelectKBest(4); + $selector->fit($samples, $targets); + $selector->transform($samples); + + self::assertEquals([[1, 2, 1], [1, 3, 4], [5, 2, 1], [1, 3, 3], [1, 3, 4], [0, 3, 5]], $samples); + } + + public function testSelectKBestWithIrisDataset(): void + { + $dataset = new IrisDataset(); + $selector = new SelectKBest(2, new ANOVAFValue()); + $selector->fit($samples = $dataset->getSamples(), $dataset->getTargets()); + $selector->transform($samples); + + self::assertEquals(2, count($samples[0])); + } + + public function testSelectKBestWithRegressionScoring(): void + { + $samples = [[73676, 1996, 2], [77006, 1998, 5], [10565, 2000, 4], [146088, 1995, 2], [15000, 2001, 2], [65940, 2000, 2], [9300, 2000, 2], [93739, 1996, 2], [153260, 1994, 2], [17764, 2002, 2], [57000, 1998, 2], [15000, 2000, 2]]; + $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; + + $selector = new SelectKBest(2, new UnivariateLinearRegression()); + $selector->fit($samples, $targets); + $selector->transform($samples); + + self::assertEquals( + [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]], + $samples + ); + } + + public function testSelectKBestIssue386(): void + { + $samples = [ + [ + 0.0006729998475705993, + 0.0, + 0.999999773507577, + 0.0, + 0.0, + 6.66666515671718E-7, + 3.33333257835859E-6, + 6.66666515671718E-6, + ], + [ + 0.0006729998475849566, + 0.0, + 0.9999997735289103, + 0.0, + 0.0, + 6.666665156859402E-7, + 3.3333325784297012E-6, + 1.3333330313718804E-6, + 
], + ]; + + $targets = [15.5844, 4.45284]; + + $selector = new SelectKBest(2); + $selector->fit($samples, $targets); + + self::assertEquals([ + -2.117582368135751E-22, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0097419586828951E-28, + 0.0, + 1.4222215779620095E-11, + ], $selector->scores()); + } + + public function testThrowExceptionOnEmptyTargets(): void + { + $this->expectException(InvalidArgumentException::class); + $selector = new SelectKBest(2, new ANOVAFValue()); + $selector->fit([[1, 2, 3], [4, 5, 6]], []); + } + + public function testThrowExceptionWhenNotTrained(): void + { + $this->expectException(InvalidOperationException::class); + $selector = new SelectKBest(2, new ANOVAFValue()); + $selector->scores(); + } +} diff --git a/tests/FeatureSelection/VarianceThresholdTest.php b/tests/FeatureSelection/VarianceThresholdTest.php new file mode 100644 index 0000000..c76305f --- /dev/null +++ b/tests/FeatureSelection/VarianceThresholdTest.php @@ -0,0 +1,39 @@ +fit($samples); + $transformer->transform($samples); + + // expecting to remove first column + self::assertEquals([[0, 1], [1, 0], [0, 0], [1, 1], [1, 0], [1, 1]], $samples); + } + + public function testVarianceThresholdWithZeroThreshold(): void + { + $samples = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]; + $transformer = new VarianceThreshold(); + $transformer->fit($samples); + $transformer->transform($samples); + + self::assertEquals([[2, 0], [1, 4], [1, 1]], $samples); + } + + public function testThrowExceptionWhenThresholdBelowZero(): void + { + $this->expectException(InvalidArgumentException::class); + new VarianceThreshold(-0.1); + } +} diff --git a/tests/FeatureUnionTest.php b/tests/FeatureUnionTest.php new file mode 100644 index 0000000..0a903b4 --- /dev/null +++ b/tests/FeatureUnionTest.php @@ -0,0 +1,105 @@ +fitAndTransform($samples, $targets); + + self::assertEquals([ + [0, 23.0, 100000.0], + [1, 23.0, 200000.0], + [1, 43.0, 150000.0], + [0, 33.0, 150000.0], + ], $samples); + self::assertEquals([1, 2, 1, 3], 
$targets); + } + + public function testFitAndTransformSeparate(): void + { + $columns = ['age', 'income', 'sex']; + $trainSamples = [ + ['23', '100000', 'male'], + ['23', '200000', 'female'], + ['43', '150000', 'female'], + ['33', 'n/a', 'male'], + ]; + $testSamples = [ + ['43', '500000', 'female'], + ['13', 'n/a', 'male'], + ['53', 'n/a', 'male'], + ['43', 'n/a', 'female'], + ]; + + $union = new FeatureUnion([ + new Pipeline([ + new ColumnFilter($columns, ['sex']), + new LambdaTransformer(function (array $sample) { + return $sample[0]; + }), + new LabelEncoder(), + ]), + new Pipeline([ + new ColumnFilter($columns, ['age', 'income']), + new NumberConverter(), + new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN), + ]), + ]); + + $union->fit($trainSamples); + $union->transform($testSamples); + + self::assertEquals([ + [1, 43.0, 500000.0], + [0, 13.0, 150000.0], + [0, 53.0, 150000.0], + [1, 43.0, 150000.0], + ], $testSamples); + } + + public function testNotAllowForEmptyPipelines(): void + { + $this->expectException(InvalidArgumentException::class); + + new FeatureUnion([]); + } +} diff --git a/tests/Helper/Optimizer/ConjugateGradientTest.php b/tests/Helper/Optimizer/ConjugateGradientTest.php new file mode 100644 index 0000000..fc85a60 --- /dev/null +++ b/tests/Helper/Optimizer/ConjugateGradientTest.php @@ -0,0 +1,102 @@ +runOptimization($samples, $targets, $callback); + + self::assertEqualsWithDelta([-1, 2], $theta, 0.1); + } + + public function testRunOptimizationWithCustomInitialTheta(): void + { + // 200 samples from y = -1 + 2x (i.e. 
theta = [-1, 2]) + $samples = []; + $targets = []; + for ($i = -100; $i <= 100; ++$i) { + $x = $i / 100; + $samples[] = [$x]; + $targets[] = -1 + 2 * $x; + } + + $callback = static function ($theta, $sample, $target): array { + $y = $theta[0] + $theta[1] * $sample[0]; + $cost = (($y - $target) ** 2) / 2; + $grad = $y - $target; + + return [$cost, $grad]; + }; + + $optimizer = new ConjugateGradient(1); + // set very weak theta to trigger very bad result + $optimizer->setTheta([0.0000001, 0.0000001]); + + $theta = $optimizer->runOptimization($samples, $targets, $callback); + + self::assertEqualsWithDelta([-1.087708, 2.212034], $theta, 0.000001); + } + + public function testRunOptimization2Dim(): void + { + // 100 samples from y = -1 + 2x0 - 3x1 (i.e. theta = [-1, 2, -3]) + $samples = []; + $targets = []; + for ($i = 0; $i < 100; ++$i) { + $x0 = intval($i / 10) / 10; + $x1 = ($i % 10) / 10; + $samples[] = [$x0, $x1]; + $targets[] = -1 + 2 * $x0 - 3 * $x1; + } + + $callback = static function ($theta, $sample, $target): array { + $y = $theta[0] + $theta[1] * $sample[0] + $theta[2] * $sample[1]; + $cost = (($y - $target) ** 2) / 2; + $grad = $y - $target; + + return [$cost, $grad]; + }; + + $optimizer = new ConjugateGradient(2); + $optimizer->setChangeThreshold(1e-6); + + $theta = $optimizer->runOptimization($samples, $targets, $callback); + + self::assertEqualsWithDelta([-1, 2, -3], $theta, 0.1); + } + + public function testThrowExceptionOnInvalidTheta(): void + { + $opimizer = new ConjugateGradient(2); + + $this->expectException(InvalidArgumentException::class); + $opimizer->setTheta([0.15]); + } +} diff --git a/tests/Helper/Optimizer/GDTest.php b/tests/Helper/Optimizer/GDTest.php new file mode 100644 index 0000000..a6b4277 --- /dev/null +++ b/tests/Helper/Optimizer/GDTest.php @@ -0,0 +1,65 @@ +runOptimization($samples, $targets, $callback); + + self::assertEqualsWithDelta([-1, 2], $theta, 0.1); + } + + public function testRunOptimization2Dim(): void + { + // 100 
samples from y = -1 + 2x0 - 3x1 (i.e. theta = [-1, 2, -3]) + $samples = []; + $targets = []; + for ($i = 0; $i < 100; ++$i) { + $x0 = intval($i / 10) / 10; + $x1 = ($i % 10) / 10; + $samples[] = [$x0, $x1]; + $targets[] = -1 + 2 * $x0 - 3 * $x1; + } + + $callback = static function ($theta, $sample, $target): array { + $y = $theta[0] + $theta[1] * $sample[0] + $theta[2] * $sample[1]; + $cost = (($y - $target) ** 2) / 2; + $grad = $y - $target; + + return [$cost, $grad]; + }; + + $optimizer = new GD(2); + $optimizer->setChangeThreshold(1e-6); + + $theta = $optimizer->runOptimization($samples, $targets, $callback); + + self::assertEqualsWithDelta([-1, 2, -3], $theta, 0.1); + } +} diff --git a/tests/Helper/Optimizer/OptimizerTest.php b/tests/Helper/Optimizer/OptimizerTest.php new file mode 100644 index 0000000..184f6c7 --- /dev/null +++ b/tests/Helper/Optimizer/OptimizerTest.php @@ -0,0 +1,32 @@ +expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Number of values in the weights array should be 3'); + /** @var Optimizer $optimizer */ + $optimizer = $this->getMockForAbstractClass(Optimizer::class, [3]); + + $optimizer->setTheta([]); + } + + public function testSetTheta(): void + { + /** @var Optimizer $optimizer */ + $optimizer = $this->getMockForAbstractClass(Optimizer::class, [2]); + $object = $optimizer->setTheta([0.3, 1]); + + self::assertSame($object, $optimizer); + self::assertSame([0.3, 1], $object->theta()); + } +} diff --git a/tests/Helper/Optimizer/StochasticGDTest.php b/tests/Helper/Optimizer/StochasticGDTest.php new file mode 100644 index 0000000..4f99f78 --- /dev/null +++ b/tests/Helper/Optimizer/StochasticGDTest.php @@ -0,0 +1,65 @@ +runOptimization($samples, $targets, $callback); + + self::assertEqualsWithDelta([-1, 2], $theta, 0.1); + } + + public function testRunOptimization2Dim(): void + { + // 100 samples from y = -1 + 2x0 - 3x1 (i.e. 
theta = [-1, 2, -3]) + $samples = []; + $targets = []; + for ($i = 0; $i < 100; ++$i) { + $x0 = intval($i / 10) / 10; + $x1 = ($i % 10) / 10; + $samples[] = [$x0, $x1]; + $targets[] = -1 + 2 * $x0 - 3 * $x1; + } + + $callback = static function ($theta, $sample, $target): array { + $y = $theta[0] + $theta[1] * $sample[0] + $theta[2] * $sample[1]; + $cost = (($y - $target) ** 2) / 2; + $grad = $y - $target; + + return [$cost, $grad]; + }; + + $optimizer = new StochasticGD(2); + $optimizer->setChangeThreshold(1e-6); + + $theta = $optimizer->runOptimization($samples, $targets, $callback); + + self::assertEqualsWithDelta([-1, 2, -3], $theta, 0.1); + } +} diff --git a/tests/Math/ComparisonTest.php b/tests/Math/ComparisonTest.php new file mode 100644 index 0000000..338055b --- /dev/null +++ b/tests/Math/ComparisonTest.php @@ -0,0 +1,74 @@ +expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Invalid operator "~=" provided'); + Comparison::compare(1, 1, '~='); + } + + public function provideData(): array + { + return [ + // Greater + [1, 0, '>', true], + [1, 1, '>', false], + [0, 1, '>', false], + // Greater or equal + [1, 0, '>=', true], + [1, 1, '>=', true], + [0, 1, '>=', false], + // Equal + [1, 0, '=', false], + [1, 1, '==', true], + [1, '1', '=', true], + [1, '0', '==', false], + // Identical + [1, 0, '===', false], + [1, 1, '===', true], + [1, '1', '===', false], + ['a', 'a', '===', true], + // Not equal + [1, 0, '!=', true], + [1, 1, '<>', false], + [1, '1', '!=', false], + [1, '0', '<>', true], + // Not identical + [1, 0, '!==', true], + [1, 1, '!==', false], + [1, '1', '!==', true], + [1, '0', '!==', true], + // Less or equal + [1, 0, '<=', false], + [1, 1, '<=', true], + [0, 1, '<=', true], + // Less + [1, 0, '<', false], + [1, 1, '<', false], + [0, 1, '<', true], + ]; + } +} diff --git a/tests/Phpml/Math/Distance/ChebyshevTest.php b/tests/Math/Distance/ChebyshevTest.php similarity index 50% rename from 
tests/Phpml/Math/Distance/ChebyshevTest.php rename to tests/Math/Distance/ChebyshevTest.php index 78fb2a0..a59edbf 100644 --- a/tests/Phpml/Math/Distance/ChebyshevTest.php +++ b/tests/Math/Distance/ChebyshevTest.php @@ -1,35 +1,34 @@ distanceMetric = new Chebyshev(); } - /** - * @expectedException \Phpml\Exception\InvalidArgumentException - */ - public function testThrowExceptionOnInvalidArguments() + public function testThrowExceptionOnInvalidArguments(): void { + $this->expectException(InvalidArgumentException::class); $a = [0, 1, 2]; $b = [0, 2]; - $this->distanceMetric->distance($a, $b); } - public function testCalculateDistanceForOneDimension() + public function testCalculateDistanceForOneDimension(): void { $a = [4]; $b = [2]; @@ -37,10 +36,10 @@ class ChebyshevTest extends \PHPUnit_Framework_TestCase $expectedDistance = 2; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } - public function testCalculateDistanceForTwoDimensions() + public function testCalculateDistanceForTwoDimensions(): void { $a = [4, 6]; $b = [2, 5]; @@ -48,10 +47,10 @@ class ChebyshevTest extends \PHPUnit_Framework_TestCase $expectedDistance = 2; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } - public function testCalculateDistanceForThreeDimensions() + public function testCalculateDistanceForThreeDimensions(): void { $a = [6, 10, 3]; $b = [2, 5, 5]; @@ -59,6 +58,6 @@ class ChebyshevTest extends \PHPUnit_Framework_TestCase $expectedDistance = 5; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } } diff --git a/tests/Phpml/Math/Distance/EuclideanTest.php b/tests/Math/Distance/EuclideanTest.php similarity index 
51% rename from tests/Phpml/Math/Distance/EuclideanTest.php rename to tests/Math/Distance/EuclideanTest.php index a3dea3c..979a378 100644 --- a/tests/Phpml/Math/Distance/EuclideanTest.php +++ b/tests/Math/Distance/EuclideanTest.php @@ -1,35 +1,34 @@ distanceMetric = new Euclidean(); } - /** - * @expectedException \Phpml\Exception\InvalidArgumentException - */ - public function testThrowExceptionOnInvalidArguments() + public function testThrowExceptionOnInvalidArguments(): void { + $this->expectException(InvalidArgumentException::class); $a = [0, 1, 2]; $b = [0, 2]; - $this->distanceMetric->distance($a, $b); } - public function testCalculateDistanceForOneDimension() + public function testCalculateDistanceForOneDimension(): void { $a = [4]; $b = [2]; @@ -37,10 +36,10 @@ class EuclideanTest extends \PHPUnit_Framework_TestCase $expectedDistance = 2; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } - public function testCalculateDistanceForTwoDimensions() + public function testCalculateDistanceForTwoDimensions(): void { $a = [4, 6]; $b = [2, 5]; @@ -48,10 +47,10 @@ class EuclideanTest extends \PHPUnit_Framework_TestCase $expectedDistance = 2.2360679774998; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } - public function testCalculateDistanceForThreeDimensions() + public function testCalculateDistanceForThreeDimensions(): void { $a = [6, 10, 3]; $b = [2, 5, 5]; @@ -59,6 +58,6 @@ class EuclideanTest extends \PHPUnit_Framework_TestCase $expectedDistance = 6.7082039324993694; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } } diff --git a/tests/Phpml/Math/Distance/ManhattanTest.php 
b/tests/Math/Distance/ManhattanTest.php similarity index 50% rename from tests/Phpml/Math/Distance/ManhattanTest.php rename to tests/Math/Distance/ManhattanTest.php index 7d0cf2d..c189f26 100644 --- a/tests/Phpml/Math/Distance/ManhattanTest.php +++ b/tests/Math/Distance/ManhattanTest.php @@ -1,35 +1,34 @@ distanceMetric = new Manhattan(); } - /** - * @expectedException \Phpml\Exception\InvalidArgumentException - */ - public function testThrowExceptionOnInvalidArguments() + public function testThrowExceptionOnInvalidArguments(): void { + $this->expectException(InvalidArgumentException::class); $a = [0, 1, 2]; $b = [0, 2]; - $this->distanceMetric->distance($a, $b); } - public function testCalculateDistanceForOneDimension() + public function testCalculateDistanceForOneDimension(): void { $a = [4]; $b = [2]; @@ -37,10 +36,10 @@ class ManhattanTest extends \PHPUnit_Framework_TestCase $expectedDistance = 2; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } - public function testCalculateDistanceForTwoDimensions() + public function testCalculateDistanceForTwoDimensions(): void { $a = [4, 6]; $b = [2, 5]; @@ -48,10 +47,10 @@ class ManhattanTest extends \PHPUnit_Framework_TestCase $expectedDistance = 3; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } - public function testCalculateDistanceForThreeDimensions() + public function testCalculateDistanceForThreeDimensions(): void { $a = [6, 10, 3]; $b = [2, 5, 5]; @@ -59,6 +58,6 @@ class ManhattanTest extends \PHPUnit_Framework_TestCase $expectedDistance = 11; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } } diff --git 
a/tests/Phpml/Math/Distance/MinkowskiTest.php b/tests/Math/Distance/MinkowskiTest.php similarity index 53% rename from tests/Phpml/Math/Distance/MinkowskiTest.php rename to tests/Math/Distance/MinkowskiTest.php index ad9318d..fbff7d9 100644 --- a/tests/Phpml/Math/Distance/MinkowskiTest.php +++ b/tests/Math/Distance/MinkowskiTest.php @@ -1,35 +1,34 @@ distanceMetric = new Minkowski(); } - /** - * @expectedException \Phpml\Exception\InvalidArgumentException - */ - public function testThrowExceptionOnInvalidArguments() + public function testThrowExceptionOnInvalidArguments(): void { + $this->expectException(InvalidArgumentException::class); $a = [0, 1, 2]; $b = [0, 2]; - $this->distanceMetric->distance($a, $b); } - public function testCalculateDistanceForOneDimension() + public function testCalculateDistanceForOneDimension(): void { $a = [4]; $b = [2]; @@ -37,10 +36,10 @@ class MinkowskiTest extends \PHPUnit_Framework_TestCase $expectedDistance = 2; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance); + self::assertEquals($expectedDistance, $actualDistance); } - public function testCalculateDistanceForTwoDimensions() + public function testCalculateDistanceForTwoDimensions(): void { $a = [4, 6]; $b = [2, 5]; @@ -48,10 +47,10 @@ class MinkowskiTest extends \PHPUnit_Framework_TestCase $expectedDistance = 2.080; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance, '', $delta = 0.001); + self::assertEqualsWithDelta($expectedDistance, $actualDistance, $delta = 0.001); } - public function testCalculateDistanceForThreeDimensions() + public function testCalculateDistanceForThreeDimensions(): void { $a = [6, 10, 3]; $b = [2, 5, 5]; @@ -59,10 +58,10 @@ class MinkowskiTest extends \PHPUnit_Framework_TestCase $expectedDistance = 5.819; $actualDistance = $this->distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance, '', 
$delta = 0.001); + self::assertEqualsWithDelta($expectedDistance, $actualDistance, $delta = 0.001); } - public function testCalculateDistanceForThreeDimensionsWithDifferentLambda() + public function testCalculateDistanceForThreeDimensionsWithDifferentLambda(): void { $distanceMetric = new Minkowski($lambda = 5); @@ -72,6 +71,6 @@ class MinkowskiTest extends \PHPUnit_Framework_TestCase $expectedDistance = 5.300; $actualDistance = $distanceMetric->distance($a, $b); - $this->assertEquals($expectedDistance, $actualDistance, '', $delta = 0.001); + self::assertEqualsWithDelta($expectedDistance, $actualDistance, $delta = 0.001); } } diff --git a/tests/Math/Kernel/RBFTest.php b/tests/Math/Kernel/RBFTest.php new file mode 100644 index 0000000..3e4ce26 --- /dev/null +++ b/tests/Math/Kernel/RBFTest.php @@ -0,0 +1,35 @@ +compute([1, 2], [1, 2])); + self::assertEqualsWithDelta(0.97336, $rbf->compute([1, 2, 3], [4, 5, 6]), $delta = 0.0001); + self::assertEqualsWithDelta(0.00011, $rbf->compute([4, 5], [1, 100]), $delta = 0.0001); + + $rbf = new RBF($gamma = 0.2); + + self::assertEquals(1, $rbf->compute([1, 2], [1, 2])); + self::assertEqualsWithDelta(0.00451, $rbf->compute([1, 2, 3], [4, 5, 6]), $delta = 0.0001); + self::assertEquals(0, $rbf->compute([4, 5], [1, 100])); + } + + public function testThrowExceptionWhenComputeArgumentIsNotAnArray(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Arguments of Phpml\\Math\\Kernel\\RBF::compute must be arrays'); + + (new RBF(0.1))->compute([0], 1.0); + } +} diff --git a/tests/Math/LinearAlgebra/EigenvalueDecompositionTest.php b/tests/Math/LinearAlgebra/EigenvalueDecompositionTest.php new file mode 100644 index 0000000..884da25 --- /dev/null +++ b/tests/Math/LinearAlgebra/EigenvalueDecompositionTest.php @@ -0,0 +1,104 @@ +getRealEigenvalues(), 0.001); + self::assertEqualsWithDelta([ + [-0.735178656, 0.677873399], + [-0.677873399, -0.735178656], + ], $decomp->getEigenvectors(), 0.001); 
+ } + + public function testMatrixWithAllZeroRow(): void + { + // http://www.wolframalpha.com/widgets/view.jsp?id=9aa01caf50c9307e9dabe159c9068c41 + $matrix = [ + [10, 0, 0], + [0, 6, 0], + [0, 0, 0], + ]; + + $decomp = new EigenvalueDecomposition($matrix); + + self::assertEqualsWithDelta([0.0, 6.0, 10.0], $decomp->getRealEigenvalues(), 0.0001); + self::assertEqualsWithDelta([ + [0, 0, 1], + [0, 1, 0], + [1, 0, 0], + ], $decomp->getEigenvectors(), 0.0001); + } + + public function testMatrixThatCauseErrorWithStrictComparision(): void + { + // http://www.wolframalpha.com/widgets/view.jsp?id=9aa01caf50c9307e9dabe159c9068c41 + $matrix = [ + [1, 0, 3], + [0, 1, 7], + [3, 7, 4], + ]; + + $decomp = new EigenvalueDecomposition($matrix); + + self::assertEqualsWithDelta([-5.2620873481, 1.0, 10.2620873481], $decomp->getRealEigenvalues(), 0.000001); + self::assertEqualsWithDelta([ + [-0.3042688, -0.709960552, 0.63511928], + [-0.9191450, 0.393919298, 0.0], + [0.25018574, 0.5837667, 0.7724140], + ], $decomp->getEigenvectors(), 0.0001); + } + + public function testRandomSymmetricMatrixEigenPairs(): void + { + // Acceptable error + $epsilon = 0.001; + // Secondly, generate a symmetric square matrix + // and test for A.v=λ.v + // (We, for now, omit non-symmetric matrices whose eigenvalues can be complex numbers) + $len = 3; + srand((int) microtime(true) * 1000); + $A = array_fill(0, $len, array_fill(0, $len, 0.0)); + for ($i = 0; $i < $len; ++$i) { + for ($k = 0; $k < $len; ++$k) { + if ($i > $k) { + $A[$i][$k] = $A[$k][$i]; + } else { + $A[$i][$k] = random_int(0, 10); + } + } + } + + $decomp = new EigenvalueDecomposition($A); + $eigValues = $decomp->getRealEigenvalues(); + $eigVectors = $decomp->getEigenvectors(); + + foreach ($eigValues as $index => $lambda) { + $m1 = new Matrix($A); + $m2 = (new Matrix($eigVectors[$index]))->transpose(); + + // A.v=λ.v + $leftSide = $m1->multiply($m2)->toArray(); + $rightSide = $m2->multiplyByScalar($lambda)->toArray(); + + 
self::assertEqualsWithDelta($leftSide, $rightSide, $epsilon); + } + } +} diff --git a/tests/Math/LinearAlgebra/LUDecompositionTest.php b/tests/Math/LinearAlgebra/LUDecompositionTest.php new file mode 100644 index 0000000..bd81e2e --- /dev/null +++ b/tests/Math/LinearAlgebra/LUDecompositionTest.php @@ -0,0 +1,47 @@ +expectException(MatrixException::class); + + new LUDecomposition(new Matrix([1, 2, 3, 4, 5])); + } + + public function testSolveWithInvalidMatrix(): void + { + $this->expectException(MatrixException::class); + + $lu = new LUDecomposition(new Matrix([[1, 2], [3, 4]])); + $lu->solve(new Matrix([1, 2, 3])); + } + + public function testLowerTriangularFactor(): void + { + $lu = new LUDecomposition(new Matrix([[1, 2], [3, 4]])); + $L = $lu->getL(); + + self::assertSame([[1.0, 0.0], [0.3333333333333333, 1.0]], $L->toArray()); + } + + public function testUpperTriangularFactor(): void + { + $lu = new LUDecomposition(new Matrix([[1, 2], [3, 4]])); + $U = $lu->getU(); + + self::assertSame([[3.0, 4.0], [0.0, 0.6666666666666667]], $U->toArray()); + } +} diff --git a/tests/Math/MatrixTest.php b/tests/Math/MatrixTest.php new file mode 100644 index 0000000..4c11caf --- /dev/null +++ b/tests/Math/MatrixTest.php @@ -0,0 +1,302 @@ +expectException(InvalidArgumentException::class); + new Matrix([[1, 2], [3]]); + } + + public function testCreateMatrixFromFlatArray(): void + { + $flatArray = [1, 2, 3, 4]; + $matrix = Matrix::fromFlatArray($flatArray); + + self::assertEquals([[1], [2], [3], [4]], $matrix->toArray()); + self::assertEquals(4, $matrix->getRows()); + self::assertEquals(1, $matrix->getColumns()); + self::assertEquals($flatArray, $matrix->getColumnValues(0)); + } + + public function testThrowExceptionOnInvalidColumnNumber(): void + { + $this->expectException(MatrixException::class); + $matrix = new Matrix([[1, 2, 3], [4, 5, 6]]); + $matrix->getColumnValues(4); + } + + public function testThrowExceptionOnGetDeterminantIfArrayIsNotSquare(): void + { + 
$this->expectException(MatrixException::class); + $matrix = new Matrix([[1, 2, 3], [4, 5, 6]]); + $matrix->getDeterminant(); + } + + public function testGetMatrixDeterminant(): void + { + //http://matrix.reshish.com/determinant.php + $matrix = new Matrix([ + [3, 3, 3], + [4, 2, 1], + [5, 6, 7], + ]); + self::assertEquals(-3, $matrix->getDeterminant()); + + $matrix = new Matrix([ + [1, 2, 3, 3, 2, 1], + [1 / 2, 5, 6, 7, 1, 1], + [3 / 2, 7 / 2, 2, 0, 6, 8], + [1, 8, 10, 1, 2, 2], + [1 / 4, 4, 1, 0, 2, 3 / 7], + [1, 8, 7, 5, 4, 4 / 5], + ]); + self::assertEqualsWithDelta(1116.5035, $matrix->getDeterminant(), $delta = 0.0001); + } + + public function testMatrixTranspose(): void + { + $matrix = new Matrix([ + [3, 3, 3], + [4, 2, 1], + [5, 6, 7], + ]); + + $transposedMatrix = [ + [3, 4, 5], + [3, 2, 6], + [3, 1, 7], + ]; + + self::assertEquals($transposedMatrix, $matrix->transpose()->toArray()); + } + + public function testThrowExceptionOnMultiplyWhenInconsistentMatrixSupplied(): void + { + $this->expectException(InvalidArgumentException::class); + $matrix1 = new Matrix([[1, 2, 3], [4, 5, 6]]); + $matrix2 = new Matrix([[3, 2, 1], [6, 5, 4]]); + $matrix1->multiply($matrix2); + } + + public function testMatrixMultiplyByMatrix(): void + { + $matrix1 = new Matrix([ + [1, 2, 3], + [4, 5, 6], + ]); + + $matrix2 = new Matrix([ + [7, 8], + [9, 10], + [11, 12], + ]); + + $product = [ + [58, 64], + [139, 154], + ]; + + self::assertEquals($product, $matrix1->multiply($matrix2)->toArray()); + } + + public function testDivideByScalar(): void + { + $matrix = new Matrix([ + [4, 6, 8], + [2, 10, 20], + ]); + + $quotient = [ + [2, 3, 4], + [1, 5, 10], + ]; + + self::assertEquals($quotient, $matrix->divideByScalar(2)->toArray()); + } + + public function testThrowExceptionWhenInverseIfArrayIsNotSquare(): void + { + $this->expectException(MatrixException::class); + $matrix = new Matrix([[1, 2, 3], [4, 5, 6]]); + $matrix->inverse(); + } + + public function 
testThrowExceptionWhenInverseIfMatrixIsSingular(): void + { + $this->expectException(MatrixException::class); + $matrix = new Matrix([ + [0, 0, 0], + [0, 0, 0], + [0, 0, 0], + ]); + $matrix->inverse(); + } + + public function testInverseMatrix(): void + { + //http://ncalculators.com/matrix/inverse-matrix.htm + $matrix = new Matrix([ + [3, 4, 2], + [4, 5, 5], + [1, 1, 1], + ]); + + $inverseMatrix = [ + [0, -1, 5], + [1 / 2, 1 / 2, -7 / 2], + [-1 / 2, 1 / 2, -1 / 2], + ]; + + self::assertEqualsWithDelta($inverseMatrix, $matrix->inverse()->toArray(), $delta = 0.0001); + } + + public function testCrossOutMatrix(): void + { + $matrix = new Matrix([ + [3, 4, 2], + [4, 5, 5], + [1, 1, 1], + ]); + + $crossOuted = [ + [3, 2], + [1, 1], + ]; + + self::assertEquals($crossOuted, $matrix->crossOut(1, 1)->toArray()); + } + + public function testToScalar(): void + { + $matrix = new Matrix([[1, 2, 3], [3, 2, 3]]); + + self::assertEquals($matrix->toScalar(), 1); + } + + public function testMultiplyByScalar(): void + { + $matrix = new Matrix([ + [4, 6, 8], + [2, 10, 20], + ]); + + $result = [ + [-8, -12, -16], + [-4, -20, -40], + ]; + + self::assertEquals($result, $matrix->multiplyByScalar(-2)->toArray()); + } + + public function testAdd(): void + { + $array1 = [1, 1, 1]; + $array2 = [2, 2, 2]; + $result = [3, 3, 3]; + + $m1 = new Matrix($array1); + $m2 = new Matrix($array2); + + self::assertEquals($result, $m1->add($m2)->toArray()[0]); + } + + public function testSubtract(): void + { + $array1 = [1, 1, 1]; + $array2 = [2, 2, 2]; + $result = [-1, -1, -1]; + + $m1 = new Matrix($array1); + $m2 = new Matrix($array2); + + self::assertEquals($result, $m1->subtract($m2)->toArray()[0]); + } + + public function testTransposeArray(): void + { + $array = [ + [1, 1, 1], + [2, 2, 2], + ]; + $transposed = [ + [1, 2], + [1, 2], + [1, 2], + ]; + + self::assertEquals($transposed, Matrix::transposeArray($array)); + } + + public function testDot(): void + { + $vect1 = [2, 2, 2]; + $vect2 = [3, 3, 3]; 
+ $dot = [18]; + + self::assertEquals($dot, Matrix::dot($vect1, $vect2)); + + $matrix1 = [[1, 1], [2, 2]]; + $matrix2 = [[3, 3], [3, 3], [3, 3]]; + $dot = [6, 12]; + self::assertEquals($dot, Matrix::dot($matrix2, $matrix1)); + } + + /** + * @dataProvider dataProviderForFrobeniusNorm + */ + public function testFrobeniusNorm(array $matrix, float $norm): void + { + self::assertEqualsWithDelta($norm, (new Matrix($matrix))->frobeniusNorm(), 0.0001); + } + + public function dataProviderForFrobeniusNorm(): array + { + return [ + [ + [ + [1, -7], + [2, 3], + ], 7.93725, + ], + [ + [ + [1, 2, 3], + [2, 3, 4], + [3, 4, 5], + ], 9.643651, + ], + [ + [ + [1, 5, 3, 9], + [2, 3, 4, 12], + [4, 2, 5, 11], + ], 21.330729, + ], + [ + [ + [1, 5, 3], + [2, 3, 4], + [4, 2, 5], + [6, 6, 3], + ], 13.784049, + ], + [ + [ + [5, -4, 2], + [-1, 2, 3], + [-2, 1, 0], + ], 8, + ], + ]; + } +} diff --git a/tests/Math/ProductTest.php b/tests/Math/ProductTest.php new file mode 100644 index 0000000..4eaff58 --- /dev/null +++ b/tests/Math/ProductTest.php @@ -0,0 +1,22 @@ +cardinality()); + } + + public function testIntersection(): void + { + $intersection = Set::intersection(new Set(['C', 'A']), new Set(['B', 'C'])); + + self::assertEquals(new Set(['C']), $intersection); + self::assertEquals(1, $intersection->cardinality()); + } + + public function testDifference(): void + { + $difference = Set::difference(new Set(['C', 'A', 'B']), new Set(['A'])); + + self::assertEquals(new Set(['B', 'C']), $difference); + self::assertEquals(2, $difference->cardinality()); + } + + public function testPower(): void + { + $power = Set::power(new Set(['A', 'B'])); + + self::assertEquals([new Set(), new Set(['A']), new Set(['B']), new Set(['A', 'B'])], $power); + self::assertCount(4, $power); + } + + public function testCartesian(): void + { + $cartesian = Set::cartesian(new Set(['A']), new Set([1, 2])); + + self::assertEquals([new Set(['A', 1]), new Set(['A', 2])], $cartesian); + self::assertCount(2, $cartesian); + } 
+ + public function testContains(): void + { + $set = new Set(['B', 'A', 2, 1]); + + self::assertTrue($set->contains('B')); + self::assertTrue($set->containsAll(['A', 'B'])); + + self::assertFalse($set->contains('C')); + self::assertFalse($set->containsAll(['A', 'B', 'C'])); + } + + public function testRemove(): void + { + $set = new Set(['B', 'A', 2, 1]); + + self::assertEquals((new Set([1, 2, 2, 2, 'B']))->toArray(), $set->remove('A')->toArray()); + } + + public function testAdd(): void + { + $set = new Set(['B', 'A', 2, 1]); + $set->addAll(['foo', 'bar']); + self::assertEquals(6, $set->cardinality()); + } + + public function testEmpty(): void + { + $set = new Set([1, 2]); + $set->removeAll([2, 1]); + self::assertEquals(new Set(), $set); + self::assertTrue($set->isEmpty()); + } + + public function testToArray(): void + { + $set = new Set([1, 2, 2, 3, 'A', false, '', 1.1, -1, -10, 'B']); + + self::assertEquals([-10, '', -1, 'A', 'B', 1, 1.1, 2, 3], $set->toArray()); + } +} diff --git a/tests/Math/Statistic/ANOVATest.php b/tests/Math/Statistic/ANOVATest.php new file mode 100644 index 0000000..acc79d9 --- /dev/null +++ b/tests/Math/Statistic/ANOVATest.php @@ -0,0 +1,44 @@ +expectException(InvalidArgumentException::class); + $samples = [ + [[1, 2, 1], [1, 3, 4], [5, 2, 1]], + ]; + + ANOVA::oneWayF($samples); + } +} diff --git a/tests/Phpml/Math/Statistic/CorrelationTest.php b/tests/Math/Statistic/CorrelationTest.php similarity index 57% rename from tests/Phpml/Math/Statistic/CorrelationTest.php rename to tests/Math/Statistic/CorrelationTest.php index 948dc16..98b7274 100644 --- a/tests/Phpml/Math/Statistic/CorrelationTest.php +++ b/tests/Math/Statistic/CorrelationTest.php @@ -1,38 +1,38 @@ assertEquals(-0.641, Correlation::pearson($x, $y), '', $delta); + self::assertEqualsWithDelta(-0.641, Correlation::pearson($x, $y), $delta); //http://www.statisticshowto.com/how-to-compute-pearsons-correlation-coefficients/ $delta = 0.001; $x = [43, 21, 25, 42, 57, 59]; $y = [99, 
65, 79, 75, 87, 82]; - $this->assertEquals(0.549, Correlation::pearson($x, $y), '', $delta); + self::assertEqualsWithDelta(0.549, Correlation::pearson($x, $y), $delta); $delta = 0.001; $x = [60, 61, 62, 63, 65]; $y = [3.1, 3.6, 3.8, 4, 4.1]; - $this->assertEquals(0.911, Correlation::pearson($x, $y), '', $delta); + self::assertEqualsWithDelta(0.911, Correlation::pearson($x, $y), $delta); } - /** - * @expectedException \Phpml\Exception\InvalidArgumentException - */ - public function testThrowExceptionOnInvalidArgumentsForPearsonCorrelation() + public function testThrowExceptionOnInvalidArgumentsForPearsonCorrelation(): void { + $this->expectException(InvalidArgumentException::class); Correlation::pearson([1, 2, 4], [3, 5]); } } diff --git a/tests/Math/Statistic/CovarianceTest.php b/tests/Math/Statistic/CovarianceTest.php new file mode 100644 index 0000000..fe792c4 --- /dev/null +++ b/tests/Math/Statistic/CovarianceTest.php @@ -0,0 +1,105 @@ +expectException(InvalidArgumentException::class); + Covariance::fromXYArrays([], [1, 2, 3]); + } + + public function testThrowExceptionOnEmptyY(): void + { + $this->expectException(InvalidArgumentException::class); + Covariance::fromXYArrays([1, 2, 3], []); + } + + public function testThrowExceptionOnTooSmallArrayIfSample(): void + { + $this->expectException(InvalidArgumentException::class); + Covariance::fromXYArrays([1], [2], true); + } + + public function testThrowExceptionIfEmptyDataset(): void + { + $this->expectException(InvalidArgumentException::class); + Covariance::fromDataset([], 0, 1); + } + + public function testThrowExceptionOnTooSmallDatasetIfSample(): void + { + $this->expectException(InvalidArgumentException::class); + Covariance::fromDataset([1], 0, 1); + } + + public function testThrowExceptionWhenKIndexIsOutOfBound(): void + { + $this->expectException(InvalidArgumentException::class); + Covariance::fromDataset([1, 2, 3], 2, 5); + } + + public function testThrowExceptionWhenIIndexIsOutOfBound(): void + { + 
$this->expectException(InvalidArgumentException::class); + Covariance::fromDataset([1, 2, 3], 5, 2); + } +} diff --git a/tests/Math/Statistic/GaussianTest.php b/tests/Math/Statistic/GaussianTest.php new file mode 100644 index 0000000..16b1c5f --- /dev/null +++ b/tests/Math/Statistic/GaussianTest.php @@ -0,0 +1,28 @@ + $v) { + self::assertEqualsWithDelta($pdf[$i], $g->pdf($v), $delta); + + self::assertEqualsWithDelta($pdf[$i], Gaussian::distributionPdf($mean, $std, $v), $delta); + } + } +} diff --git a/tests/Math/Statistic/MeanTest.php b/tests/Math/Statistic/MeanTest.php new file mode 100644 index 0000000..640225d --- /dev/null +++ b/tests/Math/Statistic/MeanTest.php @@ -0,0 +1,59 @@ +expectException(InvalidArgumentException::class); + Mean::arithmetic([]); + } + + public function testArithmeticMean(): void + { + $delta = 0.01; + self::assertEqualsWithDelta(3.5, Mean::arithmetic([2, 5]), $delta); + self::assertEqualsWithDelta(41.16, Mean::arithmetic([43, 21, 25, 42, 57, 59]), $delta); + self::assertEqualsWithDelta(1.7, Mean::arithmetic([0.5, 0.5, 1.5, 2.5, 3.5]), $delta); + } + + public function testMedianThrowExceptionOnEmptyArray(): void + { + $this->expectException(InvalidArgumentException::class); + Mean::median([]); + } + + public function testMedianOnOddLengthArray(): void + { + $numbers = [5, 2, 6, 1, 3]; + + self::assertEquals(3, Mean::median($numbers)); + } + + public function testMedianOnEvenLengthArray(): void + { + $numbers = [5, 2, 6, 1, 3, 4]; + + self::assertEquals(3.5, Mean::median($numbers)); + } + + public function testModeThrowExceptionOnEmptyArray(): void + { + $this->expectException(InvalidArgumentException::class); + Mean::mode([]); + } + + public function testModeOnArray(): void + { + $numbers = [5, 2, 6, 1, 3, 4, 6, 6, 5]; + + self::assertEquals(6, Mean::mode($numbers)); + } +} diff --git a/tests/Math/Statistic/StandardDeviationTest.php b/tests/Math/Statistic/StandardDeviationTest.php new file mode 100644 index 0000000..7f4b435 --- /dev/null 
+++ b/tests/Math/Statistic/StandardDeviationTest.php @@ -0,0 +1,65 @@ +expectException(InvalidArgumentException::class); + StandardDeviation::population([], false); + } + + public function testThrowExceptionOnTooSmallArray(): void + { + $this->expectException(InvalidArgumentException::class); + StandardDeviation::population([1]); + } + + /** + * @dataProvider dataProviderForSumOfSquaresDeviations + */ + public function testSumOfSquares(array $numbers, float $sum): void + { + self::assertEqualsWithDelta($sum, StandardDeviation::sumOfSquares($numbers), 0.0001); + } + + public function dataProviderForSumOfSquaresDeviations(): array + { + return [ + [[3, 6, 7, 11, 12, 13, 17], 136.8571], + [[6, 11, 12, 14, 15, 20, 21], 162.8571], + [[1, 2, 3, 6, 7, 11, 12], 112], + [[1, 2, 3, 4, 5, 6, 7, 8, 9, 0], 82.5], + [[34, 253, 754, 2342, 75, 23, 876, 4, 1, -34, -345, 754, -377, 3, 0], 6453975.7333], + ]; + } + + public function testThrowExceptionOnEmptyArraySumOfSquares(): void + { + $this->expectException(InvalidArgumentException::class); + StandardDeviation::sumOfSquares([]); + } +} diff --git a/tests/Math/Statistic/VarianceTest.php b/tests/Math/Statistic/VarianceTest.php new file mode 100644 index 0000000..2cda011 --- /dev/null +++ b/tests/Math/Statistic/VarianceTest.php @@ -0,0 +1,34 @@ +expectException(InvalidArgumentException::class); + $actualLabels = ['a', 'b', 'a', 'b']; + $predictedLabels = ['a', 'a']; + Accuracy::score($actualLabels, $predictedLabels); + } + + public function testCalculateNormalizedScore(): void + { + $actualLabels = ['a', 'b', 'a', 'b']; + $predictedLabels = ['a', 'a', 'b', 'b']; + + self::assertEquals(0.5, Accuracy::score($actualLabels, $predictedLabels)); + } + + public function testCalculateNotNormalizedScore(): void + { + $actualLabels = ['a', 'b', 'a', 'b']; + $predictedLabels = ['a', 'b', 'b', 'b']; + + self::assertEquals(3, Accuracy::score($actualLabels, $predictedLabels, false)); + } + + public function testAccuracyOnDemoDataset(): void + { + 
$dataset = new RandomSplit(new IrisDataset(), 0.5, 123); + + $classifier = new SVC(Kernel::RBF); + $classifier->train($dataset->getTrainSamples(), $dataset->getTrainLabels()); + + $predicted = (array) $classifier->predict($dataset->getTestSamples()); + + $accuracy = Accuracy::score($dataset->getTestLabels(), $predicted); + + $expected = PHP_VERSION_ID >= 70100 ? 1 : 0.959; + + self::assertEqualsWithDelta($expected, $accuracy, 0.01); + } +} diff --git a/tests/Metric/ClassificationReportTest.php b/tests/Metric/ClassificationReportTest.php new file mode 100644 index 0000000..3feccc8 --- /dev/null +++ b/tests/Metric/ClassificationReportTest.php @@ -0,0 +1,206 @@ + 0.5, + 'ant' => 0.0, + 'bird' => 1.0, + ]; + $recall = [ + 'cat' => 1.0, + 'ant' => 0.0, + 'bird' => 0.67, + ]; + $f1score = [ + 'cat' => 0.67, + 'ant' => 0.0, + 'bird' => 0.80, + ]; + $support = [ + 'cat' => 1, + 'ant' => 1, + 'bird' => 3, + ]; + + // ClassificationReport uses macro-averaging as default + $average = [ + 'precision' => 0.5, // (1/2 + 0 + 1) / 3 = 1/2 + 'recall' => 0.56, // (1 + 0 + 2/3) / 3 = 5/9 + 'f1score' => 0.49, // (2/3 + 0 + 4/5) / 3 = 22/45 + ]; + + self::assertEqualsWithDelta($precision, $report->getPrecision(), 0.01); + self::assertEqualsWithDelta($recall, $report->getRecall(), 0.01); + self::assertEqualsWithDelta($f1score, $report->getF1score(), 0.01); + self::assertEqualsWithDelta($support, $report->getSupport(), 0.01); + self::assertEqualsWithDelta($average, $report->getAverage(), 0.01); + } + + public function testClassificationReportGenerateWithNumericLabels(): void + { + $labels = [0, 1, 2, 2, 2]; + $predicted = [0, 0, 2, 2, 1]; + + $report = new ClassificationReport($labels, $predicted); + + $precision = [ + 0 => 0.5, + 1 => 0.0, + 2 => 1.0, + ]; + $recall = [ + 0 => 1.0, + 1 => 0.0, + 2 => 0.67, + ]; + $f1score = [ + 0 => 0.67, + 1 => 0.0, + 2 => 0.80, + ]; + $support = [ + 0 => 1, + 1 => 1, + 2 => 3, + ]; + $average = [ + 'precision' => 0.5, + 'recall' => 0.56, + 'f1score' 
=> 0.49, + ]; + + self::assertEqualsWithDelta($precision, $report->getPrecision(), 0.01); + self::assertEqualsWithDelta($recall, $report->getRecall(), 0.01); + self::assertEqualsWithDelta($f1score, $report->getF1score(), 0.01); + self::assertEqualsWithDelta($support, $report->getSupport(), 0.01); + self::assertEqualsWithDelta($average, $report->getAverage(), 0.01); + } + + public function testClassificationReportAverageOutOfRange(): void + { + $labels = ['cat', 'ant', 'bird', 'bird', 'bird']; + $predicted = ['cat', 'cat', 'bird', 'bird', 'ant']; + + $this->expectException(InvalidArgumentException::class); + new ClassificationReport($labels, $predicted, 0); + } + + public function testClassificationReportMicroAverage(): void + { + $labels = ['cat', 'ant', 'bird', 'bird', 'bird']; + $predicted = ['cat', 'cat', 'bird', 'bird', 'ant']; + + $report = new ClassificationReport($labels, $predicted, ClassificationReport::MICRO_AVERAGE); + + $average = [ + 'precision' => 0.6, // TP / (TP + FP) = (1 + 0 + 2) / (2 + 1 + 2) = 3/5 + 'recall' => 0.6, // TP / (TP + FN) = (1 + 0 + 2) / (1 + 1 + 3) = 3/5 + 'f1score' => 0.6, // Harmonic mean of precision and recall + ]; + + self::assertEqualsWithDelta($average, $report->getAverage(), 0.01); + } + + public function testClassificationReportMacroAverage(): void + { + $labels = ['cat', 'ant', 'bird', 'bird', 'bird']; + $predicted = ['cat', 'cat', 'bird', 'bird', 'ant']; + + $report = new ClassificationReport($labels, $predicted, ClassificationReport::MACRO_AVERAGE); + + $average = [ + 'precision' => 0.5, // (1/2 + 0 + 1) / 3 = 1/2 + 'recall' => 0.56, // (1 + 0 + 2/3) / 3 = 5/9 + 'f1score' => 0.49, // (2/3 + 0 + 4/5) / 3 = 22/45 + ]; + + self::assertEqualsWithDelta($average, $report->getAverage(), 0.01); + } + + public function testClassificationReportWeightedAverage(): void + { + $labels = ['cat', 'ant', 'bird', 'bird', 'bird']; + $predicted = ['cat', 'cat', 'bird', 'bird', 'ant']; + + $report = new ClassificationReport($labels, 
$predicted, ClassificationReport::WEIGHTED_AVERAGE); + + $average = [ + 'precision' => 0.7, // (1/2 * 1 + 0 * 1 + 1 * 3) / 5 = 7/10 + 'recall' => 0.6, // (1 * 1 + 0 * 1 + 2/3 * 3) / 5 = 3/5 + 'f1score' => 0.61, // (2/3 * 1 + 0 * 1 + 4/5 * 3) / 5 = 46/75 + ]; + + self::assertEqualsWithDelta($average, $report->getAverage(), 0.01); + } + + public function testPreventDivideByZeroWhenTruePositiveAndFalsePositiveSumEqualsZero(): void + { + $labels = [1, 2]; + $predicted = [2, 2]; + + $report = new ClassificationReport($labels, $predicted); + + self::assertEqualsWithDelta([ + 1 => 0.0, + 2 => 0.5, + ], $report->getPrecision(), 0.01); + } + + public function testPreventDivideByZeroWhenTruePositiveAndFalseNegativeSumEqualsZero(): void + { + $labels = [2, 2, 1]; + $predicted = [2, 2, 3]; + + $report = new ClassificationReport($labels, $predicted); + + self::assertEqualsWithDelta([ + 1 => 0.0, + 2 => 1, + 3 => 0, + ], $report->getPrecision(), 0.01); + } + + public function testPreventDividedByZeroWhenPredictedLabelsAllNotMatch(): void + { + $labels = [1, 2, 3, 4, 5]; + $predicted = [2, 3, 4, 5, 6]; + + $report = new ClassificationReport($labels, $predicted); + + self::assertEqualsWithDelta([ + 'precision' => 0, + 'recall' => 0, + 'f1score' => 0, + ], $report->getAverage(), 0.01); + } + + public function testPreventDividedByZeroWhenLabelsAreEmpty(): void + { + $labels = []; + $predicted = []; + + $report = new ClassificationReport($labels, $predicted); + + self::assertEqualsWithDelta([ + 'precision' => 0, + 'recall' => 0, + 'f1score' => 0, + ], $report->getAverage(), 0.01); + } +} diff --git a/tests/Metric/ConfusionMatrixTest.php b/tests/Metric/ConfusionMatrixTest.php new file mode 100644 index 0000000..36518a3 --- /dev/null +++ b/tests/Metric/ConfusionMatrixTest.php @@ -0,0 +1,62 @@ +saveToFile($estimator, $filepath); + + $restored = $modelManager->restoreFromFile($filepath); + self::assertEquals($estimator, $restored); + } + + public function testRestoreWrongFile(): void + { 
+ $this->expectException(FileException::class); + $filepath = sys_get_temp_dir().DIRECTORY_SEPARATOR.'unexisting'; + $modelManager = new ModelManager(); + $modelManager->restoreFromFile($filepath); + } +} diff --git a/tests/NeuralNetwork/ActivationFunction/BinaryStepTest.php b/tests/NeuralNetwork/ActivationFunction/BinaryStepTest.php new file mode 100644 index 0000000..699c708 --- /dev/null +++ b/tests/NeuralNetwork/ActivationFunction/BinaryStepTest.php @@ -0,0 +1,53 @@ +compute($value)); + } + + public function binaryStepProvider(): array + { + return [ + [1, 1], + [1, 0], + [0, -0.1], + ]; + } + + /** + * @dataProvider binaryStepDerivativeProvider + * + * @param float|int $value + */ + public function testBinaryStepDerivative(float $expected, $value): void + { + $binaryStep = new BinaryStep(); + $activatedValue = $binaryStep->compute($value); + self::assertEquals($expected, $binaryStep->differentiate($value, $activatedValue)); + } + + public function binaryStepDerivativeProvider(): array + { + return [ + [0, -1], + [1, 0], + [0, 1], + ]; + } +} diff --git a/tests/NeuralNetwork/ActivationFunction/GaussianTest.php b/tests/NeuralNetwork/ActivationFunction/GaussianTest.php new file mode 100644 index 0000000..c44ae0f --- /dev/null +++ b/tests/NeuralNetwork/ActivationFunction/GaussianTest.php @@ -0,0 +1,59 @@ +compute($value), 0.001); + } + + public function gaussianProvider(): array + { + return [ + [0.367, 1], + [1, 0], + [0.367, -1], + [0, 3], + [0, -3], + ]; + } + + /** + * @dataProvider gaussianDerivativeProvider + * + * @param float|int $value + */ + public function testGaussianDerivative(float $expected, $value): void + { + $gaussian = new Gaussian(); + $activatedValue = $gaussian->compute($value); + self::assertEqualsWithDelta($expected, $gaussian->differentiate($value, $activatedValue), 0.001); + } + + public function gaussianDerivativeProvider(): array + { + return [ + [0, -5], + [0.735, -1], + [0.779, -0.5], + [0, 0], + [-0.779, 0.5], + [-0.735, 1], + [0, 
5], + ]; + } +} diff --git a/tests/NeuralNetwork/ActivationFunction/HyperboliTangentTest.php b/tests/NeuralNetwork/ActivationFunction/HyperboliTangentTest.php new file mode 100644 index 0000000..8865c59 --- /dev/null +++ b/tests/NeuralNetwork/ActivationFunction/HyperboliTangentTest.php @@ -0,0 +1,61 @@ +compute($value), 0.001); + } + + public function tanhProvider(): array + { + return [ + [1.0, 0.761, 1], + [1.0, 0, 0], + [1.0, 1, 4], + [1.0, -1, -4], + [0.5, 0.462, 1], + [0.3, 0, 0], + ]; + } + + /** + * @dataProvider tanhDerivativeProvider + * + * @param float|int $value + */ + public function testHyperbolicTangentDerivative(float $beta, float $expected, $value): void + { + $tanh = new HyperbolicTangent($beta); + $activatedValue = $tanh->compute($value); + self::assertEqualsWithDelta($expected, $tanh->differentiate($value, $activatedValue), 0.001); + } + + public function tanhDerivativeProvider(): array + { + return [ + [1.0, 0, -6], + [1.0, 0.419, -1], + [1.0, 1, 0], + [1.0, 0.419, 1], + [1.0, 0, 6], + [0.5, 0.786, 1], + [0.5, 0.786, -1], + [0.3, 1, 0], + ]; + } +} diff --git a/tests/NeuralNetwork/ActivationFunction/PReLUTest.php b/tests/NeuralNetwork/ActivationFunction/PReLUTest.php new file mode 100644 index 0000000..4aa069b --- /dev/null +++ b/tests/NeuralNetwork/ActivationFunction/PReLUTest.php @@ -0,0 +1,59 @@ +compute($value), 0.001); + } + + public function preluProvider(): array + { + return [ + [0.01, 0.367, 0.367], + [0.0, 1, 1], + [0.3, -0.3, -1], + [0.9, 3, 3], + [0.02, -0.06, -3], + ]; + } + + /** + * @dataProvider preluDerivativeProvider + * + * @param float|int $value + */ + public function testPReLUDerivative(float $beta, float $expected, $value): void + { + $prelu = new PReLU($beta); + $activatedValue = $prelu->compute($value); + self::assertEquals($expected, $prelu->differentiate($value, $activatedValue)); + } + + public function preluDerivativeProvider(): array + { + return [ + [0.5, 0.5, -3], + [0.5, 1, 0], + [0.5, 1, 1], + [0.01, 1, 1], + 
[1, 1, 1], + [0.3, 1, 0.1], + [0.1, 0.1, -0.1], + ]; + } +} diff --git a/tests/NeuralNetwork/ActivationFunction/SigmoidTest.php b/tests/NeuralNetwork/ActivationFunction/SigmoidTest.php new file mode 100644 index 0000000..d0cf22b --- /dev/null +++ b/tests/NeuralNetwork/ActivationFunction/SigmoidTest.php @@ -0,0 +1,61 @@ +compute($value), 0.001); + } + + public function sigmoidProvider(): array + { + return [ + [1.0, 1, 7.25], + [2.0, 1, 3.75], + [1.0, 0.5, 0], + [0.5, 0.5, 0], + [1.0, 0, -7.25], + [2.0, 0, -3.75], + ]; + } + + /** + * @dataProvider sigmoidDerivativeProvider + * + * @param float|int $value + */ + public function testSigmoidDerivative(float $beta, float $expected, $value): void + { + $sigmoid = new Sigmoid($beta); + $activatedValue = $sigmoid->compute($value); + self::assertEqualsWithDelta($expected, $sigmoid->differentiate($value, $activatedValue), 0.001); + } + + public function sigmoidDerivativeProvider(): array + { + return [ + [1.0, 0, -10], + [1, 0.006, -5], + [1.0, 0.25, 0], + [1, 0.006, 5], + [1.0, 0, 10], + [2.0, 0.25, 0], + [0.5, 0.246, 0.5], + [0.5, 0.241, 0.75], + ]; + } +} diff --git a/tests/NeuralNetwork/ActivationFunction/ThresholdedReLUTest.php b/tests/NeuralNetwork/ActivationFunction/ThresholdedReLUTest.php new file mode 100644 index 0000000..19b0039 --- /dev/null +++ b/tests/NeuralNetwork/ActivationFunction/ThresholdedReLUTest.php @@ -0,0 +1,57 @@ +compute($value)); + } + + public function thresholdProvider(): array + { + return [ + [1.0, 0, 1.0], + [0.5, 3.75, 3.75], + [0.0, 0.5, 0.5], + [0.9, 0, 0.1], + ]; + } + + /** + * @dataProvider thresholdDerivativeProvider + * + * @param float|int $value + */ + public function testThresholdedReLUDerivative(float $theta, float $expected, $value): void + { + $thresholdedReLU = new ThresholdedReLU($theta); + $activatedValue = $thresholdedReLU->compute($value); + self::assertEquals($expected, $thresholdedReLU->differentiate($value, $activatedValue)); + } + + public function 
thresholdDerivativeProvider(): array + { + return [ + [0, 1, 1], + [0, 1, 0], + [0.5, 1, 1], + [0.5, 1, 1], + [0.5, 0, 0], + [2, 0, -1], + ]; + } +} diff --git a/tests/NeuralNetwork/LayerTest.php b/tests/NeuralNetwork/LayerTest.php new file mode 100644 index 0000000..87809b8 --- /dev/null +++ b/tests/NeuralNetwork/LayerTest.php @@ -0,0 +1,57 @@ +getNodes()); + } + + public function testLayerInitializationWithDefaultNodesType(): void + { + $layer = new Layer($number = 5); + + self::assertCount($number, $layer->getNodes()); + foreach ($layer->getNodes() as $node) { + self::assertInstanceOf(Neuron::class, $node); + } + } + + public function testLayerInitializationWithExplicitNodesType(): void + { + $layer = new Layer($number = 5, $class = Bias::class); + + self::assertCount($number, $layer->getNodes()); + foreach ($layer->getNodes() as $node) { + self::assertInstanceOf($class, $node); + } + } + + public function testThrowExceptionOnInvalidNodeClass(): void + { + $this->expectException(InvalidArgumentException::class); + new Layer(1, stdClass::class); + } + + public function testAddNodesToLayer(): void + { + $layer = new Layer(); + $layer->addNode($node1 = new Neuron()); + $layer->addNode($node2 = new Neuron()); + + self::assertEquals([$node1, $node2], $layer->getNodes()); + } +} diff --git a/tests/NeuralNetwork/Network/LayeredNetworkTest.php b/tests/NeuralNetwork/Network/LayeredNetworkTest.php new file mode 100644 index 0000000..0a48ee8 --- /dev/null +++ b/tests/NeuralNetwork/Network/LayeredNetworkTest.php @@ -0,0 +1,73 @@ +getLayeredNetworkMock(); + + $network->addLayer($layer1 = new Layer()); + $network->addLayer($layer2 = new Layer()); + + self::assertEquals([$layer1, $layer2], $network->getLayers()); + } + + public function testGetLastLayerAsOutputLayer(): void + { + $network = $this->getLayeredNetworkMock(); + $network->addLayer($layer1 = new Layer()); + + self::assertEquals($layer1, $network->getOutputLayer()); + + $network->addLayer($layer2 = new Layer()); + 
self::assertEquals($layer2, $network->getOutputLayer()); + } + + public function testSetInputAndGetOutput(): void + { + $network = $this->getLayeredNetworkMock(); + $network->addLayer(new Layer(2, Input::class)); + + $network->setInput($input = [34, 43]); + self::assertEquals($input, $network->getOutput()); + + $network->addLayer(new Layer(1)); + self::assertEquals([0.5], $network->getOutput()); + } + + public function testSetInputAndGetOutputWithCustomActivationFunctions(): void + { + $network = $this->getLayeredNetworkMock(); + $network->addLayer(new Layer(2, Input::class, $this->getActivationFunctionMock())); + + $network->setInput($input = [34, 43]); + self::assertEquals($input, $network->getOutput()); + } + + /** + * @return LayeredNetwork|MockObject + */ + private function getLayeredNetworkMock() + { + return $this->getMockForAbstractClass(LayeredNetwork::class); + } + + /** + * @return ActivationFunction|MockObject + */ + private function getActivationFunctionMock() + { + return $this->getMockForAbstractClass(ActivationFunction::class); + } +} diff --git a/tests/NeuralNetwork/Network/MultilayerPerceptronTest.php b/tests/NeuralNetwork/Network/MultilayerPerceptronTest.php new file mode 100644 index 0000000..6123f9b --- /dev/null +++ b/tests/NeuralNetwork/Network/MultilayerPerceptronTest.php @@ -0,0 +1,109 @@ +expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Provide at least 1 hidden layer'); + + $this->getMockForAbstractClass( + MultilayerPerceptron::class, + [5, [], [0, 1], 1000, null, 0.42] + ); + } + + public function testThrowExceptionWhenThereIsOnlyOneClass(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Provide at least 2 different classes'); + + $this->getMockForAbstractClass( + MultilayerPerceptron::class, + [5, [3], [0], 1000, null, 0.42] + ); + } + + public function testThrowExceptionWhenClassesAreNotUnique(): void + { + 
$this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Classes must be unique'); + + $this->getMockForAbstractClass( + MultilayerPerceptron::class, + [5, [3], [0, 1, 2, 3, 1], 1000, null, 0.42] + ); + } + + public function testLearningRateSetter(): void + { + /** @var MultilayerPerceptron $mlp */ + $mlp = $this->getMockForAbstractClass( + MultilayerPerceptron::class, + [5, [3], [0, 1], 1000, null, 0.42] + ); + + self::assertEquals(0.42, $mlp->getLearningRate()); + self::assertEquals(0.42, $mlp->getBackpropagation()->getLearningRate()); + + $mlp->setLearningRate(0.24); + self::assertEquals(0.24, $mlp->getLearningRate()); + self::assertEquals(0.24, $mlp->getBackpropagation()->getLearningRate()); + } + + public function testLearningRateSetterWithCustomActivationFunctions(): void + { + $activation_function = $this->getActivationFunctionMock(); + + /** @var MultilayerPerceptron $mlp */ + $mlp = $this->getMockForAbstractClass( + MultilayerPerceptron::class, + [5, [[3, $activation_function], [5, $activation_function]], [0, 1], 1000, null, 0.42] + ); + + self::assertEquals(0.42, $mlp->getLearningRate()); + self::assertEquals(0.42, $mlp->getBackpropagation()->getLearningRate()); + + $mlp->setLearningRate(0.24); + self::assertEquals(0.24, $mlp->getLearningRate()); + self::assertEquals(0.24, $mlp->getBackpropagation()->getLearningRate()); + } + + public function testLearningRateSetterWithLayerObject(): void + { + $activation_function = $this->getActivationFunctionMock(); + + /** @var MultilayerPerceptron $mlp */ + $mlp = $this->getMockForAbstractClass( + MultilayerPerceptron::class, + [5, [new Layer(3, Neuron::class, $activation_function), new Layer(5, Neuron::class, $activation_function)], [0, 1], 1000, null, 0.42] + ); + + self::assertEquals(0.42, $mlp->getLearningRate()); + self::assertEquals(0.42, $mlp->getBackpropagation()->getLearningRate()); + + $mlp->setLearningRate(0.24); + self::assertEquals(0.24, $mlp->getLearningRate()); + 
self::assertEquals(0.24, $mlp->getBackpropagation()->getLearningRate()); + } + + /** + * @return ActivationFunction|MockObject + */ + private function getActivationFunctionMock() + { + return $this->getMockForAbstractClass(ActivationFunction::class); + } +} diff --git a/tests/NeuralNetwork/Node/BiasTest.php b/tests/NeuralNetwork/Node/BiasTest.php new file mode 100644 index 0000000..bb9650c --- /dev/null +++ b/tests/NeuralNetwork/Node/BiasTest.php @@ -0,0 +1,18 @@ +getOutput()); + } +} diff --git a/tests/NeuralNetwork/Node/InputTest.php b/tests/NeuralNetwork/Node/InputTest.php new file mode 100644 index 0000000..8304d3e --- /dev/null +++ b/tests/NeuralNetwork/Node/InputTest.php @@ -0,0 +1,28 @@ +getOutput()); + + $input = new Input($value = 9.6); + self::assertEquals($value, $input->getOutput()); + } + + public function testSetInput(): void + { + $input = new Input(); + $input->setInput($value = 6.9); + + self::assertEquals($value, $input->getOutput()); + } +} diff --git a/tests/NeuralNetwork/Node/Neuron/SynapseTest.php b/tests/NeuralNetwork/Node/Neuron/SynapseTest.php new file mode 100644 index 0000000..f3c68cd --- /dev/null +++ b/tests/NeuralNetwork/Node/Neuron/SynapseTest.php @@ -0,0 +1,53 @@ +getNodeMock($nodeOutput = 0.5); + + $synapse = new Synapse($node, $weight = 0.75); + + self::assertEquals($node, $synapse->getNode()); + self::assertEquals($weight, $synapse->getWeight()); + self::assertEquals($weight * $nodeOutput, $synapse->getOutput()); + + $synapse = new Synapse($node); + $weight = $synapse->getWeight(); + + self::assertTrue($weight === -1. 
|| $weight === 1.); + } + + public function testSynapseWeightChange(): void + { + $node = $this->getNodeMock(); + $synapse = new Synapse($node, $weight = 0.75); + $synapse->changeWeight(1.0); + + self::assertEquals(1.75, $synapse->getWeight()); + + $synapse->changeWeight(-2.0); + + self::assertEquals(-0.25, $synapse->getWeight()); + } + + /** + * @return Neuron|MockObject + */ + private function getNodeMock(float $output = 1.) + { + $node = $this->getMockBuilder(Neuron::class)->getMock(); + $node->method('getOutput')->willReturn($output); + + return $node; + } +} diff --git a/tests/NeuralNetwork/Node/NeuronTest.php b/tests/NeuralNetwork/Node/NeuronTest.php new file mode 100644 index 0000000..376d78b --- /dev/null +++ b/tests/NeuralNetwork/Node/NeuronTest.php @@ -0,0 +1,66 @@ +getSynapses()); + self::assertEquals(0.5, $neuron->getOutput()); + } + + public function testNeuronActivationFunction(): void + { + /** @var BinaryStep|MockObject $activationFunction */ + $activationFunction = $this->getMockBuilder(BinaryStep::class)->getMock(); + $activationFunction->method('compute')->with(0)->willReturn($output = 0.69); + + $neuron = new Neuron($activationFunction); + + self::assertEquals($output, $neuron->getOutput()); + } + + public function testNeuronWithSynapse(): void + { + $neuron = new Neuron(); + $neuron->addSynapse($synapse = $this->getSynapseMock()); + + self::assertEquals([$synapse], $neuron->getSynapses()); + self::assertEqualsWithDelta(0.88, $neuron->getOutput(), 0.01); + } + + public function testNeuronRefresh(): void + { + $neuron = new Neuron(); + $neuron->getOutput(); + $neuron->addSynapse($this->getSynapseMock()); + + self::assertEqualsWithDelta(0.5, $neuron->getOutput(), 0.01); + + $neuron->reset(); + + self::assertEqualsWithDelta(0.88, $neuron->getOutput(), 0.01); + } + + /** + * @return Synapse|MockObject + */ + private function getSynapseMock(float $output = 2.) 
+ { + $synapse = $this->getMockBuilder(Synapse::class)->disableOriginalConstructor()->getMock(); + $synapse->method('getOutput')->willReturn($output); + + return $synapse; + } +} diff --git a/tests/Performance/Data/.gitkeep b/tests/Performance/Data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/Performance/Regression/LeastSquaresBench.php b/tests/Performance/Regression/LeastSquaresBench.php new file mode 100644 index 0000000..d6dba2f --- /dev/null +++ b/tests/Performance/Regression/LeastSquaresBench.php @@ -0,0 +1,40 @@ +dataset = new CsvDataset(__DIR__.'/../Data/bike-sharing-hour.csv', 14); + } + + /** + * @Revs(1) + * @Iterations(5) + */ + public function benchLeastSquaresTrain(): void + { + $leastSqueares = new LeastSquares(); + $leastSqueares->train($this->dataset->getSamples(), $this->dataset->getTargets()); + } +} diff --git a/tests/Performance/Tokenization/NGramTokenizerBench.php b/tests/Performance/Tokenization/NGramTokenizerBench.php new file mode 100644 index 0000000..f99128d --- /dev/null +++ b/tests/Performance/Tokenization/NGramTokenizerBench.php @@ -0,0 +1,33 @@ +tokenize( + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Praesent placerat blandit cursus. Suspendisse sed + turpis sit amet enim viverra sodales a euismod est. Ut vitae tincidunt est. Proin venenatis placerat nunc + sed ornare. Etiam feugiat, nisl nec sollicitudin sodales, nulla massa sollicitudin ipsum, vitae cursus ante + velit vitae arcu. Vestibulum feugiat ultricies hendrerit. Morbi sed varius metus. Nam feugiat maximus + turpis, a sollicitudin ligula porttitor eu.Fusce hendrerit tellus et dignissim sagittis. Nulla consectetur + condimentum tortor, non bibendum erat lacinia eget. Integer vitae maximus tortor. Vestibulum ante ipsum + primis in faucibus orci luctus et ultrices posuere cubilia Curae; Pellentesque suscipit sem ipsum, in + tincidunt risus pellentesque vel. Nullam hendrerit consequat leo, in suscipit lectus euismod non. 
Cras arcu + lacus, lacinia semper mauris vel, pharetra dignissim velit. Nam lacinia turpis a nibh bibendum, et + placerat tellus accumsan. Sed tincidunt cursus nisi in laoreet. Suspendisse amet.' + ); + } +} diff --git a/tests/Performance/bootstrap.php b/tests/Performance/bootstrap.php new file mode 100644 index 0000000..a358903 --- /dev/null +++ b/tests/Performance/bootstrap.php @@ -0,0 +1,20 @@ +train($samples, $labels); - - $this->assertEquals('b', $classifier->predict([3, 2])); - $this->assertEquals('b', $classifier->predict([5, 1])); - $this->assertEquals('b', $classifier->predict([4, 3])); - $this->assertEquals('b', $classifier->predict([4, -5])); - - $this->assertEquals('a', $classifier->predict([2, 3])); - $this->assertEquals('a', $classifier->predict([1, 2])); - $this->assertEquals('a', $classifier->predict([1, 5])); - $this->assertEquals('a', $classifier->predict([3, 10])); - } - - public function testPredictArrayOfSamples() - { - $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; - $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; - - $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]; - $testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a']; - - $classifier = new KNearestNeighbors(); - $classifier->train($trainSamples, $trainLabels); - $predicted = $classifier->predict($testSamples); - - $this->assertEquals($testLabels, $predicted); - } - - public function testPredictArrayOfSamplesUsingChebyshevDistanceMetric() - { - $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; - $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b']; - - $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]]; - $testLabels = ['b', 'b', 'b', 'b', 'a', 'a', 'a', 'a']; - - $classifier = new KNearestNeighbors(3, new Chebyshev()); - $classifier->train($trainSamples, $trainLabels); - $predicted = $classifier->predict($testSamples); - - $this->assertEquals($testLabels, $predicted); - } -} diff --git 
a/tests/Phpml/Classification/NaiveBayesTest.php b/tests/Phpml/Classification/NaiveBayesTest.php deleted file mode 100644 index 3482cf5..0000000 --- a/tests/Phpml/Classification/NaiveBayesTest.php +++ /dev/null @@ -1,38 +0,0 @@ -train($samples, $labels); - - $this->assertEquals('a', $classifier->predict([3, 1, 1])); - $this->assertEquals('b', $classifier->predict([1, 4, 1])); - $this->assertEquals('c', $classifier->predict([1, 1, 6])); - } - - public function testPredictArrayOfSamples() - { - $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]]; - $trainLabels = ['a', 'b', 'c']; - - $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8], [1, 1, 2], [2, 3, 2], [1, 2, 1], [9, 5, 1], [3, 1, 2]]; - $testLabels = ['a', 'a', 'c', 'c', 'b', 'b', 'a', 'a']; - - $classifier = new NaiveBayes(); - $classifier->train($trainSamples, $trainLabels); - $predicted = $classifier->predict($testSamples); - - $this->assertEquals($testLabels, $predicted); - } -} diff --git a/tests/Phpml/Clustering/DBSCANTest.php b/tests/Phpml/Clustering/DBSCANTest.php deleted file mode 100644 index be37fff..0000000 --- a/tests/Phpml/Clustering/DBSCANTest.php +++ /dev/null @@ -1,33 +0,0 @@ -assertEquals($clustered, $dbscan->cluster($samples)); - - $samples = [[1, 1], [6, 6], [1, -1], [5, 6], [-1, -1], [7, 8], [-1, 1], [7, 7]]; - $clustered = [ - [[1, 1], [1, -1], [-1, -1], [-1, 1]], - [[6, 6], [5, 6], [7, 8], [7, 7]], - ]; - - $dbscan = new DBSCAN($epsilon = 3, $minSamples = 4); - - $this->assertEquals($clustered, $dbscan->cluster($samples)); - } -} diff --git a/tests/Phpml/Clustering/KMeansTest.php b/tests/Phpml/Clustering/KMeansTest.php deleted file mode 100644 index 5a85b38..0000000 --- a/tests/Phpml/Clustering/KMeansTest.php +++ /dev/null @@ -1,51 +0,0 @@ -cluster($samples); - - $this->assertEquals(2, count($clusters)); - - foreach ($samples as $index => $sample) { - if (in_array($sample, $clusters[0]) || in_array($sample, $clusters[1])) { - unset($samples[$index]); - } - } - $this->assertEquals(0, 
count($samples)); - } - - public function testKMeansInitializationMethods() - { - $samples = [ - [180, 155], [186, 159], [119, 185], [141, 147], [157, 158], - [176, 122], [194, 160], [113, 193], [190, 148], [152, 154], - [162, 146], [188, 144], [185, 124], [163, 114], [151, 140], - [175, 131], [186, 162], [181, 195], [147, 122], [143, 195], - [171, 119], [117, 165], [169, 121], [159, 160], [159, 112], - [115, 122], [149, 193], [156, 135], [118, 120], [139, 159], - [150, 115], [181, 136], [167, 162], [132, 115], [175, 165], - [110, 147], [175, 118], [113, 145], [130, 162], [195, 179], - [164, 111], [192, 114], [194, 149], [139, 113], [160, 168], - [162, 110], [174, 144], [137, 142], [197, 160], [147, 173], - ]; - - $kmeans = new KMeans(4, KMeans::INIT_KMEANS_PLUS_PLUS); - $clusters = $kmeans->cluster($samples); - $this->assertEquals(4, count($clusters)); - - $kmeans = new KMeans(4, KMeans::INIT_RANDOM); - $clusters = $kmeans->cluster($samples); - $this->assertEquals(4, count($clusters)); - } -} diff --git a/tests/Phpml/CrossValidation/RandomSplitTest.php b/tests/Phpml/CrossValidation/RandomSplitTest.php deleted file mode 100644 index d31c6a6..0000000 --- a/tests/Phpml/CrossValidation/RandomSplitTest.php +++ /dev/null @@ -1,94 +0,0 @@ -assertEquals(2, count($randomSplit->getTestSamples())); - $this->assertEquals(2, count($randomSplit->getTrainSamples())); - - $randomSplit2 = new RandomSplit($dataset, 0.25); - - $this->assertEquals(1, count($randomSplit2->getTestSamples())); - $this->assertEquals(3, count($randomSplit2->getTrainSamples())); - } - - public function testDatasetRandomSplitWithSameSeed() - { - $dataset = new ArrayDataset( - $samples = [[1], [2], [3], [4], [5], [6], [7], [8]], - $labels = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'] - ); - - $seed = 123; - - $randomSplit1 = new RandomSplit($dataset, 0.5, $seed); - $randomSplit2 = new RandomSplit($dataset, 0.5, $seed); - - $this->assertEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels()); - 
$this->assertEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples()); - $this->assertEquals($randomSplit1->getTrainLabels(), $randomSplit2->getTrainLabels()); - $this->assertEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples()); - } - - public function testDatasetRandomSplitWithDifferentSeed() - { - $dataset = new ArrayDataset( - $samples = [[1], [2], [3], [4], [5], [6], [7], [8]], - $labels = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'] - ); - - $randomSplit1 = new RandomSplit($dataset, 0.5, 4321); - $randomSplit2 = new RandomSplit($dataset, 0.5, 1234); - - $this->assertNotEquals($randomSplit1->getTestLabels(), $randomSplit2->getTestLabels()); - $this->assertNotEquals($randomSplit1->getTestSamples(), $randomSplit2->getTestSamples()); - $this->assertNotEquals($randomSplit1->getTrainLabels(), $randomSplit2->getTrainLabels()); - $this->assertNotEquals($randomSplit1->getTrainSamples(), $randomSplit2->getTrainSamples()); - } - - public function testRandomSplitCorrectSampleAndLabelPosition() - { - $dataset = new ArrayDataset( - $samples = [[1], [2], [3], [4]], - $labels = [1, 2, 3, 4] - ); - - $randomSplit = new RandomSplit($dataset, 0.5); - - $this->assertEquals($randomSplit->getTestSamples()[0][0], $randomSplit->getTestLabels()[0]); - $this->assertEquals($randomSplit->getTestSamples()[1][0], $randomSplit->getTestLabels()[1]); - $this->assertEquals($randomSplit->getTrainSamples()[0][0], $randomSplit->getTrainLabels()[0]); - $this->assertEquals($randomSplit->getTrainSamples()[1][0], $randomSplit->getTrainLabels()[1]); - } -} diff --git a/tests/Phpml/Dataset/ArrayDatasetTest.php b/tests/Phpml/Dataset/ArrayDatasetTest.php deleted file mode 100644 index 7244b3e..0000000 --- a/tests/Phpml/Dataset/ArrayDatasetTest.php +++ /dev/null @@ -1,29 +0,0 @@ -assertEquals($samples, $dataset->getSamples()); - $this->assertEquals($labels, $dataset->getLabels()); - } -} diff --git a/tests/Phpml/Dataset/CsvDatasetTest.php 
b/tests/Phpml/Dataset/CsvDatasetTest.php deleted file mode 100644 index 2994504..0000000 --- a/tests/Phpml/Dataset/CsvDatasetTest.php +++ /dev/null @@ -1,38 +0,0 @@ -assertEquals(10, count($dataset->getSamples())); - $this->assertEquals(10, count($dataset->getLabels())); - } - - public function testSampleCsvDatasetWithoutHeaderRow() - { - $filePath = dirname(__FILE__).'/Resources/dataset.csv'; - - $dataset = new CsvDataset($filePath, 2, false); - - $this->assertEquals(11, count($dataset->getSamples())); - $this->assertEquals(11, count($dataset->getLabels())); - } -} diff --git a/tests/Phpml/Dataset/Demo/GlassTest.php b/tests/Phpml/Dataset/Demo/GlassTest.php deleted file mode 100644 index 6f6e177..0000000 --- a/tests/Phpml/Dataset/Demo/GlassTest.php +++ /dev/null @@ -1,22 +0,0 @@ -assertEquals(214, count($glass->getSamples())); - $this->assertEquals(214, count($glass->getLabels())); - - // one sample features count - $this->assertEquals(9, count($glass->getSamples()[0])); - } -} diff --git a/tests/Phpml/Dataset/Demo/IrisTest.php b/tests/Phpml/Dataset/Demo/IrisTest.php deleted file mode 100644 index 1f0da90..0000000 --- a/tests/Phpml/Dataset/Demo/IrisTest.php +++ /dev/null @@ -1,22 +0,0 @@ -assertEquals(150, count($iris->getSamples())); - $this->assertEquals(150, count($iris->getLabels())); - - // one sample features count - $this->assertEquals(4, count($iris->getSamples()[0])); - } -} diff --git a/tests/Phpml/Dataset/Demo/WineTest.php b/tests/Phpml/Dataset/Demo/WineTest.php deleted file mode 100644 index de16483..0000000 --- a/tests/Phpml/Dataset/Demo/WineTest.php +++ /dev/null @@ -1,22 +0,0 @@ -assertEquals(178, count($wine->getSamples())); - $this->assertEquals(178, count($wine->getLabels())); - - // one sample features count - $this->assertEquals(13, count($wine->getSamples()[0])); - } -} diff --git a/tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php b/tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php deleted file mode 100644 index 
64ac569..0000000 --- a/tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php +++ /dev/null @@ -1,73 +0,0 @@ - 1, 1 => 1, 2 => 2, 3 => 1, 4 => 1], - [5 => 1, 6 => 1, 1 => 1, 2 => 1], - [5 => 1, 7 => 2, 8 => 1, 9 => 1], - ]; - - $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer()); - - $this->assertEquals($vector, $vectorizer->transform($samples)); - $this->assertEquals($vocabulary, $vectorizer->getVocabulary()); - } - - public function testMinimumDocumentTokenCountFrequency() - { - // word at least in half samples - $samples = [ - 'Lorem ipsum dolor sit amet', - 'Lorem ipsum sit amet', - 'ipsum sit amet', - 'ipsum sit amet', - ]; - - $vocabulary = ['Lorem', 'ipsum', 'dolor', 'sit', 'amet']; - $vector = [ - [0 => 1, 1 => 1, 3 => 1, 4 => 1], - [0 => 1, 1 => 1, 3 => 1, 4 => 1], - [1 => 1, 3 => 1, 4 => 1], - [1 => 1, 3 => 1, 4 => 1], - ]; - - $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), 0.5); - - $this->assertEquals($vector, $vectorizer->transform($samples)); - $this->assertEquals($vocabulary, $vectorizer->getVocabulary()); - - // word at least in all samples - $samples = [ - 'Lorem ipsum dolor sit amet', - 'Morbi quis lacinia arcu. 
Sed eu sagittis Lorem', - 'Suspendisse gravida consequat eros Lorem', - ]; - - $vector = [ - [0 => 1], - [0 => 1], - [0 => 1], - ]; - - $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), 1); - - $this->assertEquals($vector, $vectorizer->transform($samples)); - } -} diff --git a/tests/Phpml/Math/Kernel/RBFTest.php b/tests/Phpml/Math/Kernel/RBFTest.php deleted file mode 100644 index 5b9bcb4..0000000 --- a/tests/Phpml/Math/Kernel/RBFTest.php +++ /dev/null @@ -1,25 +0,0 @@ -assertEquals(1, $rbf->compute([1, 2], [1, 2])); - $this->assertEquals(0.97336, $rbf->compute([1, 2, 3], [4, 5, 6]), '', $delta = 0.0001); - $this->assertEquals(0.00011, $rbf->compute([4, 5], [1, 100]), '', $delta = 0.0001); - - $rbf = new RBF($gamma = 0.2); - - $this->assertEquals(1, $rbf->compute([1, 2], [1, 2])); - $this->assertEquals(0.00451, $rbf->compute([1, 2, 3], [4, 5, 6]), '', $delta = 0.0001); - $this->assertEquals(0, $rbf->compute([4, 5], [1, 100])); - } -} diff --git a/tests/Phpml/Math/MatrixTest.php b/tests/Phpml/Math/MatrixTest.php deleted file mode 100644 index 64bb903..0000000 --- a/tests/Phpml/Math/MatrixTest.php +++ /dev/null @@ -1,176 +0,0 @@ -assertInstanceOf(Matrix::class, $matrix); - $this->assertEquals([[1], [2], [3], [4]], $matrix->toArray()); - $this->assertEquals(4, $matrix->getRows()); - $this->assertEquals(1, $matrix->getColumns()); - $this->assertEquals($flatArray, $matrix->getColumnValues(0)); - } - - /** - * @expectedException \Phpml\Exception\MatrixException - */ - public function testThrowExceptionOnInvalidColumnNumber() - { - $matrix = new Matrix([[1, 2, 3], [4, 5, 6]]); - $matrix->getColumnValues(4); - } - - /** - * @expectedException \Phpml\Exception\MatrixException - */ - public function testThrowExceptionOnGetDeterminantIfArrayIsNotSquare() - { - $matrix = new Matrix([[1, 2, 3], [4, 5, 6]]); - $matrix->getDeterminant(); - } - - public function testGetMatrixDeterminant() - { - //http://matrix.reshish.com/determinant.php - $matrix = new Matrix([ - 
[3, 3, 3], - [4, 2, 1], - [5, 6, 7], - ]); - $this->assertEquals(-3, $matrix->getDeterminant()); - - $matrix = new Matrix([ - [1, 2, 3, 3, 2, 1], - [1 / 2, 5, 6, 7, 1, 1], - [3 / 2, 7 / 2, 2, 0, 6, 8], - [1, 8, 10, 1, 2, 2], - [1 / 4, 4, 1, 0, 2, 3 / 7], - [1, 8, 7, 5, 4, 4 / 5], - ]); - $this->assertEquals(1116.5035, $matrix->getDeterminant(), '', $delta = 0.0001); - } - - public function testMatrixTranspose() - { - $matrix = new Matrix([ - [3, 3, 3], - [4, 2, 1], - [5, 6, 7], - ]); - - $transposedMatrix = [ - [3, 4, 5], - [3, 2, 6], - [3, 1, 7], - ]; - - $this->assertEquals($transposedMatrix, $matrix->transpose()->toArray()); - } - - /** - * @expectedException \Phpml\Exception\InvalidArgumentException - */ - public function testThrowExceptionOnMultiplyWhenInconsistentMatrixSupplied() - { - $matrix1 = new Matrix([[1, 2, 3], [4, 5, 6]]); - $matrix2 = new Matrix([[3, 2, 1], [6, 5, 4]]); - - $matrix1->multiply($matrix2); - } - - public function testMatrixMultiplyByMatrix() - { - $matrix1 = new Matrix([ - [1, 2, 3], - [4, 5, 6], - ]); - - $matrix2 = new Matrix([ - [7, 8], - [9, 10], - [11, 12], - ]); - - $product = [ - [58, 64], - [139, 154], - ]; - - $this->assertEquals($product, $matrix1->multiply($matrix2)->toArray()); - } - - public function testDivideByScalar() - { - $matrix = new Matrix([ - [4, 6, 8], - [2, 10, 20], - ]); - - $quotient = [ - [2, 3, 4], - [1, 5, 10], - ]; - - $this->assertEquals($quotient, $matrix->divideByScalar(2)->toArray()); - } - - /** - * @expectedException \Phpml\Exception\MatrixException - */ - public function testThrowExceptionWhenInverseIfArrayIsNotSquare() - { - $matrix = new Matrix([[1, 2, 3], [4, 5, 6]]); - $matrix->inverse(); - } - - public function testInverseMatrix() - { - //http://ncalculators.com/matrix/inverse-matrix.htm - $matrix = new Matrix([ - [3, 4, 2], - [4, 5, 5], - [1, 1, 1], - ]); - - $inverseMatrix = [ - [0, -1, 5], - [1 / 2, 1 / 2, -7 / 2], - [-1 / 2, 1 / 2, -1 / 2], - ]; - - $this->assertEquals($inverseMatrix, 
$matrix->inverse()->toArray(), '', $delta = 0.0001); - } - - public function testCrossOutMatrix() - { - $matrix = new Matrix([ - [3, 4, 2], - [4, 5, 5], - [1, 1, 1], - ]); - - $crossOuted = [ - [3, 2], - [1, 1], - ]; - - $this->assertEquals($crossOuted, $matrix->crossOut(1, 1)->toArray()); - } -} diff --git a/tests/Phpml/Math/ProductTest.php b/tests/Phpml/Math/ProductTest.php deleted file mode 100644 index aba0ff2..0000000 --- a/tests/Phpml/Math/ProductTest.php +++ /dev/null @@ -1,17 +0,0 @@ -assertEquals(10, Product::scalar([2, 3], [-1, 4])); - $this->assertEquals(-0.1, Product::scalar([1, 4, 1], [-2, 0.5, -0.1])); - $this->assertEquals(8, Product::scalar([2], [4])); - } -} diff --git a/tests/Phpml/Math/Statistic/MeanTest.php b/tests/Phpml/Math/Statistic/MeanTest.php deleted file mode 100644 index f0dca3b..0000000 --- a/tests/Phpml/Math/Statistic/MeanTest.php +++ /dev/null @@ -1,18 +0,0 @@ -assertEquals(3.5, Mean::arithmetic([2, 5]), '', $delta); - $this->assertEquals(41.16, Mean::arithmetic([43, 21, 25, 42, 57, 59]), '', $delta); - $this->assertEquals(1.7, Mean::arithmetic([0.5, 0.5, 1.5, 2.5, 3.5]), '', $delta); - } -} diff --git a/tests/Phpml/Math/Statistic/StandardDeviationTest.php b/tests/Phpml/Math/Statistic/StandardDeviationTest.php deleted file mode 100644 index 299c979..0000000 --- a/tests/Phpml/Math/Statistic/StandardDeviationTest.php +++ /dev/null @@ -1,42 +0,0 @@ -assertEquals(1.825, StandardDeviation::population($population), '', $delta); - - //http://www.stat.wmich.edu/s216/book/node126.html - $delta = 0.5; - $population = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025]; - $this->assertEquals(4079, StandardDeviation::population($population), '', $delta); - - $population = [9300, 10565, 15000, 15000, 17764, 57000, 65940, 73676, 77006, 93739, 146088, 153260]; - $this->assertEquals(50989, StandardDeviation::population($population), '', $delta); - } - - /** - * @expectedException \Phpml\Exception\InvalidArgumentException - */ - 
public function testThrowExceptionOnEmptyArrayIfNotSample() - { - StandardDeviation::population([], false); - } - - /** - * @expectedException \Phpml\Exception\InvalidArgumentException - */ - public function testThrowExceptionOnToSmallArray() - { - StandardDeviation::population([1]); - } -} diff --git a/tests/Phpml/Metric/AccuracyTest.php b/tests/Phpml/Metric/AccuracyTest.php deleted file mode 100644 index aa68b22..0000000 --- a/tests/Phpml/Metric/AccuracyTest.php +++ /dev/null @@ -1,37 +0,0 @@ -assertEquals(0.5, Accuracy::score($actualLabels, $predictedLabels)); - } - - public function testCalculateNotNormalizedScore() - { - $actualLabels = ['a', 'b', 'a', 'b']; - $predictedLabels = ['a', 'b', 'b', 'b']; - - $this->assertEquals(3, Accuracy::score($actualLabels, $predictedLabels, false)); - } -} diff --git a/tests/Phpml/Regression/LeastSquaresTest.php b/tests/Phpml/Regression/LeastSquaresTest.php deleted file mode 100644 index 8bd444f..0000000 --- a/tests/Phpml/Regression/LeastSquaresTest.php +++ /dev/null @@ -1,68 +0,0 @@ -train($samples, $targets); - - $this->assertEquals(4.06, $regression->predict([64]), '', $delta); - - //http://www.stat.wmich.edu/s216/book/node127.html - $samples = [[9300], [10565], [15000], [15000], [17764], [57000], [65940], [73676], [77006], [93739], [146088], [153260]]; - $targets = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025]; - - $regression = new LeastSquares(); - $regression->train($samples, $targets); - - $this->assertEquals(7659.35, $regression->predict([9300]), '', $delta); - $this->assertEquals(5213.81, $regression->predict([57000]), '', $delta); - $this->assertEquals(4188.13, $regression->predict([77006]), '', $delta); - $this->assertEquals(7659.35, $regression->predict([9300]), '', $delta); - $this->assertEquals(278.66, $regression->predict([153260]), '', $delta); - } - - public function testPredictSingleFeatureSamplesWithMatrixTargets() - { - $delta = 0.01; - - 
//https://www.easycalculation.com/analytical/learn-least-square-regression.php - $samples = [[60], [61], [62], [63], [65]]; - $targets = [[3.1], [3.6], [3.8], [4], [4.1]]; - - $regression = new LeastSquares(); - $regression->train($samples, $targets); - - $this->assertEquals(4.06, $regression->predict([64]), '', $delta); - } - - public function testPredictMultiFeaturesSamples() - { - $delta = 0.01; - - //http://www.stat.wmich.edu/s216/book/node129.html - $samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]]; - $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; - - $regression = new LeastSquares(); - $regression->train($samples, $targets); - - $this->assertEquals(-800614.957, $regression->getIntercept(), '', $delta); - $this->assertEquals([-0.0327, 404.14], $regression->getCoefficients(), '', $delta); - $this->assertEquals(4094.82, $regression->predict([60000, 1996]), '', $delta); - $this->assertEquals(5711.40, $regression->predict([60000, 2000]), '', $delta); - } -} diff --git a/tests/Phpml/Tokenization/WhitespaceTokenizerTest.php b/tests/Phpml/Tokenization/WhitespaceTokenizerTest.php deleted file mode 100644 index 506abc1..0000000 --- a/tests/Phpml/Tokenization/WhitespaceTokenizerTest.php +++ /dev/null @@ -1,40 +0,0 @@ -assertEquals($tokens, $tokenizer->tokenize($text)); - } - - public function testTokenizationOnUtf8() - { - $tokenizer = new WhitespaceTokenizer(); - - $text = '鋍鞎 鳼 鞮鞢騉 袟袘觕, 炟砏 蒮 謺貙蹖 偢偣唲 蒛 箷箯緷 鑴鱱爧 覮轀, - 剆坲 煘煓瑐 鬐鶤鶐 飹勫嫢 銪 餀 枲柊氠 鍎鞚韕 焲犈, - 殍涾烰 齞齝囃 蹅輶 鄜, 孻憵 擙樲橚 藒襓謥 岯岪弨 蒮 廞徲 孻憵懥 趡趛踠 槏'; - - $tokens = ['鋍鞎', '鳼', '鞮鞢騉', '袟袘觕,', '炟砏', '蒮', '謺貙蹖', '偢偣唲', '蒛', '箷箯緷', '鑴鱱爧', '覮轀,', - '剆坲', '煘煓瑐', '鬐鶤鶐', '飹勫嫢', '銪', '餀', '枲柊氠', '鍎鞚韕', '焲犈,', - '殍涾烰', '齞齝囃', '蹅輶', '鄜,', '孻憵', '擙樲橚', '藒襓謥', '岯岪弨', '蒮', '廞徲', '孻憵懥', '趡趛踠', '槏', ]; - - $this->assertEquals($tokens, $tokenizer->tokenize($text)); 
- } -} diff --git a/tests/PipelineTest.php b/tests/PipelineTest.php new file mode 100644 index 0000000..f905c8b --- /dev/null +++ b/tests/PipelineTest.php @@ -0,0 +1,170 @@ +getTransformers()); + self::assertEquals($estimator, $pipeline->getEstimator()); + } + + public function testPipelineWorkflow(): void + { + $transformers = [ + new Imputer(null, new MostFrequentStrategy()), + new Normalizer(), + ]; + $estimator = new SVC(); + + $samples = [ + [1, -1, 2], + [2, 0, null], + [null, 1, -1], + ]; + + $targets = [ + 4, + 1, + 4, + ]; + + $pipeline = new Pipeline($transformers, $estimator); + $pipeline->train($samples, $targets); + + $predicted = $pipeline->predict([[0, 0, 0]]); + + self::assertEquals(4, $predicted[0]); + } + + public function testPipelineTransformers(): void + { + $transformers = [ + new TokenCountVectorizer(new WordTokenizer()), + new TfIdfTransformer(), + ]; + + $estimator = new SVC(); + + $samples = [ + 'Hello Paul', + 'Hello Martin', + 'Goodbye Tom', + 'Hello John', + 'Goodbye Alex', + 'Bye Tony', + ]; + + $targets = [ + 'greetings', + 'greetings', + 'farewell', + 'greetings', + 'farewell', + 'farewell', + ]; + + $pipeline = new Pipeline($transformers, $estimator); + $pipeline->train($samples, $targets); + + $expected = ['greetings', 'farewell']; + + $predicted = $pipeline->predict(['Hello Max', 'Goodbye Mark']); + + self::assertEquals($expected, $predicted); + } + + public function testPipelineTransformersWithTargets(): void + { + $samples = [[1, 2, 1], [1, 3, 4], [5, 2, 1], [1, 3, 3], [1, 3, 4], [0, 3, 5]]; + $targets = ['a', 'a', 'a', 'b', 'b', 'b']; + + $pipeline = new Pipeline([$selector = new SelectKBest(2)], new SVC()); + $pipeline->train($samples, $targets); + + self::assertEqualsWithDelta([1.47058823, 4.0, 3.0], $selector->scores(), 0.00000001); + self::assertEquals(['b'], $pipeline->predict([[1, 3, 5]])); + } + + public function testPipelineAsTransformer(): void + { + $pipeline = new Pipeline([ + new Imputer(null, new MeanStrategy()), + 
]); + + $trainSamples = [ + [10, 20, 30], + [20, 30, 40], + [30, 40, 50], + ]; + + $pipeline->fit($trainSamples); + + $testSamples = [ + [null, null, null], + ]; + + $pipeline->transform($testSamples); + + self::assertEquals([[20.0, 30.0, 40.0]], $testSamples); + } + + public function testSaveAndRestore(): void + { + $pipeline = new Pipeline([ + new TokenCountVectorizer(new WordTokenizer()), + new TfIdfTransformer(), + ], new SVC()); + + $pipeline->train([ + 'Hello Paul', + 'Hello Martin', + 'Goodbye Tom', + 'Hello John', + 'Goodbye Alex', + 'Bye Tony', + ], [ + 'greetings', + 'greetings', + 'farewell', + 'greetings', + 'farewell', + 'farewell', + ]); + + $testSamples = ['Hello Max', 'Goodbye Mark']; + $predicted = $pipeline->predict($testSamples); + + $filepath = (string) tempnam(sys_get_temp_dir(), uniqid('pipeline-test', true)); + $modelManager = new ModelManager(); + $modelManager->saveToFile($pipeline, $filepath); + + $restoredClassifier = $modelManager->restoreFromFile($filepath); + self::assertEquals($pipeline, $restoredClassifier); + self::assertEquals($predicted, $restoredClassifier->predict($testSamples)); + unlink($filepath); + } +} diff --git a/tests/Preprocessing/ColumnFilterTest.php b/tests/Preprocessing/ColumnFilterTest.php new file mode 100644 index 0000000..243c7eb --- /dev/null +++ b/tests/Preprocessing/ColumnFilterTest.php @@ -0,0 +1,27 @@ +transform($samples); + + self::assertEquals([[100000, 4], [120000, 12], [200000, 0]], $samples); + } +} diff --git a/tests/Preprocessing/ImputerTest.php b/tests/Preprocessing/ImputerTest.php new file mode 100644 index 0000000..b410854 --- /dev/null +++ b/tests/Preprocessing/ImputerTest.php @@ -0,0 +1,191 @@ +transform($data); + + self::assertEqualsWithDelta($imputeData, $data, $delta = 0.01); + } + + public function testComplementsMissingValuesWithMeanStrategyOnRowAxis(): void + { + $data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], + ]; + + $imputeData = [ + [1, 2.66, 3, 4], + 
[4, 3, 2, 1], + [7, 6, 7, 8], + [8, 7, 6.66, 5], + ]; + + $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_ROW, $data); + $imputer->transform($data); + + self::assertEqualsWithDelta($imputeData, $data, $delta = 0.01); + } + + public function testComplementsMissingValuesWithMediaStrategyOnColumnAxis(): void + { + $data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], + ]; + + $imputeData = [ + [1, 6, 3, 4], + [4, 3, 2, 1], + [4, 6, 7, 8], + [8, 7, 3, 5], + ]; + + $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_COLUMN, $data); + $imputer->transform($data); + + self::assertEqualsWithDelta($imputeData, $data, $delta = 0.01); + } + + public function testComplementsMissingValuesWithMediaStrategyOnRowAxis(): void + { + $data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], + ]; + + $imputeData = [ + [1, 3, 3, 4], + [4, 3, 2, 1], + [7, 6, 7, 8], + [8, 7, 7, 5], + ]; + + $imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_ROW, $data); + $imputer->transform($data); + + self::assertEqualsWithDelta($imputeData, $data, $delta = 0.01); + } + + public function testComplementsMissingValuesWithMostFrequentStrategyOnColumnAxis(): void + { + $data = [ + [1, null, 3, 4], + [4, 3, 2, 1], + [null, 6, 7, 8], + [8, 7, null, 5], + [8, 3, 2, 5], + ]; + + $imputeData = [ + [1, 3, 3, 4], + [4, 3, 2, 1], + [8, 6, 7, 8], + [8, 7, 2, 5], + [8, 3, 2, 5], + ]; + + $imputer = new Imputer(null, new MostFrequentStrategy(), Imputer::AXIS_COLUMN, $data); + $imputer->transform($data); + + self::assertEquals($imputeData, $data); + } + + public function testComplementsMissingValuesWithMostFrequentStrategyOnRowAxis(): void + { + $data = [ + [1, null, 3, 4, 3], + [4, 3, 2, 1, 7], + [null, 6, 7, 8, 6], + [8, 7, null, 5, 5], + [8, 3, 2, 5, 4], + ]; + + $imputeData = [ + [1, 3, 3, 4, 3], + [4, 3, 2, 1, 7], + [6, 6, 7, 8, 6], + [8, 7, 5, 5, 5], + [8, 3, 2, 5, 4], + ]; + + $imputer = new Imputer(null, new 
MostFrequentStrategy(), Imputer::AXIS_ROW, $data); + $imputer->transform($data); + + self::assertEquals($imputeData, $data); + } + + public function testImputerWorksOnFitSamples(): void + { + $trainData = [ + [1, 3, 4], + [6, 7, 8], + [8, 7, 5], + ]; + + $data = [ + [1, 3, null], + [6, null, 8], + [null, 7, 5], + ]; + + $imputeData = [ + [1, 3, 5.66], + [6, 5.66, 8], + [5, 7, 5], + ]; + + $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN, $trainData); + $imputer->transform($data); + + self::assertEqualsWithDelta($imputeData, $data, $delta = 0.01); + } + + public function testThrowExceptionWhenTryingToTransformWithoutTrainSamples(): void + { + $this->expectException(InvalidOperationException::class); + + $data = [ + [1, 3, null], + [6, null, 8], + [null, 7, 5], + ]; + + $imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN); + $imputer->transform($data); + } +} diff --git a/tests/Preprocessing/LabelEncoderTest.php b/tests/Preprocessing/LabelEncoderTest.php new file mode 100644 index 0000000..71dc87e --- /dev/null +++ b/tests/Preprocessing/LabelEncoderTest.php @@ -0,0 +1,68 @@ +fit($samples); + $le->transform($samples); + + self::assertEquals($transformed, $samples); + } + + public function labelEncoderDataProvider(): array + { + return [ + [['one', 'one', 'two', 'three'], [0, 0, 1, 2]], + [['one', 1, 'two', 'three'], [0, 1, 2, 3]], + [['one', null, 'two', 'three'], [0, 1, 2, 3]], + [['one', 'one', 'one', 'one'], [0, 0, 0, 0]], + [['one', 'one', 'one', 'one', null, null, 1, 1, 2, 'two'], [0, 0, 0, 0, 1, 1, 2, 2, 3, 4]], + ]; + } + + public function testResetClassesAfterNextFit(): void + { + $samples = ['Shanghai', 'Beijing', 'Karachi']; + + $le = new LabelEncoder(); + $le->fit($samples); + + self::assertEquals(['Shanghai', 'Beijing', 'Karachi'], $le->classes()); + + $samples = ['Istanbul', 'Dhaka', 'Tokyo']; + + $le->fit($samples); + + self::assertEquals(['Istanbul', 'Dhaka', 'Tokyo'], $le->classes()); + } + + public function 
testFitAndTransformFullCycle(): void + { + $samples = ['Shanghai', 'Beijing', 'Karachi', 'Beijing', 'Beijing', 'Karachi']; + $encoded = [0, 1, 2, 1, 1, 2]; + + $le = new LabelEncoder(); + $le->fit($samples); + + self::assertEquals(['Shanghai', 'Beijing', 'Karachi'], $le->classes()); + + $transformed = $samples; + $le->transform($transformed); + self::assertEquals($encoded, $transformed); + + $le->inverseTransform($transformed); + self::assertEquals($samples, $transformed); + } +} diff --git a/tests/Preprocessing/LambdaTransformerTest.php b/tests/Preprocessing/LambdaTransformerTest.php new file mode 100644 index 0000000..6f46f3e --- /dev/null +++ b/tests/Preprocessing/LambdaTransformerTest.php @@ -0,0 +1,28 @@ +transform($samples); + + self::assertEquals([3, 7, 11], $samples); + } +} diff --git a/tests/Preprocessing/NormalizerTest.php b/tests/Preprocessing/NormalizerTest.php new file mode 100644 index 0000000..0a8f76c --- /dev/null +++ b/tests/Preprocessing/NormalizerTest.php @@ -0,0 +1,137 @@ +expectException(NormalizerException::class); + new Normalizer(99); + } + + public function testNormalizeSamplesWithL2Norm(): void + { + $samples = [ + [1, -1, 2], + [2, 0, 0], + [0, 1, -1], + ]; + + $normalized = [ + [0.4, -0.4, 0.81], + [1.0, 0.0, 0.0], + [0.0, 0.7, -0.7], + ]; + + $normalizer = new Normalizer(); + $normalizer->transform($samples); + + self::assertEqualsWithDelta($normalized, $samples, $delta = 0.01); + } + + public function testNormalizeSamplesWithL1Norm(): void + { + $samples = [ + [1, -1, 2], + [2, 0, 0], + [0, 1, -1], + ]; + + $normalized = [ + [0.25, -0.25, 0.5], + [1.0, 0.0, 0.0], + [0.0, 0.5, -0.5], + ]; + + $normalizer = new Normalizer(Normalizer::NORM_L1); + $normalizer->transform($samples); + + self::assertEqualsWithDelta($normalized, $samples, $delta = 0.01); + } + + public function testFitNotChangeNormalizerBehavior(): void + { + $samples = [ + [1, -1, 2], + [2, 0, 0], + [0, 1, -1], + ]; + + $normalized = [ + [0.4, -0.4, 0.81], + [1.0, 0.0, 0.0], 
+ [0.0, 0.7, -0.7], + ]; + + $normalizer = new Normalizer(); + $normalizer->transform($samples); + + self::assertEqualsWithDelta($normalized, $samples, $delta = 0.01); + + $normalizer->fit($samples); + + self::assertEqualsWithDelta($normalized, $samples, $delta = 0.01); + } + + public function testL1NormWithZeroSumCondition(): void + { + $samples = [ + [0, 0, 0], + [2, 0, 0], + [0, 1, -1], + ]; + + $normalized = [ + [0.33, 0.33, 0.33], + [1.0, 0.0, 0.0], + [0.0, 0.5, -0.5], + ]; + + $normalizer = new Normalizer(Normalizer::NORM_L1); + $normalizer->transform($samples); + + self::assertEqualsWithDelta($normalized, $samples, $delta = 0.01); + } + + public function testStandardNorm(): void + { + // Generate 10 random vectors of length 3 + $samples = []; + srand(time()); + for ($i = 0; $i < 10; ++$i) { + $sample = array_fill(0, 3, 0); + for ($k = 0; $k < 3; ++$k) { + $sample[$k] = random_int(1, 100); + } + + // Last feature's value shared across samples. + $sample[] = 1; + + $samples[] = $sample; + } + + // Use standard normalization + $normalizer = new Normalizer(Normalizer::NORM_STD); + $normalizer->transform($samples); + + // Values in the vector should be some value between -3 and +3 + self::assertCount(10, $samples); + foreach ($samples as $sample) { + $errors = array_filter( + $sample, + function ($element): bool { + return $element < -3 || $element > 3; + } + ); + self::assertCount(0, $errors); + self::assertEquals(0, $sample[3]); + } + } +} diff --git a/tests/Preprocessing/NumberConverterTest.php b/tests/Preprocessing/NumberConverterTest.php new file mode 100644 index 0000000..287b739 --- /dev/null +++ b/tests/Preprocessing/NumberConverterTest.php @@ -0,0 +1,47 @@ +transform($samples, $targets); + + self::assertEquals([[1.0, -4.0], [2.0, 3.0], [3.0, 112.5], [5.0, 0.0004]], $samples); + self::assertEquals(['1', '1', '2', '2'], $targets); + } + + public function testConvertTargets(): void + { + $samples = [['1', '-4'], ['2.0', 3.0], ['3', '112.5'], ['5', 
'0.0004']]; + $targets = ['1', '1', '2', 'not']; + + $converter = new NumberConverter(true); + $converter->transform($samples, $targets); + + self::assertEquals([[1.0, -4.0], [2.0, 3.0], [3.0, 112.5], [5.0, 0.0004]], $samples); + self::assertEquals([1.0, 1.0, 2.0, null], $targets); + } + + public function testConvertWithPlaceholder(): void + { + $samples = [['invalid'], ['13.5']]; + $targets = ['invalid', '2']; + + $converter = new NumberConverter(true, 'missing'); + $converter->transform($samples, $targets); + + self::assertEquals([['missing'], [13.5]], $samples); + self::assertEquals(['missing', 2.0], $targets); + } +} diff --git a/tests/Preprocessing/OneHotEncoderTest.php b/tests/Preprocessing/OneHotEncoderTest.php new file mode 100644 index 0000000..a5666b7 --- /dev/null +++ b/tests/Preprocessing/OneHotEncoderTest.php @@ -0,0 +1,66 @@ +fit($samples); + $encoder->transform($samples); + + self::assertEquals([ + [1, 0, 1, 0, 1, 0], + [0, 1, 1, 0, 1, 0], + [1, 0, 0, 1, 0, 1], + [0, 1, 0, 1, 1, 0], + ], $samples); + } + + public function testThrowExceptionWhenUnknownCategory(): void + { + $encoder = new OneHotEncoder(); + $encoder->fit([ + ['fish', 'New York', 'regression'], + ['dog', 'New York', 'regression'], + ['fish', 'Vancouver', 'classification'], + ['dog', 'Vancouver', 'regression'], + ]); + $samples = [['fish', 'New York', 'ka boom']]; + + $this->expectException(InvalidArgumentException::class); + + $encoder->transform($samples); + } + + public function testIgnoreMissingCategory(): void + { + $encoder = new OneHotEncoder(true); + $encoder->fit([ + ['fish', 'New York', 'regression'], + ['dog', 'New York', 'regression'], + ['fish', 'Vancouver', 'classification'], + ['dog', 'Vancouver', 'regression'], + ]); + $samples = [['ka', 'boom', 'riko']]; + $encoder->transform($samples); + + self::assertEquals([ + [0, 0, 0, 0, 0, 0], + ], $samples); + } +} diff --git a/tests/Regression/DecisionTreeRegressorTest.php b/tests/Regression/DecisionTreeRegressorTest.php new 
file mode 100644 index 0000000..046ce5d --- /dev/null +++ b/tests/Regression/DecisionTreeRegressorTest.php @@ -0,0 +1,83 @@ +train($samples, $targets); + + self::assertEqualsWithDelta([4.05], $regression->predict([[64]]), $delta); + + $samples = [[9300], [10565], [15000], [15000], [17764], [57000], [65940], [73676], [77006], [93739], [146088], [153260]]; + $targets = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025]; + + $regression = new DecisionTreeRegressor(); + $regression->train($samples, $targets); + + self::assertEqualsWithDelta([11300.0], $regression->predict([[9300]]), $delta); + self::assertEqualsWithDelta([5250.0], $regression->predict([[57000]]), $delta); + self::assertEqualsWithDelta([2433.33], $regression->predict([[77006]]), $delta); + self::assertEqualsWithDelta([11300.0], $regression->predict([[9300]]), $delta); + self::assertEqualsWithDelta([992.5], $regression->predict([[153260]]), $delta); + } + + public function testPreventPredictWhenNotTrained(): void + { + $regression = new DecisionTreeRegressor(); + + $this->expectException(InvalidOperationException::class); + + $regression->predict([[1]]); + } + + public function testMaxFeaturesLowerThanOne(): void + { + $this->expectException(InvalidArgumentException::class); + + new DecisionTreeRegressor(5, 3, 0.0, 0); + } + + public function testToleranceSmallerThanZero(): void + { + $this->expectException(InvalidArgumentException::class); + + new DecisionTreeRegressor(5, 3, 0.0, 20, -1); + } + + public function testSaveAndRestore(): void + { + $samples = [[60], [61], [62], [63], [65]]; + $targets = [3.1, 3.6, 3.8, 4, 4.1]; + + $regression = new DecisionTreeRegressor(4); + $regression->train($samples, $targets); + + $testSamples = [[9300], [10565], [15000]]; + $predicted = $regression->predict($testSamples); + + $filename = 'least-squares-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new 
ModelManager(); + $modelManager->saveToFile($regression, $filepath); + + $restoredRegression = $modelManager->restoreFromFile($filepath); + self::assertEquals($regression, $restoredRegression); + self::assertEquals($predicted, $restoredRegression->predict($testSamples)); + } +} diff --git a/tests/Regression/LeastSquaresTest.php b/tests/Regression/LeastSquaresTest.php new file mode 100644 index 0000000..4d79c4f --- /dev/null +++ b/tests/Regression/LeastSquaresTest.php @@ -0,0 +1,93 @@ +train($samples, $targets); + + self::assertEqualsWithDelta(4.06, $regression->predict([64]), $delta); + + //http://www.stat.wmich.edu/s216/book/node127.html + $samples = [[9300], [10565], [15000], [15000], [17764], [57000], [65940], [73676], [77006], [93739], [146088], [153260]]; + $targets = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025]; + + $regression = new LeastSquares(); + $regression->train($samples, $targets); + + self::assertEqualsWithDelta(7659.35, $regression->predict([9300]), $delta); + self::assertEqualsWithDelta(5213.81, $regression->predict([57000]), $delta); + self::assertEqualsWithDelta(4188.13, $regression->predict([77006]), $delta); + self::assertEqualsWithDelta(7659.35, $regression->predict([9300]), $delta); + self::assertEqualsWithDelta(278.66, $regression->predict([153260]), $delta); + } + + public function testPredictSingleFeatureSamplesWithMatrixTargets(): void + { + $delta = 0.01; + + //https://www.easycalculation.com/analytical/learn-least-square-regression.php + $samples = [[60], [61], [62], [63], [65]]; + $targets = [[3.1], [3.6], [3.8], [4], [4.1]]; + + $regression = new LeastSquares(); + $regression->train($samples, $targets); + + self::assertEqualsWithDelta(4.06, $regression->predict([64]), $delta); + } + + public function testPredictMultiFeaturesSamples(): void + { + $delta = 0.01; + + //http://www.stat.wmich.edu/s216/book/node129.html + $samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], 
[65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]]; + $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; + + $regression = new LeastSquares(); + $regression->train($samples, $targets); + + self::assertEqualsWithDelta(-800614.957, $regression->getIntercept(), $delta); + self::assertEqualsWithDelta([-0.0327, 404.14], $regression->getCoefficients(), $delta); + self::assertEqualsWithDelta(4094.82, $regression->predict([60000, 1996]), $delta); + self::assertEqualsWithDelta(5711.40, $regression->predict([60000, 2000]), $delta); + } + + public function testSaveAndRestore(): void + { + //https://www.easycalculation.com/analytical/learn-least-square-regression.php + $samples = [[60], [61], [62], [63], [65]]; + $targets = [[3.1], [3.6], [3.8], [4], [4.1]]; + + $regression = new LeastSquares(); + $regression->train($samples, $targets); + + //http://www.stat.wmich.edu/s216/book/node127.html + $testSamples = [[9300], [10565], [15000]]; + $predicted = $regression->predict($testSamples); + + $filename = 'least-squares-test-'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($regression, $filepath); + + $restoredRegression = $modelManager->restoreFromFile($filepath); + self::assertEquals($regression, $restoredRegression); + self::assertEquals($predicted, $restoredRegression->predict($testSamples)); + } +} diff --git a/tests/Regression/SVRTest.php b/tests/Regression/SVRTest.php new file mode 100644 index 0000000..962a713 --- /dev/null +++ b/tests/Regression/SVRTest.php @@ -0,0 +1,60 @@ +train($samples, $targets); + + self::assertEqualsWithDelta(4.03, $regression->predict([64]), $delta); + } + + public function testPredictMultiFeaturesSamples(): void + { + $delta = 0.01; + + $samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], 
[9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]]; + $targets = [2000, 2750, 15500, 960, 4400, 8800, 7100, 2550, 1025, 5900, 4600, 4400]; + + $regression = new SVR(Kernel::LINEAR); + $regression->train($samples, $targets); + + self::assertEqualsWithDelta([4109.82, 4112.28], $regression->predict([[60000, 1996], [60000, 2000]]), $delta); + } + + public function testSaveAndRestore(): void + { + $samples = [[60], [61], [62], [63], [65]]; + $targets = [3.1, 3.6, 3.8, 4, 4.1]; + + $regression = new SVR(Kernel::LINEAR); + $regression->train($samples, $targets); + + $testSamples = [64]; + $predicted = $regression->predict($testSamples); + + $filename = 'svr-test'.random_int(100, 999).'-'.uniqid('', false); + $filepath = (string) tempnam(sys_get_temp_dir(), $filename); + $modelManager = new ModelManager(); + $modelManager->saveToFile($regression, $filepath); + + $restoredRegression = $modelManager->restoreFromFile($filepath); + self::assertEquals($regression, $restoredRegression); + self::assertEquals($predicted, $restoredRegression->predict($testSamples)); + } +} diff --git a/tests/SupportVectorMachine/DataTransformerTest.php b/tests/SupportVectorMachine/DataTransformerTest.php new file mode 100644 index 0000000..32d7d32 --- /dev/null +++ b/tests/SupportVectorMachine/DataTransformerTest.php @@ -0,0 +1,90 @@ + 0.1, + 'b' => 0.7, + 'c' => 0.2, + ], + [ + 'a' => 0.2, + 'b' => 0.3, + 'c' => 0.5, + ], + [ + 'a' => 0.6, + 'b' => 0.1, + 'c' => 0.3, + ], + ]; + + self::assertEquals($probabilities, DataTransformer::probabilities($rawPredictions, $labels)); + } + + public function testThrowExceptionWhenTestSetIsEmpty(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The array has zero elements'); + + DataTransformer::testSet([]); + } +} diff --git a/tests/SupportVectorMachine/SupportVectorMachineTest.php b/tests/SupportVectorMachine/SupportVectorMachineTest.php new file mode 100644 index 
0000000..b7b1942 --- /dev/null +++ b/tests/SupportVectorMachine/SupportVectorMachineTest.php @@ -0,0 +1,206 @@ +train($samples, $labels); + + self::assertEquals($model, $svm->getModel()); + } + + public function testTrainCSVCModelWithProbabilityEstimate(): void + { + $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $labels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $svm = new SupportVectorMachine( + Type::C_SVC, + Kernel::LINEAR, + 100.0, + 0.5, + 3, + null, + 0.0, + 0.1, + 0.01, + 100, + true, + true + ); + $svm->train($samples, $labels); + + self::assertStringContainsString(PHP_EOL.'probA ', $svm->getModel()); + self::assertStringContainsString(PHP_EOL.'probB ', $svm->getModel()); + } + + public function testPredictSampleWithLinearKernel(): void + { + $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $labels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $svm = new SupportVectorMachine(Type::C_SVC, Kernel::LINEAR, 100.0); + $svm->train($samples, $labels); + + $predictions = $svm->predict([ + [3, 2], + [2, 3], + [4, -5], + ]); + + self::assertEquals('b', $predictions[0]); + self::assertEquals('a', $predictions[1]); + self::assertEquals('b', $predictions[2]); + } + + public function testPredictSampleFromMultipleClassWithRbfKernel(): void + { + $samples = [ + [1, 3], [1, 4], [1, 4], + [3, 1], [4, 1], [4, 2], + [-3, -1], [-4, -1], [-4, -2], + ]; + $labels = [ + 'a', 'a', 'a', + 'b', 'b', 'b', + 'c', 'c', 'c', + ]; + + $svm = new SupportVectorMachine(Type::C_SVC, Kernel::RBF, 100.0); + $svm->train($samples, $labels); + + $predictions = $svm->predict([ + [1, 5], + [4, 3], + [-4, -3], + ]); + + self::assertEquals('a', $predictions[0]); + self::assertEquals('b', $predictions[1]); + self::assertEquals('c', $predictions[2]); + } + + public function testPredictProbability(): void + { + $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $labels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $svm = new SupportVectorMachine( + Type::C_SVC, + Kernel::LINEAR, + 
100.0, + 0.5, + 3, + null, + 0.0, + 0.1, + 0.01, + 100, + true, + true + ); + $svm->train($samples, $labels); + + $predictions = $svm->predictProbability([ + [3, 2], + [2, 3], + [4, -5], + ]); + + self::assertTrue($predictions[0]['a'] < $predictions[0]['b']); + self::assertTrue($predictions[1]['a'] > $predictions[1]['b']); + self::assertTrue($predictions[2]['a'] < $predictions[2]['b']); + + // Should be true because the latter is farther from the decision boundary + self::assertTrue($predictions[0]['b'] < $predictions[2]['b']); + } + + public function testThrowExceptionWhenVarPathIsNotWritable(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('is not writable'); + $svm = new SupportVectorMachine(Type::C_SVC, Kernel::RBF); + $svm->setVarPath('var-path'); + } + + public function testThrowExceptionWhenBinPathDoesNotExist(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('does not exist'); + $svm = new SupportVectorMachine(Type::C_SVC, Kernel::RBF); + $svm->setBinPath('bin-path'); + } + + public function testThrowExceptionWhenFileIsNotFoundInBinPath(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('not found'); + $svm = new SupportVectorMachine(Type::C_SVC, Kernel::RBF); + $svm->setBinPath('var'); + } + + public function testThrowExceptionWhenLibsvmFailsDuringTrain(): void + { + $this->expectException(LibsvmCommandException::class); + $this->expectExceptionMessage('ERROR: unknown svm type'); + + $svm = new SupportVectorMachine(99, Kernel::RBF); + $svm->train([], []); + } + + public function testThrowExceptionWhenLibsvmFailsDuringPredict(): void + { + $this->expectException(LibsvmCommandException::class); + $this->expectExceptionMessage('can\'t open model file'); + + $svm = new SupportVectorMachine(Type::C_SVC, Kernel::RBF); + $svm->predict([1]); + } + + public function 
testThrowExceptionWhenPredictProbabilityCalledWithoutProperModel(): void + { + $this->expectException(InvalidOperationException::class); + $this->expectExceptionMessage('Model does not support probabiliy estimates'); + + $samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]]; + $labels = ['a', 'a', 'a', 'b', 'b', 'b']; + + $svm = new SupportVectorMachine(Type::C_SVC, Kernel::LINEAR, 100.0); + $svm->train($samples, $labels); + + $svm->predictProbability([ + [3, 2], + [2, 3], + [4, -5], + ]); + } +} diff --git a/tests/Tokenization/NGramTokenizerTest.php b/tests/Tokenization/NGramTokenizerTest.php new file mode 100644 index 0000000..2df9531 --- /dev/null +++ b/tests/Tokenization/NGramTokenizerTest.php @@ -0,0 +1,100 @@ +tokenize($text)); + } + + public function testMinGramGreaterThanMaxGramNotAllowed(): void + { + self::expectException(InvalidArgumentException::class); + + new NGramTokenizer(5, 2); + } + + public function testMinGramValueTooSmall(): void + { + self::expectException(InvalidArgumentException::class); + + new NGramTokenizer(0, 2); + } + + public function testMaxGramValueTooSmall(): void + { + self::expectException(InvalidArgumentException::class); + + new NGramTokenizer(1, 0); + } + + public function textDataProvider(): array + { + return [ + [ + 1, 2, + 'Quick Fox', + ['Q', 'u', 'i', 'c', 'k', 'Qu', 'ui', 'ic', 'ck', 'F', 'o', 'x', 'Fo', 'ox'], + ], + [ + 3, 3, + 'Quick Foxes', + ['Qui', 'uic', 'ick', 'Fox', 'oxe', 'xes'], + ], + [ + 1, 2, + '快狐跑过 边缘跑', + ['快', '狐', '跑', '过', '快狐', '狐跑', '跑过', '边', '缘', '跑', '边缘', '缘跑'], + ], + [ + 3, 3, + '快狐跑过狐 边缘跑狐狐', + ['快狐跑', '狐跑过', '跑过狐', '边缘跑', '缘跑狐', '跑狐狐'], + ], + [ + 2, 4, + $this->getSimpleText(), + [ + 'Lo', 'or', 're', 'em', 'Lor', 'ore', 'rem', 'Lore', 'orem', 'ip', 'ps', 'su', 'um', 'ips', 'psu', 'sum', 'ipsu', + 'psum', 'do', 'ol', 'lo', 'or', 'dol', 'olo', 'lor', 'dolo', 'olor', 'si', 'it', 'sit', 'am', 'me', 'et', 'ame', + 'met', 'amet', 'co', 'on', 'ns', 'se', 'ec', 'ct', 'te', 'et', 'tu', 'ur', 
'con', 'ons', 'nse', 'sec', 'ect', 'cte', + 'tet', 'etu', 'tur', 'cons', 'onse', 'nsec', 'sect', 'ecte', 'ctet', 'tetu', 'etur', 'ad', 'di', 'ip', 'pi', 'is', + 'sc', 'ci', 'in', 'ng', 'adi', 'dip', 'ipi', 'pis', 'isc', 'sci', 'cin', 'ing', 'adip', 'dipi', 'ipis', 'pisc', + 'isci', 'scin', 'cing', 'el', 'li', 'it', 'eli', 'lit', 'elit', 'Cr', 'ra', 'as', 'Cra', 'ras', 'Cras', 'co', 'on', + 'ns', 'se', 'ec', 'ct', 'te', 'et', 'tu', 'ur', 'con', 'ons', 'nse', 'sec', 'ect', 'cte', 'tet', 'etu', 'tur', + 'cons', 'onse', 'nsec', 'sect', 'ecte', 'ctet', 'tetu', 'etur', 'du', 'ui', 'dui', 'et', 'lo', 'ob', 'bo', 'or', + 'rt', 'ti', 'is', 'lob', 'obo', 'bor', 'ort', 'rti', 'tis', 'lobo', 'obor', 'bort', 'orti', 'rtis', 'au', 'uc', + 'ct', 'to', 'or', 'auc', 'uct', 'cto', 'tor', 'auct', 'ucto', 'ctor', 'Nu', 'ul', 'll', 'la', 'Nul', 'ull', 'lla', + 'Null', 'ulla', 'vi', 'it', 'ta', 'ae', 'vit', 'ita', 'tae', 'vita', 'itae', 'co', 'on', 'ng', 'gu', 'ue', 'con', + 'ong', 'ngu', 'gue', 'cong', 'ongu', 'ngue', 'lo', 'or', 're', 'em', 'lor', 'ore', 'rem', 'lore', 'orem', + ], + ], + [ + 2, 4, + $this->getUtf8Text(), + [ + '鋍鞎', '鞮鞢', '鞢騉', '鞮鞢騉', '袟袘', '袘觕', '袟袘觕', '炟砏', '謺貙', '貙蹖', '謺貙蹖', '偢偣', '偣唲', + '偢偣唲', '箷箯', '箯緷', '箷箯緷', '鑴鱱', '鱱爧', '鑴鱱爧', '覮轀', '剆坲', '煘煓', '煓瑐', '煘煓瑐', '鬐鶤', + '鶤鶐', '鬐鶤鶐', '飹勫', '勫嫢', '飹勫嫢', '枲柊', '柊氠', '枲柊氠', '鍎鞚', '鞚韕', '鍎鞚韕', '焲犈', '殍涾', + '涾烰', '殍涾烰', '齞齝', '齝囃', '齞齝囃', '蹅輶', '孻憵', '擙樲', '樲橚', '擙樲橚', '藒襓', '襓謥', '藒襓謥', + '岯岪', '岪弨', '岯岪弨', '廞徲', '孻憵', '憵懥', '孻憵懥', '趡趛', '趛踠', '趡趛踠', + ], + ], + ]; + } +} diff --git a/tests/Tokenization/NGramWordTokenizerTest.php b/tests/Tokenization/NGramWordTokenizerTest.php new file mode 100644 index 0000000..f9986d5 --- /dev/null +++ b/tests/Tokenization/NGramWordTokenizerTest.php @@ -0,0 +1,112 @@ +tokenize($text)); + } + + public function testMinGramGreaterThanMaxGramNotAllowed(): void + { + self::expectException(InvalidArgumentException::class); + + new NGramWordTokenizer(5, 2); + } + + public function 
testMinGramValueTooSmall(): void + { + self::expectException(InvalidArgumentException::class); + + new NGramWordTokenizer(0, 2); + } + + public function testMaxGramValueTooSmall(): void + { + self::expectException(InvalidArgumentException::class); + + new NGramWordTokenizer(1, 0); + } + + public function textDataProvider(): array + { + return [ + [ + 1, 1, + 'one two three four', + ['one', 'two', 'three', 'four'], + ], + [ + 1, 2, + 'one two three four', + ['one', 'two', 'three', 'four', 'one two', 'two three', 'three four'], + ], + [ + 1, 3, + 'one two three four', + ['one', 'two', 'three', 'four', 'one two', 'two three', 'three four', 'one two three', 'two three four'], + ], + [ + 2, 3, + 'one two three four', + ['one two', 'two three', 'three four', 'one two three', 'two three four'], + ], + [ + 1, 2, + '快狐跑过 边缘跑', + ['快狐跑过', '边缘跑', '快狐跑过 边缘跑'], + ], + [ + 2, 4, + $this->getSimpleText(), + [ + 'Lorem ipsum', 'ipsum dolor', 'dolor sit', 'sit amet', 'amet consectetur', 'consectetur adipiscing', + 'adipiscing elit', 'elit Cras', 'Cras consectetur', 'consectetur dui', 'dui et', 'et lobortis', + 'lobortis auctor', 'auctor Nulla', 'Nulla vitae', 'vitae congue', 'congue lorem', 'Lorem ipsum dolor', + 'ipsum dolor sit', 'dolor sit amet', 'sit amet consectetur', 'amet consectetur adipiscing', + 'consectetur adipiscing elit', 'adipiscing elit Cras', 'elit Cras consectetur', 'Cras consectetur dui', + 'consectetur dui et', 'dui et lobortis', 'et lobortis auctor', 'lobortis auctor Nulla', 'auctor Nulla vitae', + 'Nulla vitae congue', 'vitae congue lorem', 'Lorem ipsum dolor sit', 'ipsum dolor sit amet', + 'dolor sit amet consectetur', 'sit amet consectetur adipiscing', 'amet consectetur adipiscing elit', + 'consectetur adipiscing elit Cras', 'adipiscing elit Cras consectetur', 'elit Cras consectetur dui', + 'Cras consectetur dui et', 'consectetur dui et lobortis', 'dui et lobortis auctor', 'et lobortis auctor Nulla', + 'lobortis auctor Nulla vitae', 'auctor Nulla vitae 
congue', 'Nulla vitae congue lorem', + ], + ], + [ + 2, 4, + $this->getUtf8Text(), + [ + '鋍鞎 鞮鞢騉', '鞮鞢騉 袟袘觕', '袟袘觕 炟砏', '炟砏 謺貙蹖', '謺貙蹖 偢偣唲', '偢偣唲 箷箯緷', '箷箯緷 鑴鱱爧', '鑴鱱爧 覮轀', + '覮轀 剆坲', '剆坲 煘煓瑐', '煘煓瑐 鬐鶤鶐', '鬐鶤鶐 飹勫嫢', '飹勫嫢 枲柊氠', '枲柊氠 鍎鞚韕', '鍎鞚韕 焲犈', '焲犈 殍涾烰', + '殍涾烰 齞齝囃', '齞齝囃 蹅輶', '蹅輶 孻憵', '孻憵 擙樲橚', '擙樲橚 藒襓謥', '藒襓謥 岯岪弨', '岯岪弨 廞徲', '廞徲 孻憵懥', + '孻憵懥 趡趛踠', '鋍鞎 鞮鞢騉 袟袘觕', '鞮鞢騉 袟袘觕 炟砏', '袟袘觕 炟砏 謺貙蹖', '炟砏 謺貙蹖 偢偣唲', '謺貙蹖 偢偣唲 箷箯緷', + '偢偣唲 箷箯緷 鑴鱱爧', '箷箯緷 鑴鱱爧 覮轀', '鑴鱱爧 覮轀 剆坲', '覮轀 剆坲 煘煓瑐', '剆坲 煘煓瑐 鬐鶤鶐', '煘煓瑐 鬐鶤鶐 飹勫嫢', + '鬐鶤鶐 飹勫嫢 枲柊氠', '飹勫嫢 枲柊氠 鍎鞚韕', '枲柊氠 鍎鞚韕 焲犈', '鍎鞚韕 焲犈 殍涾烰', '焲犈 殍涾烰 齞齝囃', '殍涾烰 齞齝囃 蹅輶', + '齞齝囃 蹅輶 孻憵', '蹅輶 孻憵 擙樲橚', '孻憵 擙樲橚 藒襓謥', '擙樲橚 藒襓謥 岯岪弨', '藒襓謥 岯岪弨 廞徲', '岯岪弨 廞徲 孻憵懥', + '廞徲 孻憵懥 趡趛踠', '鋍鞎 鞮鞢騉 袟袘觕 炟砏', '鞮鞢騉 袟袘觕 炟砏 謺貙蹖', '袟袘觕 炟砏 謺貙蹖 偢偣唲', '炟砏 謺貙蹖 偢偣唲 箷箯緷', + '謺貙蹖 偢偣唲 箷箯緷 鑴鱱爧', '偢偣唲 箷箯緷 鑴鱱爧 覮轀', '箷箯緷 鑴鱱爧 覮轀 剆坲', '鑴鱱爧 覮轀 剆坲 煘煓瑐', + '覮轀 剆坲 煘煓瑐 鬐鶤鶐', '剆坲 煘煓瑐 鬐鶤鶐 飹勫嫢', '煘煓瑐 鬐鶤鶐 飹勫嫢 枲柊氠', '鬐鶤鶐 飹勫嫢 枲柊氠 鍎鞚韕', + '飹勫嫢 枲柊氠 鍎鞚韕 焲犈', '枲柊氠 鍎鞚韕 焲犈 殍涾烰', '鍎鞚韕 焲犈 殍涾烰 齞齝囃', '焲犈 殍涾烰 齞齝囃 蹅輶', + '殍涾烰 齞齝囃 蹅輶 孻憵', '齞齝囃 蹅輶 孻憵 擙樲橚', '蹅輶 孻憵 擙樲橚 藒襓謥', '孻憵 擙樲橚 藒襓謥 岯岪弨', '擙樲橚 藒襓謥 岯岪弨 廞徲', + '藒襓謥 岯岪弨 廞徲 孻憵懥', '岯岪弨 廞徲 孻憵懥 趡趛踠', + ], + ], + ]; + } +} diff --git a/tests/Tokenization/TokenizerTest.php b/tests/Tokenization/TokenizerTest.php new file mode 100644 index 0000000..5d0833c --- /dev/null +++ b/tests/Tokenization/TokenizerTest.php @@ -0,0 +1,24 @@ +tokenize($this->getSimpleText())); + } + + public function testTokenizationOnUtf8(): void + { + $tokenizer = new WhitespaceTokenizer(); + + $tokens = ['鋍鞎', '鳼', '鞮鞢騉', '袟袘觕,', '炟砏', '蒮', '謺貙蹖', '偢偣唲', '蒛', '箷箯緷', '鑴鱱爧', '覮轀,', + '剆坲', '煘煓瑐', '鬐鶤鶐', '飹勫嫢', '銪', '餀', '枲柊氠', '鍎鞚韕', '焲犈,', + '殍涾烰', '齞齝囃', '蹅輶', '鄜,', '孻憵', '擙樲橚', '藒襓謥', '岯岪弨', '蒮', '廞徲', '孻憵懥', '趡趛踠', '槏', ]; + + self::assertEquals($tokens, $tokenizer->tokenize($this->getUtf8Text())); + } +} diff --git a/tests/Tokenization/WordTokenizerTest.php b/tests/Tokenization/WordTokenizerTest.php new file mode 100644 index 0000000..9c55dd6 --- /dev/null +++ 
b/tests/Tokenization/WordTokenizerTest.php @@ -0,0 +1,32 @@ +tokenize($this->getSimpleText())); + } + + public function testTokenizationOnUtf8(): void + { + $tokenizer = new WordTokenizer(); + + $tokens = ['鋍鞎', '鞮鞢騉', '袟袘觕', '炟砏', '謺貙蹖', '偢偣唲', '箷箯緷', '鑴鱱爧', '覮轀', + '剆坲', '煘煓瑐', '鬐鶤鶐', '飹勫嫢', '枲柊氠', '鍎鞚韕', '焲犈', + '殍涾烰', '齞齝囃', '蹅輶', '孻憵', '擙樲橚', '藒襓謥', '岯岪弨', '廞徲', '孻憵懥', '趡趛踠', ]; + + self::assertEquals($tokens, $tokenizer->tokenize($this->getUtf8Text())); + } +} diff --git a/tests/Tree/Node/BinaryNodeTest.php b/tests/Tree/Node/BinaryNodeTest.php new file mode 100644 index 0000000..43db418 --- /dev/null +++ b/tests/Tree/Node/BinaryNodeTest.php @@ -0,0 +1,47 @@ +height()); + self::assertEquals(0, $node->balance()); + } + + public function testAttachDetachLeft(): void + { + $node = new BinaryNode(); + $node->attachLeft(new BinaryNode()); + + self::assertEquals(2, $node->height()); + self::assertEquals(-1, $node->balance()); + + $node->detachLeft(); + + self::assertEquals(1, $node->height()); + self::assertEquals(0, $node->balance()); + } + + public function testAttachDetachRight(): void + { + $node = new BinaryNode(); + $node->attachRight(new BinaryNode()); + + self::assertEquals(2, $node->height()); + self::assertEquals(1, $node->balance()); + + $node->detachRight(); + + self::assertEquals(1, $node->height()); + self::assertEquals(0, $node->balance()); + } +} diff --git a/tests/Tree/Node/DecisionNodeTest.php b/tests/Tree/Node/DecisionNodeTest.php new file mode 100644 index 0000000..2db3482 --- /dev/null +++ b/tests/Tree/Node/DecisionNodeTest.php @@ -0,0 +1,57 @@ +column()); + self::assertEquals(2, $node->samplesCount()); + } + + public function testImpurityIncrease(): void + { + $node = new DecisionNode(2, 4, [ + [[[1, 2, 3]], [1]], + [[[2, 3, 4]], [2]], + ], 400); + + $node->attachRight(new DecisionNode(2, 4, [ + [[[1, 2, 3]], [1]], + [[[2, 3, 4]], [2]], + ], 200)); + + $node->attachLeft(new DecisionNode(2, 4, [ + [[[1, 2, 3]], [1]], + [[[2, 3, 4]], [2]], + ], 
100)); + + self::assertEquals(100, $node->purityIncrease()); + } + + public function testThrowExceptionOnInvalidGroupsCount(): void + { + $this->expectException(InvalidArgumentException::class); + + new DecisionNode(2, 3, [], 200); + } + + public function testThrowExceptionOnInvalidImpurity(): void + { + $this->expectException(InvalidArgumentException::class); + + new DecisionNode(2, 3, [[], []], -2); + } +} diff --git a/tools/php-cs-fixer.sh b/tools/php-cs-fixer.sh deleted file mode 100755 index dbf66e4..0000000 --- a/tools/php-cs-fixer.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -echo "Fixing src/ folder" -php-cs-fixer fix src/ --level=symfony - -echo "Fixing tests/ folder" -php-cs-fixer fix tests/ --level=symfony diff --git a/var/.gitkeep b/var/.gitkeep new file mode 100644 index 0000000..e69de29