mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-25 14:27:34 +00:00
add docs for Pipeline
This commit is contained in:
parent
cf0970c2ac
commit
bb35d045ba
@ -48,6 +48,8 @@ composer require php-ai/php-ml
|
||||
* [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan/)
|
||||
* Metric
|
||||
* [Accuracy](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/accuracy/)
|
||||
* Workflow
|
||||
* [Pipeline](http://php-ml.readthedocs.io/en/latest/machine-learning/workflow/pipeline/)
|
||||
* Cross Validation
|
||||
* [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split/)
|
||||
* [Stratified Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/stratified-random-split/)
|
||||
|
@ -38,35 +38,36 @@ composer require php-ai/php-ml
|
||||
|
||||
* Classification
|
||||
* [SVC](machine-learning/classification/svc/)
|
||||
* [k-Nearest Neighbors](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/k-nearest-neighbors/)
|
||||
* [Naive Bayes](http://php-ml.readthedocs.io/en/latest/machine-learning/classification/naive-bayes/)
|
||||
* [k-Nearest Neighbors](machine-learning/classification/k-nearest-neighbors/)
|
||||
* [Naive Bayes](machine-learning/classification/naive-bayes/)
|
||||
* Regression
|
||||
* [Least Squares](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/least-squares/)
|
||||
* [SVR](http://php-ml.readthedocs.io/en/latest/machine-learning/regression/svr/)
|
||||
* [Least Squares](machine-learning/regression/least-squares/)
|
||||
* [SVR](machine-learning/regression/svr/)
|
||||
* Clustering
|
||||
* [k-Means](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/k-means/)
|
||||
* [DBSCAN](http://php-ml.readthedocs.io/en/latest/machine-learning/clustering/dbscan/)
|
||||
*
|
||||
* [k-Means](machine-learning/clustering/k-means/)
|
||||
* [DBSCAN](machine-learning/clustering/dbscan/)
|
||||
* Metric
|
||||
* [Accuracy](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/accuracy/)
|
||||
* [Accuracy](machine-learning/metric/accuracy/)
|
||||
* Workflow
|
||||
* [Pipeline](machine-learning/workflow/pipeline/)
|
||||
* Cross Validation
|
||||
* [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split/)
|
||||
* [Stratified Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/stratified-random-split/)
|
||||
* [Random Split](machine-learning/cross-validation/random-split/)
|
||||
* [Stratified Random Split](machine-learning/cross-validation/stratified-random-split/)
|
||||
* Preprocessing
|
||||
* [Normalization](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/normalization/)
|
||||
* [Imputation missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values/)
|
||||
* [Normalization](machine-learning/preprocessing/normalization/)
|
||||
* [Imputation missing values](machine-learning/preprocessing/imputation-missing-values/)
|
||||
* Feature Extraction
|
||||
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/)
|
||||
* [Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)
|
||||
* Datasets
|
||||
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/)
|
||||
* [CSV](machine-learning/datasets/csv-dataset/)
|
||||
* Ready to use:
|
||||
* [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/)
|
||||
* [Wine](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/wine/)
|
||||
* [Glass](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/glass/)
|
||||
* [Iris](machine-learning/datasets/demo/iris/)
|
||||
* [Wine](machine-learning/datasets/demo/wine/)
|
||||
* [Glass](machine-learning/datasets/demo/glass/)
|
||||
* Math
|
||||
* [Distance](http://php-ml.readthedocs.io/en/latest/math/distance/)
|
||||
* [Matrix](http://php-ml.readthedocs.io/en/latest/math/matrix/)
|
||||
* [Statistic](http://php-ml.readthedocs.io/en/latest/math/statistic/)
|
||||
* [Distance](math/distance/)
|
||||
* [Matrix](math/matrix/)
|
||||
* [Statistic](math/statistic/)
|
||||
|
||||
|
||||
## Contribute
|
||||
|
65
docs/machine-learning/workflow/pipeline.md
Normal file
65
docs/machine-learning/workflow/pipeline.md
Normal file
@ -0,0 +1,65 @@
|
||||
# Pipeline
|
||||
|
||||
In machine learning, it is common to run a sequence of algorithms to process and learn from a dataset. For example:
|
||||
|
||||
* Split each document’s text into tokens.
|
||||
* Convert each document’s words into a numerical feature vector ([Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)).
|
||||
* Learn a prediction model using the feature vectors and labels.
|
||||
|
||||
PHP-ML represents such a workflow as a Pipeline, which consists of a sequence of transformers and an estimator.
|
||||
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
* $transformers (array|Transformer[]) - sequence of objects that implement the Transformer interface
|
||||
* $estimator (Estimator) - estimator that can train and predict
|
||||
|
||||
```
|
||||
use Phpml\Classification\SVC;
|
||||
use Phpml\FeatureExtraction\TfIdfTransformer;
|
||||
use Phpml\Pipeline;
|
||||
|
||||
$transformers = [
|
||||
new TfIdfTransformer(),
|
||||
];
|
||||
$estimator = new SVC();
|
||||
|
||||
$pipeline = new Pipeline($transformers, $estimator);
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
First, our pipeline replaces missing values, then normalizes the samples, and finally trains the SVC estimator. A pipeline prepared this way repeats each transformation step for every sample passed to it for prediction.
|
||||
|
||||
```
|
||||
use Phpml\Classification\SVC;
|
||||
use Phpml\Pipeline;
|
||||
use Phpml\Preprocessing\Imputer;
|
||||
use Phpml\Preprocessing\Normalizer;
|
||||
use Phpml\Preprocessing\Imputer\Strategy\MostFrequentStrategy;
|
||||
|
||||
$transformers = [
|
||||
new Imputer(null, new MostFrequentStrategy()),
|
||||
new Normalizer(),
|
||||
];
|
||||
$estimator = new SVC();
|
||||
|
||||
$samples = [
|
||||
[1, -1, 2],
|
||||
[2, 0, null],
|
||||
[null, 1, -1],
|
||||
];
|
||||
|
||||
$targets = [
|
||||
4,
|
||||
1,
|
||||
4,
|
||||
];
|
||||
|
||||
$pipeline = new Pipeline($transformers, $estimator);
|
||||
$pipeline->train($samples, $targets);
|
||||
|
||||
$predicted = $pipeline->predict([[0, 0, 0]]);
|
||||
|
||||
// $predicted == 4
|
||||
```
|
@ -14,6 +14,8 @@ pages:
|
||||
- DBSCAN: machine-learning/clustering/dbscan.md
|
||||
- Metric:
|
||||
- Accuracy: machine-learning/metric/accuracy.md
|
||||
- Workflow:
|
||||
- Pipeline: machine-learning/workflow/pipeline.md
|
||||
- Cross Validation:
|
||||
- RandomSplit: machine-learning/cross-validation/random-split.md
|
||||
- Stratified Random Split: machine-learning/cross-validation/stratified-random-split.md
|
||||
|
Loading…
Reference in New Issue
Block a user