mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-21 20:45:10 +00:00
Updates to the documentation (linguistic corrections) (#414)
* Fix typo in Features list * Update distance.md documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation
This commit is contained in:
parent
f30e576c70
commit
7d5c6b15a4
@ -15,7 +15,7 @@ $associator = new Apriori($support = 0.5, $confidence = 0.5);
|
|||||||
|
|
||||||
### Train
|
### Train
|
||||||
|
|
||||||
To train a associator simply provide train samples and labels (as `array`). Example:
|
To train an associator, simply provide train samples and labels (as `array`). Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
$samples = [['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta'], ['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta']];
|
$samples = [['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta'], ['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta']];
|
||||||
@ -31,7 +31,7 @@ You can train the associator using multiple data sets, predictions will be based
|
|||||||
|
|
||||||
### Predict
|
### Predict
|
||||||
|
|
||||||
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
To predict a sample label, use the `predict` method. You can provide one sample or an array of samples:
|
||||||
|
|
||||||
```
|
```
|
||||||
$associator->predict(['alpha','theta']);
|
$associator->predict(['alpha','theta']);
|
||||||
@ -43,7 +43,7 @@ $associator->predict([['alpha','epsilon'],['beta','theta']]);
|
|||||||
|
|
||||||
### Associating
|
### Associating
|
||||||
|
|
||||||
Get generated association rules simply use `rules` method.
|
To get the generated association rules, simply use the `getRules` method.
|
||||||
|
|
||||||
```
|
```
|
||||||
$associator->getRules();
|
$associator->getRules();
|
||||||
@ -52,7 +52,7 @@ $associator->getRules();
|
|||||||
|
|
||||||
### Frequent item sets
|
### Frequent item sets
|
||||||
|
|
||||||
Generating k-length frequent item sets simply use `apriori` method.
|
To generate k-length frequent item sets, simply use the `apriori` method.
|
||||||
|
|
||||||
```
|
```
|
||||||
$associator->apriori();
|
$associator->apriori();
|
||||||
|
@ -14,7 +14,7 @@ $classifier = new KNearestNeighbors($k=3, new Minkowski($lambda=4));
|
|||||||
|
|
||||||
## Train
|
## Train
|
||||||
|
|
||||||
To train a classifier simply provide train samples and labels (as `array`). Example:
|
To train a classifier, simply provide train samples and labels (as `array`). Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||||
@ -28,7 +28,7 @@ You can train the classifier using multiple data sets, predictions will be based
|
|||||||
|
|
||||||
## Predict
|
## Predict
|
||||||
|
|
||||||
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
To predict a sample label, use the `predict` method. You can provide one sample or an array of samples:
|
||||||
|
|
||||||
```
|
```
|
||||||
$classifier->predict([3, 2]);
|
$classifier->predict([3, 2]);
|
||||||
|
@ -4,7 +4,7 @@ Classifier based on applying Bayes' theorem with strong (naive) independence ass
|
|||||||
|
|
||||||
### Train
|
### Train
|
||||||
|
|
||||||
To train a classifier simply provide train samples and labels (as `array`). Example:
|
To train a classifier, simply provide train samples and labels (as `array`). Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
$samples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
|
$samples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
|
||||||
@ -18,7 +18,7 @@ You can train the classifier using multiple data sets, predictions will be based
|
|||||||
|
|
||||||
### Predict
|
### Predict
|
||||||
|
|
||||||
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
To predict a sample label, use the `predict` method. You can provide one sample or an array of samples:
|
||||||
|
|
||||||
```
|
```
|
||||||
$classifier->predict([3, 1, 1]);
|
$classifier->predict([3, 1, 1]);
|
||||||
|
@ -21,7 +21,7 @@ $classifier = new SVC(Kernel::RBF, $cost = 1000, $degree = 3, $gamma = 6);
|
|||||||
|
|
||||||
### Train
|
### Train
|
||||||
|
|
||||||
To train a classifier simply provide train samples and labels (as `array`). Example:
|
To train a classifier, simply provide train samples and labels (as `array`). Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
use Phpml\Classification\SVC;
|
use Phpml\Classification\SVC;
|
||||||
@ -38,7 +38,7 @@ You can train the classifier using multiple data sets, predictions will be based
|
|||||||
|
|
||||||
### Predict
|
### Predict
|
||||||
|
|
||||||
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
To predict a sample label, use the `predict` method. You can provide one sample or an array of samples:
|
||||||
|
|
||||||
```
|
```
|
||||||
$classifier->predict([3, 2]);
|
$classifier->predict([3, 2]);
|
||||||
@ -74,7 +74,7 @@ $classifier = new SVC(
|
|||||||
$classifier->train($samples, $labels);
|
$classifier->train($samples, $labels);
|
||||||
```
|
```
|
||||||
|
|
||||||
Then use `predictProbability` method instead of `predict`:
|
Then use the `predictProbability` method instead of `predict`:
|
||||||
|
|
||||||
```
|
```
|
||||||
$classifier->predictProbability([3, 2]);
|
$classifier->predictProbability([3, 2]);
|
||||||
|
@ -16,12 +16,12 @@ $dbscan = new DBSCAN($epsilon = 2, $minSamples = 3, new Minkowski($lambda=4));
|
|||||||
|
|
||||||
### Clustering
|
### Clustering
|
||||||
|
|
||||||
To divide the samples into clusters simply use `cluster` method. It's return the `array` of clusters with samples inside.
|
To divide the samples into clusters, simply use the `cluster` method. It returns the `array` of clusters with samples inside.
|
||||||
|
|
||||||
```
|
```
|
||||||
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
||||||
|
|
||||||
$dbscan = new DBSCAN($epsilon = 2, $minSamples = 3);
|
$dbscan = new DBSCAN($epsilon = 2, $minSamples = 3);
|
||||||
$dbscan->cluster($samples);
|
$dbscan->cluster($samples);
|
||||||
// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]]
|
// return [0=>[[1, 1], ...], 1=>[[8, 7], ...]]
|
||||||
```
|
```
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# K-means clustering
|
# K-means clustering
|
||||||
|
|
||||||
The K-Means algorithm clusters data by trying to separate samples in n groups of equal variance, minimizing a criterion known as the inertia or within-cluster sum-of-squares.
|
The K-Means algorithm clusters data by trying to separate samples into n groups of equal variance, minimizing a criterion known as the inertia or within-cluster sum-of-squares.
|
||||||
This algorithm requires the number of clusters to be specified.
|
This algorithm requires the number of clusters to be specified.
|
||||||
|
|
||||||
### Constructor Parameters
|
### Constructor Parameters
|
||||||
@ -15,11 +15,11 @@ $kmeans = new KMeans(4, KMeans::INIT_RANDOM);
|
|||||||
|
|
||||||
### Clustering
|
### Clustering
|
||||||
|
|
||||||
To divide the samples into clusters simply use `cluster` method. It's return the `array` of clusters with samples inside.
|
To divide the samples into clusters, simply use the `cluster` method. It returns the `array` of clusters with samples inside.
|
||||||
|
|
||||||
```
|
```
|
||||||
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
||||||
Or if you need to keep your indentifiers along with yours samples you can use array keys as labels.
|
Or, if you need to keep your identifiers along with your samples, you can use array keys as labels.
|
||||||
$samples = [ 'Label1' => [1, 1], 'Label2' => [8, 7], 'Label3' => [1, 2]];
|
$samples = [ 'Label1' => [1, 1], 'Label2' => [8, 7], 'Label3' => [1, 2]];
|
||||||
|
|
||||||
$kmeans = new KMeans(2);
|
$kmeans = new KMeans(2);
|
||||||
@ -32,8 +32,8 @@ $kmeans->cluster($samples);
|
|||||||
#### kmeans++ (default)
|
#### kmeans++ (default)
|
||||||
|
|
||||||
K-means++ method selects initial cluster centers for k-mean clustering in a smart way to speed up convergence.
|
K-means++ method selects initial cluster centers for k-mean clustering in a smart way to speed up convergence.
|
||||||
It use the DASV seeding method consists of finding good initial centroids for the clusters.
|
It uses the DASV seeding method, which consists of finding good initial centroids for the clusters.
|
||||||
|
|
||||||
#### random
|
#### random
|
||||||
|
|
||||||
Random initialization method chooses completely random centroid. It get the space boundaries to avoid placing clusters centroid too far from samples data.
|
The random initialization method chooses completely random centroids. It gets the space boundaries to avoid placing cluster centroids too far from the sample data.
|
||||||
|
@ -1,20 +1,20 @@
|
|||||||
# Random Split
|
# Random Split
|
||||||
|
|
||||||
One of the simplest methods from Cross-validation is implemented as `RandomSpilt` class. Samples are split to two groups: train group and test group. You can adjust number of samples in each group.
|
One of the simplest methods of cross-validation is implemented as the `RandomSplit` class. Samples are split into two groups: a train group and a test group. You can adjust the number of samples in each group.
|
||||||
|
|
||||||
### Constructor Parameters
|
### Constructor Parameters
|
||||||
|
|
||||||
* $dataset - object that implements `Dataset` interface
|
* $dataset - object that implements `Dataset` interface
|
||||||
* $testSize - a fraction of test split (float, from 0 to 1, default: 0.3)
|
* $testSize - a fraction of test split (float, from 0 to 1, default: 0.3)
|
||||||
* $seed - seed for random generator (e.g. for tests)
|
* $seed - seed for random generator (e.g. for tests)
|
||||||
|
|
||||||
```
|
```
|
||||||
$randomSplit = new RandomSplit($dataset, 0.2);
|
$randomSplit = new RandomSplit($dataset, 0.2);
|
||||||
```
|
```
|
||||||
|
|
||||||
### Samples and labels groups
|
### Samples and labels groups
|
||||||
|
|
||||||
To get samples or labels from test and train group you can use getters:
|
To get samples or labels from test and train group, you can use getters:
|
||||||
|
|
||||||
```
|
```
|
||||||
$dataset = new RandomSplit($dataset, 0.3, 1234);
|
$dataset = new RandomSplit($dataset, 0.3, 1234);
|
||||||
|
@ -1,22 +1,22 @@
|
|||||||
# Stratified Random Split
|
# Stratified Random Split
|
||||||
|
|
||||||
Analogously to `RandomSpilt` class samples are split to two groups: train group and test group.
|
Analogously to the `RandomSplit` class, samples are split into two groups: a train group and a test group.
|
||||||
Distribution of samples takes into account their targets and trying to divide them equally.
|
The distribution of samples takes their targets into account and tries to divide them equally.
|
||||||
You can adjust number of samples in each group.
|
You can adjust the number of samples in each group.
|
||||||
|
|
||||||
### Constructor Parameters
|
### Constructor Parameters
|
||||||
|
|
||||||
* $dataset - object that implements `Dataset` interface
|
* $dataset - object that implements `Dataset` interface
|
||||||
* $testSize - a fraction of test split (float, from 0 to 1, default: 0.3)
|
* $testSize - a fraction of test split (float, from 0 to 1, default: 0.3)
|
||||||
* $seed - seed for random generator (e.g. for tests)
|
* $seed - seed for random generator (e.g. for tests)
|
||||||
|
|
||||||
```
|
```
|
||||||
$split = new StratifiedRandomSplit($dataset, 0.2);
|
$split = new StratifiedRandomSplit($dataset, 0.2);
|
||||||
```
|
```
|
||||||
|
|
||||||
### Samples and labels groups
|
### Samples and labels groups
|
||||||
|
|
||||||
To get samples or labels from test and train group you can use getters:
|
To get samples or labels from test and train group, you can use getters:
|
||||||
|
|
||||||
```
|
```
|
||||||
$dataset = new StratifiedRandomSplit($dataset, 0.3, 1234);
|
$dataset = new StratifiedRandomSplit($dataset, 0.3, 1234);
|
||||||
@ -41,4 +41,4 @@ $dataset = new ArrayDataset(
|
|||||||
$split = new StratifiedRandomSplit($dataset, 0.5);
|
$split = new StratifiedRandomSplit($dataset, 0.5);
|
||||||
```
|
```
|
||||||
|
|
||||||
Split will have equals amount of each target. Two of the target `a` and two of `b`.
|
The split will have an equal amount of each target: two of target `a` and two of target `b`.
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Helper class that holds data as PHP `array` type. Implements the `Dataset` interface which is used heavily in other classes.
|
Helper class that holds data as PHP `array` type. Implements the `Dataset` interface which is used heavily in other classes.
|
||||||
|
|
||||||
### Constructors Parameters
|
### Constructor Parameters
|
||||||
|
|
||||||
* $samples - (array) of samples
|
* $samples - (array) of samples
|
||||||
* $labels - (array) of labels
|
* $labels - (array) of labels
|
||||||
@ -15,7 +15,7 @@ $dataset = new ArrayDataset([[1, 1], [2, 1], [3, 2], [4, 1]], ['a', 'a', 'b', 'b
|
|||||||
|
|
||||||
### Samples and labels
|
### Samples and labels
|
||||||
|
|
||||||
To get samples or labels you can use getters:
|
To get samples or labels, you can use getters:
|
||||||
|
|
||||||
```
|
```
|
||||||
$dataset->getSamples();
|
$dataset->getSamples();
|
||||||
@ -24,7 +24,7 @@ $dataset->getTargets();
|
|||||||
|
|
||||||
### Remove columns
|
### Remove columns
|
||||||
|
|
||||||
You can remove columns by index numbers, for example:
|
You can remove columns by their index numbers, for example:
|
||||||
|
|
||||||
```
|
```
|
||||||
use Phpml\Dataset\ArrayDataset;
|
use Phpml\Dataset\ArrayDataset;
|
||||||
|
@ -2,11 +2,11 @@
|
|||||||
|
|
||||||
Helper class that loads data from CSV file. It extends the `ArrayDataset`.
|
Helper class that loads data from CSV file. It extends the `ArrayDataset`.
|
||||||
|
|
||||||
### Constructors Parameters
|
### Constructor Parameters
|
||||||
|
|
||||||
* $filepath - (string) path to `.csv` file
|
* $filepath - (string) path to `.csv` file
|
||||||
* $features - (int) number of columns that are features (starts from first column), last column must be a label
|
* $features - (int) number of columns that are features (starts from first column), last column must be a label
|
||||||
* $headingRow - (bool) define is file have a heading row (if `true` then first row will be ignored)
|
* $headingRow - (bool) define if the file has a heading row (if `true` then first row will be ignored)
|
||||||
|
|
||||||
```
|
```
|
||||||
$dataset = new CsvDataset('dataset.csv', 2, true);
|
$dataset = new CsvDataset('dataset.csv', 2, true);
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Helper class that loads dataset from files. Use folder names as targets. It extends the `ArrayDataset`.
|
Helper class that loads dataset from files. Use folder names as targets. It extends the `ArrayDataset`.
|
||||||
|
|
||||||
### Constructors Parameters
|
### Constructor Parameters
|
||||||
|
|
||||||
* $rootPath - (string) path to root folder that contains files dataset
|
* $rootPath - (string) path to root folder that contains files dataset
|
||||||
|
|
||||||
@ -42,7 +42,7 @@ data
|
|||||||
...
|
...
|
||||||
```
|
```
|
||||||
|
|
||||||
Load files data with `FilesDataset`:
|
Load files data with `FilesDataset`:
|
||||||
|
|
||||||
```
|
```
|
||||||
use Phpml\Dataset\FilesDataset;
|
use Phpml\Dataset\FilesDataset;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# MnistDataset
|
# MnistDataset
|
||||||
|
|
||||||
Helper class that load data from MNIST dataset: [http://yann.lecun.com/exdb/mnist/](http://yann.lecun.com/exdb/mnist/)
|
Helper class that loads data from MNIST dataset: [http://yann.lecun.com/exdb/mnist/](http://yann.lecun.com/exdb/mnist/)
|
||||||
|
|
||||||
> The MNIST database of handwritten digits, available from this page, has a training set of 60,000 examples, and a test set of 10,000 examples. It is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image.
|
> The MNIST database of handwritten digits, available from this page, has a training set of 60,000 examples, and a test set of 10,000 examples. It is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image.
|
||||||
It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting.
|
It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting.
|
||||||
@ -18,7 +18,7 @@ $trainDataset = new MnistDataset('train-images-idx3-ubyte', 'train-labels-idx1-u
|
|||||||
|
|
||||||
### Samples and labels
|
### Samples and labels
|
||||||
|
|
||||||
To get samples or labels you can use getters:
|
To get samples or labels, you can use getters:
|
||||||
|
|
||||||
```
|
```
|
||||||
$dataset->getSamples();
|
$dataset->getSamples();
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Helper class that loads data from SVM-Light format file. It extends the `ArrayDataset`.
|
Helper class that loads data from SVM-Light format file. It extends the `ArrayDataset`.
|
||||||
|
|
||||||
### Constructors Parameters
|
### Constructor Parameters
|
||||||
|
|
||||||
* $filepath - (string) path to the file
|
* $filepath - (string) path to the file
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ $transformer = new TfIdfTransformer($samples);
|
|||||||
|
|
||||||
### Transformation
|
### Transformation
|
||||||
|
|
||||||
To transform a collection of text samples use `transform` method. Example:
|
To transform a collection of text samples, use the `transform` method. Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
use Phpml\FeatureExtraction\TfIdfTransformer;
|
use Phpml\FeatureExtraction\TfIdfTransformer;
|
||||||
@ -28,7 +28,7 @@ $samples = [
|
|||||||
[0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 0, 5 => 0],
|
[0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 0, 5 => 0],
|
||||||
[0 => 1, 1 => 1, 2 => 0, 3 => 0, 4 => 2, 5 => 3],
|
[0 => 1, 1 => 1, 2 => 0, 3 => 0, 4 => 2, 5 => 3],
|
||||||
];
|
];
|
||||||
|
|
||||||
$transformer = new TfIdfTransformer($samples);
|
$transformer = new TfIdfTransformer($samples);
|
||||||
$transformer->transform($samples);
|
$transformer->transform($samples);
|
||||||
|
|
||||||
@ -38,5 +38,5 @@ $samples = [
|
|||||||
[0 => 0, 1 => 0, 2 => 0, 3 => 0, 4 => 0.602, 5 => 0.903],
|
[0 => 0, 1 => 0, 2 => 0, 3 => 0, 4 => 0.602, 5 => 0.903],
|
||||||
];
|
];
|
||||||
*/
|
*/
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -16,7 +16,7 @@ $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
|
|||||||
|
|
||||||
### Transformation
|
### Transformation
|
||||||
|
|
||||||
To transform a collection of text samples use `transform` method. Example:
|
To transform a collection of text samples, use the `transform` method. Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
$samples = [
|
$samples = [
|
||||||
@ -42,7 +42,7 @@ $vectorizer->transform($samples);
|
|||||||
|
|
||||||
### Vocabulary
|
### Vocabulary
|
||||||
|
|
||||||
You can extract vocabulary using `getVocabulary()` method. Example:
|
You can extract vocabulary using the `getVocabulary()` method. Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
$vectorizer->getVocabulary();
|
$vectorizer->getVocabulary();
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
## Constructor Parameters
|
## Constructor Parameters
|
||||||
|
|
||||||
* $k (int) - number of top features to select, rest will be removed (default: 10)
|
* $k (int) - number of top features to select, rest will be removed (default: 10)
|
||||||
* $scoringFunction (ScoringFunction) - function that take samples and targets and return array with scores (default: ANOVAFValue)
|
* $scoringFunction (ScoringFunction) - function that takes samples and targets and returns an array with scores (default: ANOVAFValue)
|
||||||
|
|
||||||
```php
|
```php
|
||||||
use Phpml\FeatureSelection\SelectKBest;
|
use Phpml\FeatureSelection\SelectKBest;
|
||||||
@ -27,13 +27,13 @@ $selector->fit($samples = $dataset->getSamples(), $dataset->getTargets());
|
|||||||
$selector->transform($samples);
|
$selector->transform($samples);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
$samples[0] = [1.4, 0.2];
|
$samples[0] = [1.4, 0.2];
|
||||||
*/
|
*/
|
||||||
```
|
```
|
||||||
|
|
||||||
## Scores
|
## Scores
|
||||||
|
|
||||||
You can get a array with the calculated score for each feature.
|
You can get an array with the calculated score for each feature.
|
||||||
A higher value means that a given feature is better suited for learning.
|
A higher value means that a given feature is better suited for learning.
|
||||||
Of course, the rating depends on the scoring function used.
|
Of course, the rating depends on the scoring function used.
|
||||||
|
|
||||||
@ -56,7 +56,7 @@ $selector->scores();
|
|||||||
float(1179.0343277002)
|
float(1179.0343277002)
|
||||||
[3]=>
|
[3]=>
|
||||||
float(959.32440572573)
|
float(959.32440572573)
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -70,11 +70,11 @@ For classification:
|
|||||||
The test is applied to samples from two or more groups, possibly with differing sizes.
|
The test is applied to samples from two or more groups, possibly with differing sizes.
|
||||||
|
|
||||||
For regression:
|
For regression:
|
||||||
- **UnivariateLinearRegression**
|
- **UnivariateLinearRegression**
|
||||||
Quick linear model for testing the effect of a single regressor, sequentially for many regressors.
|
Quick linear model for testing the effect of a single regressor, sequentially for many regressors.
|
||||||
This is done in 2 steps:
|
This is done in 2 steps:
|
||||||
- 1. The cross correlation between each regressor and the target is computed, that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *std(y)).
|
- 1. The cross correlation between each regressor and the target is computed, that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *std(y)).
|
||||||
- 2. It is converted to an F score
|
- 2. It is converted to an F score
|
||||||
|
|
||||||
## Pipeline
|
## Pipeline
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
# Variance Threshold
|
# Variance Threshold
|
||||||
|
|
||||||
`VarianceThreshold` is a simple baseline approach to feature selection.
|
`VarianceThreshold` is a simple baseline approach to feature selection.
|
||||||
It removes all features whose variance doesn’t meet some threshold.
|
It removes all features whose variance doesn’t meet some threshold.
|
||||||
By default, it removes all zero-variance features, i.e. features that have the same value in all samples.
|
By default, it removes all zero-variance features, i.e. features that have the same value in all samples.
|
||||||
|
|
||||||
## Constructor Parameters
|
## Constructor Parameters
|
||||||
@ -16,10 +16,10 @@ $transformer = new VarianceThreshold(0.15);
|
|||||||
|
|
||||||
## Example of use
|
## Example of use
|
||||||
|
|
||||||
As an example, suppose that we have a dataset with boolean features and
|
As an example, suppose that we have a dataset with boolean features and
|
||||||
we want to remove all features that are either one or zero (on or off)
|
we want to remove all features that are either one or zero (on or off)
|
||||||
in more than 80% of the samples.
|
in more than 80% of the samples.
|
||||||
Boolean features are Bernoulli random variables, and the variance of such
|
Boolean features are Bernoulli random variables, and the variance of such
|
||||||
variables is given by
|
variables is given by
|
||||||
```
|
```
|
||||||
Var[X] = p(1 - p)
|
Var[X] = p(1 - p)
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
# Accuracy
|
# Accuracy
|
||||||
|
|
||||||
Class for calculate classifier accuracy.
|
Class for calculating classifier accuracy.
|
||||||
|
|
||||||
### Score
|
### Score
|
||||||
|
|
||||||
To calculate classifier accuracy score use `score` static method. Parameters:
|
To calculate classifier accuracy score, use the `score` static method. Parameters:
|
||||||
|
|
||||||
* $actualLabels - (array) true sample labels
|
* $actualLabels - (array) true sample labels
|
||||||
* $predictedLabels - (array) predicted labels (e.x. from test group)
|
* $predictedLabels - (array) predicted labels (e.g. from test group)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# Classification Report
|
# Classification Report
|
||||||
|
|
||||||
Class for calculate main classifier metrics: precision, recall, F1 score and support.
|
Class for calculating main classifier metrics: precision, recall, F1 score and support.
|
||||||
|
|
||||||
### Report
|
### Report
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# Confusion Matrix
|
# Confusion Matrix
|
||||||
|
|
||||||
Class for compute confusion matrix to evaluate the accuracy of a classification.
|
Class for computing confusion matrix to evaluate the accuracy of a classification.
|
||||||
|
|
||||||
### Example (all targets)
|
### Example (all targets)
|
||||||
|
|
||||||
|
@ -39,8 +39,7 @@ $mlp = new MLPClassifier(4, [$layer1, $layer2], ['a', 'b', 'c']);
|
|||||||
|
|
||||||
## Train
|
## Train
|
||||||
|
|
||||||
To train a MLP simply provide train samples and labels (as array). Example:
|
To train an MLP, simply provide train samples and labels (as array). Example:
|
||||||
|
|
||||||
|
|
||||||
```
|
```
|
||||||
$mlp->train(
|
$mlp->train(
|
||||||
@ -71,7 +70,7 @@ $mlp->setLearningRate(0.1);
|
|||||||
|
|
||||||
## Predict
|
## Predict
|
||||||
|
|
||||||
To predict sample label use predict method. You can provide one sample or array of samples:
|
To predict a sample label, use the `predict` method. You can provide one sample or an array of samples:
|
||||||
|
|
||||||
```
|
```
|
||||||
$mlp->predict([[1, 1, 1, 1], [0, 0, 0, 0]]);
|
$mlp->predict([[1, 1, 1, 1], [0, 0, 0, 0]]);
|
||||||
|
@ -49,7 +49,7 @@ $data = [
|
|||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
You can also use `$samples` constructer parameter instead of `fit` method:
|
You can also use the `$samples` constructor parameter instead of the `fit` method:
|
||||||
|
|
||||||
```
|
```
|
||||||
use Phpml\Preprocessing\Imputer;
|
use Phpml\Preprocessing\Imputer;
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
# LeastSquares Linear Regression
|
# LeastSquares Linear Regression
|
||||||
|
|
||||||
Linear model that use least squares method to approximate solution.
|
Linear model that uses the least squares method to approximate the solution.
|
||||||
|
|
||||||
### Train
|
### Train
|
||||||
|
|
||||||
To train a model simply provide train samples and targets values (as `array`). Example:
|
To train a model, simply provide train samples and target values (as `array`). Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
$samples = [[60], [61], [62], [63], [65]];
|
$samples = [[60], [61], [62], [63], [65]];
|
||||||
@ -18,7 +18,7 @@ You can train the model using multiple data sets, predictions will be based on a
|
|||||||
|
|
||||||
### Predict
|
### Predict
|
||||||
|
|
||||||
To predict sample target value use `predict` method with sample to check (as `array`). Example:
|
To predict sample target value, use the `predict` method with sample to check (as `array`). Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
$regression->predict([64]);
|
$regression->predict([64]);
|
||||||
@ -27,8 +27,8 @@ $regression->predict([64]);
|
|||||||
|
|
||||||
### Multiple Linear Regression
|
### Multiple Linear Regression
|
||||||
|
|
||||||
The term multiple attached to linear regression means that there are two or more sample parameters used to predict target.
|
The term multiple attached to linear regression means that there are two or more sample parameters used to predict target.
|
||||||
For example you can use: mileage and production year to predict price of a car.
|
For example, you can use mileage and production year to predict the price of a car.
|
||||||
|
|
||||||
```
|
```
|
||||||
$samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]];
|
$samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]];
|
||||||
@ -42,7 +42,7 @@ $regression->predict([60000, 1996])
|
|||||||
|
|
||||||
### Intercept and Coefficients
|
### Intercept and Coefficients
|
||||||
|
|
||||||
After you train your model you can get the intercept and coefficients array.
|
After you train your model, you can get the intercept and coefficients array.
|
||||||
|
|
||||||
```
|
```
|
||||||
$regression->getIntercept();
|
$regression->getIntercept();
|
||||||
|
@ -21,7 +21,7 @@ $regression = new SVR(Kernel::LINEAR, $degree = 3, $epsilon=10.0);
|
|||||||
|
|
||||||
### Train
|
### Train
|
||||||
|
|
||||||
To train a model simply provide train samples and targets values (as `array`). Example:
|
To train a model, simply provide train samples and target values (as `array`). Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
use Phpml\Regression\SVR;
|
use Phpml\Regression\SVR;
|
||||||
@ -38,7 +38,7 @@ You can train the model using multiple data sets, predictions will be based on a
|
|||||||
|
|
||||||
### Predict
|
### Predict
|
||||||
|
|
||||||
To predict sample target value use `predict` method. You can provide one sample or array of samples:
|
To predict sample target value, use the `predict` method. You can provide one sample or array of samples:
|
||||||
|
|
||||||
```
|
```
|
||||||
$regression->predict([64])
|
$regression->predict([64])
|
||||||
|
@ -5,13 +5,12 @@ In machine learning, it is common to run a sequence of algorithms to process and
|
|||||||
* Split each document’s text into tokens.
|
* Split each document’s text into tokens.
|
||||||
* Convert each document’s words into a numerical feature vector ([Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)).
|
* Convert each document’s words into a numerical feature vector ([Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)).
|
||||||
* Learn a prediction model using the feature vectors and labels.
|
* Learn a prediction model using the feature vectors and labels.
|
||||||
|
|
||||||
PHP-ML represents such a workflow as a Pipeline, which consists sequence of transformers and a estimator.
|
|
||||||
|
|
||||||
|
PHP-ML represents such a workflow as a Pipeline, which consists of a sequence of transformers and an estimator.
|
||||||
|
|
||||||
### Constructor Parameters
|
### Constructor Parameters
|
||||||
|
|
||||||
* $transformers (array|Transformer[]) - sequence of objects that implements Transformer interface
|
* $transformers (array|Transformer[]) - sequence of objects that implements the Transformer interface
|
||||||
* $estimator (Estimator) - estimator that can train and predict
|
* $estimator (Estimator) - estimator that can train and predict
|
||||||
|
|
||||||
```
|
```
|
||||||
@ -29,7 +28,8 @@ $pipeline = new Pipeline($transformers, $estimator);
|
|||||||
|
|
||||||
### Example
|
### Example
|
||||||
|
|
||||||
First our pipeline replace missing value, then normalize samples and finally train SVC estimator. Thus prepared pipeline repeats each transformation step for predicted sample.
|
First, our pipeline replaces the missing value, then normalizes samples and finally trains the SVC estimator.
|
||||||
|
A pipeline prepared this way repeats each transformation step for every predicted sample.
|
||||||
|
|
||||||
```
|
```
|
||||||
use Phpml\Classification\SVC;
|
use Phpml\Classification\SVC;
|
||||||
|
@ -4,7 +4,7 @@ Selected algorithms require the use of a function for calculating the distance.
|
|||||||
|
|
||||||
### Euclidean
|
### Euclidean
|
||||||
|
|
||||||
Class for calculation Euclidean distance.
|
Class for calculating Euclidean distance.
|
||||||
|
|
||||||
![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance")
|
![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance")
|
||||||
|
|
||||||
@ -13,7 +13,7 @@ To calculate Euclidean distance:
|
|||||||
```
|
```
|
||||||
$a = [4, 6];
|
$a = [4, 6];
|
||||||
$b = [2, 5];
|
$b = [2, 5];
|
||||||
|
|
||||||
$euclidean = new Euclidean();
|
$euclidean = new Euclidean();
|
||||||
$euclidean->distance($a, $b);
|
$euclidean->distance($a, $b);
|
||||||
// return 2.2360679774998
|
// return 2.2360679774998
|
||||||
@ -21,7 +21,7 @@ $euclidean->distance($a, $b);
|
|||||||
|
|
||||||
### Manhattan
|
### Manhattan
|
||||||
|
|
||||||
Class for calculation Manhattan distance.
|
Class for calculating Manhattan distance.
|
||||||
|
|
||||||
![manhattan](https://upload.wikimedia.org/math/4/c/5/4c568bd1d76a6b15e19cb2ac3ad75350.png "Manhattan Distance")
|
![manhattan](https://upload.wikimedia.org/math/4/c/5/4c568bd1d76a6b15e19cb2ac3ad75350.png "Manhattan Distance")
|
||||||
|
|
||||||
@ -30,7 +30,7 @@ To calculate Manhattan distance:
|
|||||||
```
|
```
|
||||||
$a = [4, 6];
|
$a = [4, 6];
|
||||||
$b = [2, 5];
|
$b = [2, 5];
|
||||||
|
|
||||||
$manhattan = new Manhattan();
|
$manhattan = new Manhattan();
|
||||||
$manhattan->distance($a, $b);
|
$manhattan->distance($a, $b);
|
||||||
// return 3
|
// return 3
|
||||||
@ -38,7 +38,7 @@ $manhattan->distance($a, $b);
|
|||||||
|
|
||||||
### Chebyshev
|
### Chebyshev
|
||||||
|
|
||||||
Class for calculation Chebyshev distance.
|
Class for calculating Chebyshev distance.
|
||||||
|
|
||||||
![chebyshev](https://upload.wikimedia.org/math/7/1/2/71200f7dbb43b3bcfbcbdb9e02ab0a0c.png "Chebyshev Distance")
|
![chebyshev](https://upload.wikimedia.org/math/7/1/2/71200f7dbb43b3bcfbcbdb9e02ab0a0c.png "Chebyshev Distance")
|
||||||
|
|
||||||
@ -47,7 +47,7 @@ To calculate Chebyshev distance:
|
|||||||
```
|
```
|
||||||
$a = [4, 6];
|
$a = [4, 6];
|
||||||
$b = [2, 5];
|
$b = [2, 5];
|
||||||
|
|
||||||
$chebyshev = new Chebyshev();
|
$chebyshev = new Chebyshev();
|
||||||
$chebyshev->distance($a, $b);
|
$chebyshev->distance($a, $b);
|
||||||
// return 2
|
// return 2
|
||||||
@ -55,7 +55,7 @@ $chebyshev->distance($a, $b);
|
|||||||
|
|
||||||
### Minkowski
|
### Minkowski
|
||||||
|
|
||||||
Class for calculation Minkowski distance.
|
Class for calculating Minkowski distance.
|
||||||
|
|
||||||
![minkowski](https://upload.wikimedia.org/math/a/a/0/aa0c62083c12390cb15ac3217de88e66.png "Minkowski Distance")
|
![minkowski](https://upload.wikimedia.org/math/a/a/0/aa0c62083c12390cb15ac3217de88e66.png "Minkowski Distance")
|
||||||
|
|
||||||
@ -64,7 +64,7 @@ To calculate Minkowski distance:
|
|||||||
```
|
```
|
||||||
$a = [4, 6];
|
$a = [4, 6];
|
||||||
$b = [2, 5];
|
$b = [2, 5];
|
||||||
|
|
||||||
$minkowski = new Minkowski();
|
$minkowski = new Minkowski();
|
||||||
$minkowski->distance($a, $b);
|
$minkowski->distance($a, $b);
|
||||||
// return 2.080
|
// return 2.080
|
||||||
@ -83,7 +83,7 @@ $minkowski->distance($a, $b);
|
|||||||
|
|
||||||
### Custom distance
|
### Custom distance
|
||||||
|
|
||||||
To apply your own function of distance use `Distance` interface. Example
|
To apply your own distance function, implement the `Distance` interface. Example:
|
||||||
|
|
||||||
```
|
```
|
||||||
class CustomDistance implements Distance
|
class CustomDistance implements Distance
|
||||||
@ -103,7 +103,7 @@ class CustomDistance implements Distance
|
|||||||
$distance[] = $a[$i] * $b[$i];
|
$distance[] = $a[$i] * $b[$i];
|
||||||
}
|
}
|
||||||
|
|
||||||
return min($distance);
|
return min($distance);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
@ -7,7 +7,7 @@ Selected statistical methods.
|
|||||||
Correlation coefficients are used in statistics to measure how strong a relationship is between two variables. There are several types of correlation coefficient.
|
Correlation coefficients are used in statistics to measure how strong a relationship is between two variables. There are several types of correlation coefficient.
|
||||||
|
|
||||||
### Pearson correlation
|
### Pearson correlation
|
||||||
|
|
||||||
Pearson’s correlation or Pearson correlation is a correlation coefficient commonly used in linear regression.
|
Pearson’s correlation or Pearson correlation is a correlation coefficient commonly used in linear regression.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
Loading…
Reference in New Issue
Block a user