mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-10 15:50:57 +00:00
Updates to the documentation (linguistic corrections) (#414)
* Fix typo in Features list * Update distance.md documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation * Fix grammatical mistakes in documentation
This commit is contained in:
parent
f30e576c70
commit
7d5c6b15a4
@ -15,7 +15,7 @@ $associator = new Apriori($support = 0.5, $confidence = 0.5);
|
||||
|
||||
### Train
|
||||
|
||||
To train a associator simply provide train samples and labels (as `array`). Example:
|
||||
To train an associator, simply provide train samples and labels (as `array`). Example:
|
||||
|
||||
```
|
||||
$samples = [['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta'], ['alpha', 'beta', 'epsilon'], ['alpha', 'beta', 'theta']];
|
||||
@ -31,7 +31,7 @@ You can train the associator using multiple data sets, predictions will be based
|
||||
|
||||
### Predict
|
||||
|
||||
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
||||
To predict a sample's label, use the `predict` method. You can provide one sample or an array of samples:
|
||||
|
||||
```
|
||||
$associator->predict(['alpha','theta']);
|
||||
@ -43,7 +43,7 @@ $associator->predict([['alpha','epsilon'],['beta','theta']]);
|
||||
|
||||
### Associating
|
||||
|
||||
Get generated association rules simply use `rules` method.
|
||||
To get the generated association rules, simply use the `getRules` method.
|
||||
|
||||
```
|
||||
$associator->getRules();
|
||||
@ -52,7 +52,7 @@ $associator->getRules();
|
||||
|
||||
### Frequent item sets
|
||||
|
||||
Generating k-length frequent item sets simply use `apriori` method.
|
||||
To generate k-length frequent item sets, simply use the `apriori` method.
|
||||
|
||||
```
|
||||
$associator->apriori();
|
||||
|
@ -14,7 +14,7 @@ $classifier = new KNearestNeighbors($k=3, new Minkowski($lambda=4));
|
||||
|
||||
## Train
|
||||
|
||||
To train a classifier simply provide train samples and labels (as `array`). Example:
|
||||
To train a classifier, simply provide train samples and labels (as `array`). Example:
|
||||
|
||||
```
|
||||
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||
@ -28,7 +28,7 @@ You can train the classifier using multiple data sets, predictions will be based
|
||||
|
||||
## Predict
|
||||
|
||||
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
||||
To predict a sample's label, use the `predict` method. You can provide one sample or an array of samples:
|
||||
|
||||
```
|
||||
$classifier->predict([3, 2]);
|
||||
|
@ -4,7 +4,7 @@ Classifier based on applying Bayes' theorem with strong (naive) independence ass
|
||||
|
||||
### Train
|
||||
|
||||
To train a classifier simply provide train samples and labels (as `array`). Example:
|
||||
To train a classifier, simply provide train samples and labels (as `array`). Example:
|
||||
|
||||
```
|
||||
$samples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
|
||||
@ -18,7 +18,7 @@ You can train the classifier using multiple data sets, predictions will be based
|
||||
|
||||
### Predict
|
||||
|
||||
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
||||
To predict a sample's label, use the `predict` method. You can provide one sample or an array of samples:
|
||||
|
||||
```
|
||||
$classifier->predict([3, 1, 1]);
|
||||
|
@ -21,7 +21,7 @@ $classifier = new SVC(Kernel::RBF, $cost = 1000, $degree = 3, $gamma = 6);
|
||||
|
||||
### Train
|
||||
|
||||
To train a classifier simply provide train samples and labels (as `array`). Example:
|
||||
To train a classifier, simply provide train samples and labels (as `array`). Example:
|
||||
|
||||
```
|
||||
use Phpml\Classification\SVC;
|
||||
@ -38,7 +38,7 @@ You can train the classifier using multiple data sets, predictions will be based
|
||||
|
||||
### Predict
|
||||
|
||||
To predict sample label use `predict` method. You can provide one sample or array of samples:
|
||||
To predict a sample's label, use the `predict` method. You can provide one sample or an array of samples:
|
||||
|
||||
```
|
||||
$classifier->predict([3, 2]);
|
||||
@ -74,7 +74,7 @@ $classifier = new SVC(
|
||||
$classifier->train($samples, $labels);
|
||||
```
|
||||
|
||||
Then use `predictProbability` method instead of `predict`:
|
||||
Then use the `predictProbability` method instead of `predict`:
|
||||
|
||||
```
|
||||
$classifier->predictProbability([3, 2]);
|
||||
|
@ -16,7 +16,7 @@ $dbscan = new DBSCAN($epsilon = 2, $minSamples = 3, new Minkowski($lambda=4));
|
||||
|
||||
### Clustering
|
||||
|
||||
To divide the samples into clusters simply use `cluster` method. It's return the `array` of clusters with samples inside.
|
||||
To divide the samples into clusters, simply use the `cluster` method. It returns the `array` of clusters with samples inside.
|
||||
|
||||
```
|
||||
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
||||
|
@ -15,11 +15,11 @@ $kmeans = new KMeans(4, KMeans::INIT_RANDOM);
|
||||
|
||||
### Clustering
|
||||
|
||||
To divide the samples into clusters simply use `cluster` method. It's return the `array` of clusters with samples inside.
|
||||
To divide the samples into clusters, simply use the `cluster` method. It returns the `array` of clusters with samples inside.
|
||||
|
||||
```
|
||||
$samples = [[1, 1], [8, 7], [1, 2], [7, 8], [2, 1], [8, 9]];
|
||||
Or if you need to keep your indentifiers along with yours samples you can use array keys as labels.
|
||||
Or, if you need to keep your identifiers along with your samples, you can use array keys as labels.
|
||||
$samples = [ 'Label1' => [1, 1], 'Label2' => [8, 7], 'Label3' => [1, 2]];
|
||||
|
||||
$kmeans = new KMeans(2);
|
||||
@ -32,8 +32,8 @@ $kmeans->cluster($samples);
|
||||
#### kmeans++ (default)
|
||||
|
||||
K-means++ method selects initial cluster centers for k-mean clustering in a smart way to speed up convergence.
|
||||
It use the DASV seeding method consists of finding good initial centroids for the clusters.
|
||||
It uses the DASV seeding method, which consists of finding good initial centroids for the clusters.
|
||||
|
||||
#### random
|
||||
|
||||
Random initialization method chooses completely random centroid. It get the space boundaries to avoid placing clusters centroid too far from samples data.
|
||||
The random initialization method chooses a completely random centroid. It gets the space boundaries to avoid placing cluster centroids too far from the sample data.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Random Split
|
||||
|
||||
One of the simplest methods from Cross-validation is implemented as `RandomSpilt` class. Samples are split to two groups: train group and test group. You can adjust number of samples in each group.
|
||||
One of the simplest methods from Cross-validation is implemented as the `RandomSplit` class. Samples are split into two groups: train group and test group. You can adjust the number of samples in each group.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
@ -14,7 +14,7 @@ $randomSplit = new RandomSplit($dataset, 0.2);
|
||||
|
||||
### Samples and labels groups
|
||||
|
||||
To get samples or labels from test and train group you can use getters:
|
||||
To get samples or labels from test and train group, you can use getters:
|
||||
|
||||
```
|
||||
$dataset = new RandomSplit($dataset, 0.3, 1234);
|
||||
|
@ -1,8 +1,8 @@
|
||||
# Stratified Random Split
|
||||
|
||||
Analogously to `RandomSpilt` class samples are split to two groups: train group and test group.
|
||||
Analogously to the `RandomSplit` class, samples are split into two groups: train group and test group.
|
||||
The distribution of samples takes into account their targets and tries to divide them equally.
|
||||
You can adjust number of samples in each group.
|
||||
You can adjust the number of samples in each group.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
@ -16,7 +16,7 @@ $split = new StratifiedRandomSplit($dataset, 0.2);
|
||||
|
||||
### Samples and labels groups
|
||||
|
||||
To get samples or labels from test and train group you can use getters:
|
||||
To get samples or labels from test and train group, you can use getters:
|
||||
|
||||
```
|
||||
$dataset = new StratifiedRandomSplit($dataset, 0.3, 1234);
|
||||
@ -41,4 +41,4 @@ $dataset = new ArrayDataset(
|
||||
$split = new StratifiedRandomSplit($dataset, 0.5);
|
||||
```
|
||||
|
||||
Split will have equals amount of each target. Two of the target `a` and two of `b`.
|
||||
The split will have equal amounts of each target: two of the target `a` and two of `b`.
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
Helper class that holds data as PHP `array` type. Implements the `Dataset` interface which is used heavily in other classes.
|
||||
|
||||
### Constructors Parameters
|
||||
### Constructor Parameters
|
||||
|
||||
* $samples - (array) of samples
|
||||
* $labels - (array) of labels
|
||||
@ -15,7 +15,7 @@ $dataset = new ArrayDataset([[1, 1], [2, 1], [3, 2], [4, 1]], ['a', 'a', 'b', 'b
|
||||
|
||||
### Samples and labels
|
||||
|
||||
To get samples or labels you can use getters:
|
||||
To get samples or labels, you can use getters:
|
||||
|
||||
```
|
||||
$dataset->getSamples();
|
||||
@ -24,7 +24,7 @@ $dataset->getTargets();
|
||||
|
||||
### Remove columns
|
||||
|
||||
You can remove columns by index numbers, for example:
|
||||
You can remove columns by their index numbers, for example:
|
||||
|
||||
```
|
||||
use Phpml\Dataset\ArrayDataset;
|
||||
|
@ -2,11 +2,11 @@
|
||||
|
||||
Helper class that loads data from CSV file. It extends the `ArrayDataset`.
|
||||
|
||||
### Constructors Parameters
|
||||
### Constructor Parameters
|
||||
|
||||
* $filepath - (string) path to `.csv` file
|
||||
* $features - (int) number of columns that are features (starts from first column), last column must be a label
|
||||
* $headingRow - (bool) define is file have a heading row (if `true` then first row will be ignored)
|
||||
* $headingRow - (bool) define if the file has a heading row (if `true` then first row will be ignored)
|
||||
|
||||
```
|
||||
$dataset = new CsvDataset('dataset.csv', 2, true);
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
Helper class that loads dataset from files. Use folder names as targets. It extends the `ArrayDataset`.
|
||||
|
||||
### Constructors Parameters
|
||||
### Constructor Parameters
|
||||
|
||||
* $rootPath - (string) path to root folder that contains files dataset
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# MnistDataset
|
||||
|
||||
Helper class that load data from MNIST dataset: [http://yann.lecun.com/exdb/mnist/](http://yann.lecun.com/exdb/mnist/)
|
||||
Helper class that loads data from the MNIST dataset: [http://yann.lecun.com/exdb/mnist/](http://yann.lecun.com/exdb/mnist/)
|
||||
|
||||
> The MNIST database of handwritten digits, available from this page, has a training set of 60,000 examples, and a test set of 10,000 examples. It is a subset of a larger set available from NIST. The digits have been size-normalized and centered in a fixed-size image.
|
||||
It is a good database for people who want to try learning techniques and pattern recognition methods on real-world data while spending minimal efforts on preprocessing and formatting.
|
||||
@ -18,7 +18,7 @@ $trainDataset = new MnistDataset('train-images-idx3-ubyte', 'train-labels-idx1-u
|
||||
|
||||
### Samples and labels
|
||||
|
||||
To get samples or labels you can use getters:
|
||||
To get samples or labels, you can use getters:
|
||||
|
||||
```
|
||||
$dataset->getSamples();
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
Helper class that loads data from SVM-Light format file. It extends the `ArrayDataset`.
|
||||
|
||||
### Constructors Parameters
|
||||
### Constructor Parameters
|
||||
|
||||
* $filepath - (string) path to the file
|
||||
|
||||
|
@ -19,7 +19,7 @@ $transformer = new TfIdfTransformer($samples);
|
||||
|
||||
### Transformation
|
||||
|
||||
To transform a collection of text samples use `transform` method. Example:
|
||||
To transform a collection of text samples, use the `transform` method. Example:
|
||||
|
||||
```
|
||||
use Phpml\FeatureExtraction\TfIdfTransformer;
|
||||
|
@ -16,7 +16,7 @@ $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
|
||||
|
||||
### Transformation
|
||||
|
||||
To transform a collection of text samples use `transform` method. Example:
|
||||
To transform a collection of text samples, use the `transform` method. Example:
|
||||
|
||||
```
|
||||
$samples = [
|
||||
@ -42,7 +42,7 @@ $vectorizer->transform($samples);
|
||||
|
||||
### Vocabulary
|
||||
|
||||
You can extract vocabulary using `getVocabulary()` method. Example:
|
||||
You can extract vocabulary using the `getVocabulary()` method. Example:
|
||||
|
||||
```
|
||||
$vectorizer->getVocabulary();
|
||||
|
@ -5,7 +5,7 @@
|
||||
## Constructor Parameters
|
||||
|
||||
* $k (int) - number of top features to select, rest will be removed (default: 10)
|
||||
* $scoringFunction (ScoringFunction) - function that take samples and targets and return array with scores (default: ANOVAFValue)
|
||||
* $scoringFunction (ScoringFunction) - function that takes samples and targets and returns an array with scores (default: ANOVAFValue)
|
||||
|
||||
```php
|
||||
use Phpml\FeatureSelection\SelectKBest;
|
||||
@ -33,7 +33,7 @@ $samples[0] = [1.4, 0.2];
|
||||
|
||||
## Scores
|
||||
|
||||
You can get a array with the calculated score for each feature.
|
||||
You can get an array with the calculated score for each feature.
|
||||
A higher value means that a given feature is better suited for learning.
|
||||
Of course, the rating depends on the scoring function used.
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
# Accuracy
|
||||
|
||||
Class for calculate classifier accuracy.
|
||||
Class for calculating classifier accuracy.
|
||||
|
||||
### Score
|
||||
|
||||
To calculate classifier accuracy score use `score` static method. Parameters:
|
||||
To calculate classifier accuracy score, use the `score` static method. Parameters:
|
||||
|
||||
* $actualLabels - (array) true sample labels
|
||||
* $predictedLabels - (array) predicted labels (e.g. from test group)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Classification Report
|
||||
|
||||
Class for calculate main classifier metrics: precision, recall, F1 score and support.
|
||||
Class for calculating main classifier metrics: precision, recall, F1 score and support.
|
||||
|
||||
### Report
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Confusion Matrix
|
||||
|
||||
Class for compute confusion matrix to evaluate the accuracy of a classification.
|
||||
Class for computing confusion matrix to evaluate the accuracy of a classification.
|
||||
|
||||
### Example (all targets)
|
||||
|
||||
|
@ -39,8 +39,7 @@ $mlp = new MLPClassifier(4, [$layer1, $layer2], ['a', 'b', 'c']);
|
||||
|
||||
## Train
|
||||
|
||||
To train a MLP simply provide train samples and labels (as array). Example:
|
||||
|
||||
To train a MLP, simply provide train samples and labels (as array). Example:
|
||||
|
||||
```
|
||||
$mlp->train(
|
||||
@ -71,7 +70,7 @@ $mlp->setLearningRate(0.1);
|
||||
|
||||
## Predict
|
||||
|
||||
To predict sample label use predict method. You can provide one sample or array of samples:
|
||||
To predict a sample's label, use the `predict` method. You can provide one sample or an array of samples:
|
||||
|
||||
```
|
||||
$mlp->predict([[1, 1, 1, 1], [0, 0, 0, 0]]);
|
||||
|
@ -49,7 +49,7 @@ $data = [
|
||||
|
||||
```
|
||||
|
||||
You can also use `$samples` constructer parameter instead of `fit` method:
|
||||
You can also use the `$samples` constructor parameter instead of the `fit` method:
|
||||
|
||||
```
|
||||
use Phpml\Preprocessing\Imputer;
|
||||
|
@ -1,10 +1,10 @@
|
||||
# LeastSquares Linear Regression
|
||||
|
||||
Linear model that use least squares method to approximate solution.
|
||||
Linear model that uses the least squares method to approximate a solution.
|
||||
|
||||
### Train
|
||||
|
||||
To train a model simply provide train samples and targets values (as `array`). Example:
|
||||
To train a model, simply provide train samples and target values (as `array`). Example:
|
||||
|
||||
```
|
||||
$samples = [[60], [61], [62], [63], [65]];
|
||||
@ -18,7 +18,7 @@ You can train the model using multiple data sets, predictions will be based on a
|
||||
|
||||
### Predict
|
||||
|
||||
To predict sample target value use `predict` method with sample to check (as `array`). Example:
|
||||
To predict sample target value, use the `predict` method with sample to check (as `array`). Example:
|
||||
|
||||
```
|
||||
$regression->predict([64]);
|
||||
@ -28,7 +28,7 @@ $regression->predict([64]);
|
||||
### Multiple Linear Regression
|
||||
|
||||
The term multiple attached to linear regression means that there are two or more sample parameters used to predict target.
|
||||
For example you can use: mileage and production year to predict price of a car.
|
||||
For example, you can use mileage and production year to predict the price of a car.
|
||||
|
||||
```
|
||||
$samples = [[73676, 1996], [77006, 1998], [10565, 2000], [146088, 1995], [15000, 2001], [65940, 2000], [9300, 2000], [93739, 1996], [153260, 1994], [17764, 2002], [57000, 1998], [15000, 2000]];
|
||||
@ -42,7 +42,7 @@ $regression->predict([60000, 1996])
|
||||
|
||||
### Intercept and Coefficients
|
||||
|
||||
After you train your model you can get the intercept and coefficients array.
|
||||
After you train your model, you can get the intercept and coefficients array.
|
||||
|
||||
```
|
||||
$regression->getIntercept();
|
||||
|
@ -21,7 +21,7 @@ $regression = new SVR(Kernel::LINEAR, $degree = 3, $epsilon=10.0);
|
||||
|
||||
### Train
|
||||
|
||||
To train a model simply provide train samples and targets values (as `array`). Example:
|
||||
To train a model, simply provide train samples and target values (as `array`). Example:
|
||||
|
||||
```
|
||||
use Phpml\Regression\SVR;
|
||||
@ -38,7 +38,7 @@ You can train the model using multiple data sets, predictions will be based on a
|
||||
|
||||
### Predict
|
||||
|
||||
To predict sample target value use `predict` method. You can provide one sample or array of samples:
|
||||
To predict a sample's target value, use the `predict` method. You can provide one sample or an array of samples:
|
||||
|
||||
```
|
||||
$regression->predict([64])
|
||||
|
@ -6,12 +6,11 @@ In machine learning, it is common to run a sequence of algorithms to process and
|
||||
* Convert each document’s words into a numerical feature vector ([Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)).
|
||||
* Learn a prediction model using the feature vectors and labels.
|
||||
|
||||
PHP-ML represents such a workflow as a Pipeline, which consists sequence of transformers and a estimator.
|
||||
|
||||
PHP-ML represents such a workflow as a Pipeline, which consists of a sequence of transformers and an estimator.
|
||||
|
||||
### Constructor Parameters
|
||||
|
||||
* $transformers (array|Transformer[]) - sequence of objects that implements Transformer interface
|
||||
* $transformers (array|Transformer[]) - sequence of objects that implements the Transformer interface
|
||||
* $estimator (Estimator) - estimator that can train and predict
|
||||
|
||||
```
|
||||
@ -29,7 +28,8 @@ $pipeline = new Pipeline($transformers, $estimator);
|
||||
|
||||
### Example
|
||||
|
||||
First our pipeline replace missing value, then normalize samples and finally train SVC estimator. Thus prepared pipeline repeats each transformation step for predicted sample.
|
||||
First, our pipeline replaces the missing value, then normalizes samples and finally trains the SVC estimator.
|
||||
A pipeline prepared this way repeats each transformation step for every predicted sample.
|
||||
|
||||
```
|
||||
use Phpml\Classification\SVC;
|
||||
|
@ -4,7 +4,7 @@ Selected algorithms require the use of a function for calculating the distance.
|
||||
|
||||
### Euclidean
|
||||
|
||||
Class for calculation Euclidean distance.
|
||||
Class for calculating Euclidean distance.
|
||||
|
||||
![euclidean](https://upload.wikimedia.org/math/8/4/9/849f040fd10bb86f7c85eb0bbe3566a4.png "Euclidean Distance")
|
||||
|
||||
@ -21,7 +21,7 @@ $euclidean->distance($a, $b);
|
||||
|
||||
### Manhattan
|
||||
|
||||
Class for calculation Manhattan distance.
|
||||
Class for calculating Manhattan distance.
|
||||
|
||||
![manhattan](https://upload.wikimedia.org/math/4/c/5/4c568bd1d76a6b15e19cb2ac3ad75350.png "Manhattan Distance")
|
||||
|
||||
@ -38,7 +38,7 @@ $manhattan->distance($a, $b);
|
||||
|
||||
### Chebyshev
|
||||
|
||||
Class for calculation Chebyshev distance.
|
||||
Class for calculating Chebyshev distance.
|
||||
|
||||
![chebyshev](https://upload.wikimedia.org/math/7/1/2/71200f7dbb43b3bcfbcbdb9e02ab0a0c.png "Chebyshev Distance")
|
||||
|
||||
@ -55,7 +55,7 @@ $chebyshev->distance($a, $b);
|
||||
|
||||
### Minkowski
|
||||
|
||||
Class for calculation Minkowski distance.
|
||||
Class for calculating Minkowski distance.
|
||||
|
||||
![minkowski](https://upload.wikimedia.org/math/a/a/0/aa0c62083c12390cb15ac3217de88e66.png "Minkowski Distance")
|
||||
|
||||
@ -83,7 +83,7 @@ $minkowski->distance($a, $b);
|
||||
|
||||
### Custom distance
|
||||
|
||||
To apply your own function of distance use `Distance` interface. Example
|
||||
To apply your own distance function, implement the `Distance` interface. Example:
|
||||
|
||||
```
|
||||
class CustomDistance implements Distance
|
||||
|
Loading…
Reference in New Issue
Block a user