mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2025-02-13 17:38:36 +00:00
update missing docs
This commit is contained in:
parent
ccfa38ba4d
commit
325427c723
@ -1 +1,45 @@
|
|||||||
# Imputation missing values
|
# Imputation of missing values
|
||||||
|
|
||||||
|
For various reasons, many real-world datasets contain missing values, often encoded as blanks, NaNs or other placeholders.
|
||||||
|
To solve this problem you can use the `Imputer` class.
|
||||||
|
|
||||||
|
## Constructor Parameters
|
||||||
|
|
||||||
|
* $missingValue (mixed) - this value will be replaced (default null)
|
||||||
|
* $strategy (Strategy) - imputation strategy (ready to use: MeanStrategy, MedianStrategy, MostFrequentStrategy)
|
||||||
|
* $axis (int) - axis for strategy, Imputer::AXIS_COLUMN or Imputer::AXIS_ROW
|
||||||
|
|
||||||
|
```
|
||||||
|
$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);
|
||||||
|
$imputer = new Imputer(null, new MedianStrategy(), Imputer::AXIS_ROW);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Strategy
|
||||||
|
|
||||||
|
* MeanStrategy - replace missing values using the mean along the axis
|
||||||
|
* MedianStrategy - replace missing values using the median along the axis
|
||||||
|
* MostFrequentStrategy - replace missing values using the most frequent value along the axis
|
||||||
|
|
||||||
|
## Example of use
|
||||||
|
|
||||||
|
```
|
||||||
|
$data = [
|
||||||
|
[1, null, 3, 4],
|
||||||
|
[4, 3, 2, 1],
|
||||||
|
[null, 6, 7, 8],
|
||||||
|
[8, 7, null, 5],
|
||||||
|
];
|
||||||
|
|
||||||
|
$imputer = new Imputer(null, new MeanStrategy(), Imputer::AXIS_COLUMN);
|
||||||
|
$imputer->preprocess($data);
|
||||||
|
|
||||||
|
/*
|
||||||
|
$data = [
|
||||||
|
[1, 5.33, 3, 4],
|
||||||
|
[4, 3, 2, 1],
|
||||||
|
[4.33, 6, 7, 8],
|
||||||
|
[8, 7, 4, 5],
|
||||||
|
];
|
||||||
|
*/
|
||||||
|
|
||||||
|
```
|
||||||
|
@ -1 +1,59 @@
|
|||||||
# Normalization
|
# Normalization
|
||||||
|
|
||||||
|
Normalization is the process of scaling individual samples to have unit norm.
|
||||||
|
|
||||||
|
## L2 norm
|
||||||
|
|
||||||
|
[http://mathworld.wolfram.com/L2-Norm.html](http://mathworld.wolfram.com/L2-Norm.html)
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\Preprocessing\Normalizer;
|
||||||
|
|
||||||
|
$samples = [
|
||||||
|
[1, -1, 2],
|
||||||
|
[2, 0, 0],
|
||||||
|
[0, 1, -1],
|
||||||
|
];
|
||||||
|
|
||||||
|
$normalizer = new Normalizer();
|
||||||
|
$normalizer->preprocess($samples);
|
||||||
|
|
||||||
|
/*
|
||||||
|
$samples = [
|
||||||
|
[0.4, -0.4, 0.81],
|
||||||
|
[1.0, 0.0, 0.0],
|
||||||
|
[0.0, 0.7, -0.7],
|
||||||
|
];
|
||||||
|
*/
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## L1 norm
|
||||||
|
|
||||||
|
[http://mathworld.wolfram.com/L1-Norm.html](http://mathworld.wolfram.com/L1-Norm.html)
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\Preprocessing\Normalizer;
|
||||||
|
|
||||||
|
$samples = [
|
||||||
|
[1, -1, 2],
|
||||||
|
[2, 0, 0],
|
||||||
|
[0, 1, -1],
|
||||||
|
];
|
||||||
|
|
||||||
|
$normalizer = new Normalizer(Normalizer::NORM_L1);
|
||||||
|
$normalizer->preprocess($samples);
|
||||||
|
|
||||||
|
/*
|
||||||
|
$samples = [
|
||||||
|
[0.25, -0.25, 0.5],
|
||||||
|
[1.0, 0.0, 0.0],
|
||||||
|
[0.0, 0.5, -0.5],
|
||||||
|
];
|
||||||
|
*/
|
||||||
|
|
||||||
|
```
|
||||||
|
@ -1,7 +1,80 @@
|
|||||||
# Statistic
|
# Statistic
|
||||||
|
|
||||||
### Correlation
|
Selected statistical methods.
|
||||||
|
|
||||||
### Mean
|
## Correlation
|
||||||
|
|
||||||
### Standard Deviation
|
Correlation coefficients are used in statistics to measure how strong a relationship is between two variables. There are several types of correlation coefficient.
|
||||||
|
|
||||||
|
### Pearson correlation
|
||||||
|
|
||||||
|
Pearson’s correlation or Pearson correlation is a correlation coefficient commonly used in linear regression.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\Math\Statistic\Correlation;
|
||||||
|
|
||||||
|
$x = [43, 21, 25, 42, 57, 59];
|
||||||
|
$y = [99, 65, 79, 75, 87, 82];
|
||||||
|
|
||||||
|
Correlation::pearson($x, $y);
|
||||||
|
// return 0.549
|
||||||
|
```
|
||||||
|
|
||||||
|
## Mean
|
||||||
|
|
||||||
|
### Arithmetic
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\Math\Statistic\Mean;
|
||||||
|
|
||||||
|
Mean::arithmetic([2, 5]);
|
||||||
|
// return 3.5
|
||||||
|
|
||||||
|
Mean::arithmetic([0.5, 0.5, 1.5, 2.5, 3.5]);
|
||||||
|
// return 1.7
|
||||||
|
```
|
||||||
|
|
||||||
|
## Median
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\Math\Statistic\Mean;
|
||||||
|
|
||||||
|
Mean::median([5, 2, 6, 1, 3, 4]);
|
||||||
|
// return 3.5
|
||||||
|
|
||||||
|
Mean::median([5, 2, 6, 1, 3]);
|
||||||
|
// return 3
|
||||||
|
```
|
||||||
|
|
||||||
|
## Mode
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\Math\Statistic\Mean;
|
||||||
|
|
||||||
|
Mean::mode([5, 2, 6, 1, 3, 4, 6, 6, 5]);
|
||||||
|
// return 6
|
||||||
|
```
|
||||||
|
|
||||||
|
## Standard Deviation
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\Math\Statistic\StandardDeviation;
|
||||||
|
|
||||||
|
$population = [5, 6, 8, 9];
|
||||||
|
StandardDeviation::population($population);
|
||||||
|
// return 1.825
|
||||||
|
|
||||||
|
$population = [7100, 15500, 4400, 4400, 5900, 4600, 8800, 2000, 2750, 2550, 960, 1025];
|
||||||
|
StandardDeviation::population($population);
|
||||||
|
// return 4079
|
||||||
|
```
|
||||||
|
Loading…
x
Reference in New Issue
Block a user