From ee6ea3b85075ad81eaa7a1f0e3e85d6a5efb1556 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Mon, 11 Jul 2016 00:07:07 +0200 Subject: [PATCH] create docs for StratifiedRandomSplit --- README.md | 1 + docs/index.md | 1 + .../cross-validation/random-split.md | 4 +- .../stratified-random-split.md | 44 +++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 docs/machine-learning/cross-validation/stratified-random-split.md diff --git a/README.md b/README.md index c10cb7b..1f7d97c 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ composer require php-ai/php-ml * [Accuracy](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/accuracy/) * Cross Validation * [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split/) + * [Stratified Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/stratified-random-split/) * Preprocessing * [Normalization](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/normalization/) * [Imputation missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values/) diff --git a/docs/index.md b/docs/index.md index db3c32b..7943c38 100644 --- a/docs/index.md +++ b/docs/index.md @@ -50,6 +50,7 @@ composer require php-ai/php-ml * [Accuracy](http://php-ml.readthedocs.io/en/latest/machine-learning/metric/accuracy/) * Cross Validation * [Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/random-split/) + * [Stratified Random Split](http://php-ml.readthedocs.io/en/latest/machine-learning/cross-validation/stratified-random-split/) * Preprocessing * [Normalization](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/normalization/) * [Imputation missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values/) diff --git 
a/docs/machine-learning/cross-validation/random-split.md b/docs/machine-learning/cross-validation/random-split.md index 464f0db..edfdded 100644 --- a/docs/machine-learning/cross-validation/random-split.md +++ b/docs/machine-learning/cross-validation/random-split.md @@ -1,4 +1,4 @@ -# RandomSplit +# Random Split One of the simplest methods from Cross-validation is implemented as `RandomSpilt` class. Samples are split to two groups: train group and test group. You can adjust number of samples in each group. @@ -6,7 +6,7 @@ One of the simplest methods from Cross-validation is implemented as `RandomSpilt * $dataset - object that implements `Dataset` interface * $testSize - a fraction of test split (float, from 0 to 1, default: 0.3) -* $seed - seed for random generator (for tests) +* $seed - seed for random generator (e.g. for tests) ``` $randomSplit = new RandomSplit($dataset, 0.2); diff --git a/docs/machine-learning/cross-validation/stratified-random-split.md b/docs/machine-learning/cross-validation/stratified-random-split.md new file mode 100644 index 0000000..d3f53be --- /dev/null +++ b/docs/machine-learning/cross-validation/stratified-random-split.md @@ -0,0 +1,44 @@ +# Stratified Random Split + +Analogously to the `RandomSplit` class, samples are split into two groups: a train group and a test group. +The distribution of samples takes their targets into account and tries to divide them equally. +You can adjust the number of samples in each group. + +### Constructor Parameters + +* $dataset - object that implements `Dataset` interface +* $testSize - a fraction of test split (float, from 0 to 1, default: 0.3) +* $seed - seed for random generator (e.g. 
for tests) + +``` +$split = new StratifiedRandomSplit($dataset, 0.2); +``` + +### Samples and labels groups + +To get samples or labels from the test and train groups, you can use getters: + +``` +$dataset = new StratifiedRandomSplit($dataset, 0.3, 1234); + +// train group +$dataset->getTrainSamples(); +$dataset->getTrainLabels(); + +// test group +$dataset->getTestSamples(); +$dataset->getTestLabels(); +``` + +### Example + +``` +$dataset = new ArrayDataset( + $samples = [[1], [2], [3], [4], [5], [6], [7], [8]], + $targets = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'] +); + +$split = new StratifiedRandomSplit($dataset, 0.5); +``` + +The split will have an equal number of samples for each target: two with target `a` and two with target `b`.