docs for files dataset and php-cs-fixer

This commit is contained in:
Arkadiusz Kondas 2016-07-16 23:56:52 +02:00
parent e0b560f31d
commit 7abee3061a
6 changed files with 72 additions and 5 deletions

View File

@ -61,7 +61,9 @@ composer require php-ai/php-ml
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/) * [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/)
* [Tf-idf Transformer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/tf-idf-transformer/) * [Tf-idf Transformer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/tf-idf-transformer/)
* Datasets * Datasets
* [Array](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/array-dataset/)
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/) * [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/)
* [Files](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/files-dataset/)
* Ready to use: * Ready to use:
* [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/) * [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/)
* [Wine](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/wine/) * [Wine](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/wine/)

View File

@ -61,7 +61,9 @@ composer require php-ai/php-ml
* [Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/) * [Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)
* [Tf-idf Transformer](machine-learning/feature-extraction/tf-idf-transformer/) * [Tf-idf Transformer](machine-learning/feature-extraction/tf-idf-transformer/)
* Datasets * Datasets
* [Array](machine-learning/datasets/array-dataset/)
* [CSV](machine-learning/datasets/csv-dataset/) * [CSV](machine-learning/datasets/csv-dataset/)
* [Files](machine-learning/datasets/files-dataset/)
* Ready to use: * Ready to use:
* [Iris](machine-learning/datasets/demo/iris/) * [Iris](machine-learning/datasets/demo/iris/)
* [Wine](machine-learning/datasets/demo/wine/) * [Wine](machine-learning/datasets/demo/wine/)

View File

@ -0,0 +1,57 @@
# FilesDataset
Helper class that loads a dataset from files, using the folder names as targets. It extends the `ArrayDataset`.
### Constructor Parameters
* $rootPath - (string) path to the root folder that contains the files dataset
```
use Phpml\Dataset\FilesDataset;
$dataset = new FilesDataset('path/to/data');
```
See [ArrayDataset](machine-learning/datasets/array-dataset/) for more information.
### Example
Files structure:
```
data
business
001.txt
002.txt
...
entertainment
001.txt
002.txt
...
politics
001.txt
002.txt
...
sport
001.txt
002.txt
...
tech
001.txt
002.txt
...
```
Load files data with `FilesDataset`:
```
use Phpml\Dataset\FilesDataset;
$dataset = new FilesDataset('path/to/data');
$dataset->getSamples()[0][0] // content from file path/to/data/business/001.txt
$dataset->getTargets()[0] // business
$dataset->getSamples()[40][0] // content from file path/to/data/tech/001.txt
$dataset->getTargets()[40] // tech
```

View File

@ -29,6 +29,7 @@ pages:
- Datasets: - Datasets:
- Array Dataset: machine-learning/datasets/array-dataset.md - Array Dataset: machine-learning/datasets/array-dataset.md
- CSV Dataset: machine-learning/datasets/csv-dataset.md - CSV Dataset: machine-learning/datasets/csv-dataset.md
- Files Dataset: machine-learning/datasets/files-dataset.md
- Ready to use datasets: - Ready to use datasets:
- Iris: machine-learning/datasets/demo/iris.md - Iris: machine-learning/datasets/demo/iris.md
- Wine: machine-learning/datasets/demo/wine.md - Wine: machine-learning/datasets/demo/wine.md

View File

@ -1,5 +1,6 @@
<?php <?php
declare(strict_types = 1);
declare (strict_types = 1);
namespace Phpml\Dataset; namespace Phpml\Dataset;
@ -26,7 +27,7 @@ class FilesDataset extends ArrayDataset
*/ */
private function scanRootPath(string $rootPath) private function scanRootPath(string $rootPath)
{ {
foreach(glob($rootPath . DIRECTORY_SEPARATOR . '*', GLOB_ONLYDIR) as $dir) { foreach (glob($rootPath.DIRECTORY_SEPARATOR.'*', GLOB_ONLYDIR) as $dir) {
$this->scanDir($dir); $this->scanDir($dir);
} }
} }
@ -38,10 +39,9 @@ class FilesDataset extends ArrayDataset
{ {
$target = basename($dir); $target = basename($dir);
foreach(array_filter(glob($dir. DIRECTORY_SEPARATOR . '*'), 'is_file') as $file) { foreach (array_filter(glob($dir.DIRECTORY_SEPARATOR.'*'), 'is_file') as $file) {
$this->samples[] = [file_get_contents($file)]; $this->samples[] = [file_get_contents($file)];
$this->targets[] = $target; $this->targets[] = $target;
} }
} }
} }

View File

@ -31,8 +31,13 @@ class FilesDatasetTest extends \PHPUnit_Framework_TestCase
$firstSample = file_get_contents($rootPath.'/business/001.txt'); $firstSample = file_get_contents($rootPath.'/business/001.txt');
$this->assertEquals($firstSample, $dataset->getSamples()[0][0]); $this->assertEquals($firstSample, $dataset->getSamples()[0][0]);
$firstTarget = 'business';
$this->assertEquals($firstTarget, $dataset->getTargets()[0]);
$lastSample = file_get_contents($rootPath.'/tech/010.txt'); $lastSample = file_get_contents($rootPath.'/tech/010.txt');
$this->assertEquals($lastSample, $dataset->getSamples()[49][0]); $this->assertEquals($lastSample, $dataset->getSamples()[49][0]);
}
$lastTarget = 'tech';
$this->assertEquals($lastTarget, $dataset->getTargets()[49]);
}
} }