mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-22 04:55:10 +00:00
docs for files dataset and php-cs-fixer
This commit is contained in:
parent
e0b560f31d
commit
7abee3061a
@ -61,7 +61,9 @@ composer require php-ai/php-ml
|
|||||||
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/)
|
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/)
|
||||||
* [Tf-idf Transformer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/tf-idf-transformer/)
|
* [Tf-idf Transformer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/tf-idf-transformer/)
|
||||||
* Datasets
|
* Datasets
|
||||||
|
* [Array](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/array-dataset/)
|
||||||
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/)
|
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/)
|
||||||
|
* [Files](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/files-dataset/)
|
||||||
* Ready to use:
|
* Ready to use:
|
||||||
* [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/)
|
* [Iris](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/iris/)
|
||||||
* [Wine](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/wine/)
|
* [Wine](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/demo/wine/)
|
||||||
|
@ -61,7 +61,9 @@ composer require php-ai/php-ml
|
|||||||
* [Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)
|
* [Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)
|
||||||
* [Tf-idf Transformer](machine-learning/feature-extraction/tf-idf-transformer/)
|
* [Tf-idf Transformer](machine-learning/feature-extraction/tf-idf-transformer/)
|
||||||
* Datasets
|
* Datasets
|
||||||
|
* [Array](machine-learning/datasets/array-dataset/)
|
||||||
* [CSV](machine-learning/datasets/csv-dataset/)
|
* [CSV](machine-learning/datasets/csv-dataset/)
|
||||||
|
* [Files](machine-learning/datasets/files-dataset/)
|
||||||
* Ready to use:
|
* Ready to use:
|
||||||
* [Iris](machine-learning/datasets/demo/iris/)
|
* [Iris](machine-learning/datasets/demo/iris/)
|
||||||
* [Wine](machine-learning/datasets/demo/wine/)
|
* [Wine](machine-learning/datasets/demo/wine/)
|
||||||
|
57
docs/machine-learning/datasets/files-dataset.md
Normal file
57
docs/machine-learning/datasets/files-dataset.md
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# FilesDataset
|
||||||
|
|
||||||
|
Helper class that loads a dataset from files, using folder names as targets. It extends the `ArrayDataset`.
|
||||||
|
|
||||||
|
### Constructor Parameters
|
||||||
|
|
||||||
|
* $rootPath - (string) path to the root folder that contains the files dataset
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\Dataset\FilesDataset;
|
||||||
|
|
||||||
|
$dataset = new FilesDataset('path/to/data');
|
||||||
|
```
|
||||||
|
|
||||||
|
See [ArrayDataset](machine-learning/datasets/array-dataset/) for more information.
|
||||||
|
|
||||||
|
### Example
|
||||||
|
|
||||||
|
File structure:
|
||||||
|
|
||||||
|
```
|
||||||
|
data
|
||||||
|
business
|
||||||
|
001.txt
|
||||||
|
002.txt
|
||||||
|
...
|
||||||
|
entertainment
|
||||||
|
001.txt
|
||||||
|
002.txt
|
||||||
|
...
|
||||||
|
politics
|
||||||
|
001.txt
|
||||||
|
002.txt
|
||||||
|
...
|
||||||
|
sport
|
||||||
|
001.txt
|
||||||
|
002.txt
|
||||||
|
...
|
||||||
|
tech
|
||||||
|
001.txt
|
||||||
|
002.txt
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
Load files data with `FilesDataset`:
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\Dataset\FilesDataset;
|
||||||
|
|
||||||
|
$dataset = new FilesDataset('path/to/data');
|
||||||
|
|
||||||
|
$dataset->getSamples()[0][0] // content from file path/to/data/business/001.txt
|
||||||
|
$dataset->getTargets()[0] // business
|
||||||
|
|
||||||
|
$dataset->getSamples()[40][0] // content from file path/to/data/tech/001.txt
|
||||||
|
$dataset->getTargets()[40] // tech
|
||||||
|
```
|
@ -29,6 +29,7 @@ pages:
|
|||||||
- Datasets:
|
- Datasets:
|
||||||
- Array Dataset: machine-learning/datasets/array-dataset.md
|
- Array Dataset: machine-learning/datasets/array-dataset.md
|
||||||
- CSV Dataset: machine-learning/datasets/csv-dataset.md
|
- CSV Dataset: machine-learning/datasets/csv-dataset.md
|
||||||
|
- Files Dataset: machine-learning/datasets/files-dataset.md
|
||||||
- Ready to use datasets:
|
- Ready to use datasets:
|
||||||
- Iris: machine-learning/datasets/demo/iris.md
|
- Iris: machine-learning/datasets/demo/iris.md
|
||||||
- Wine: machine-learning/datasets/demo/wine.md
|
- Wine: machine-learning/datasets/demo/wine.md
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
declare(strict_types = 1);
|
|
||||||
|
declare (strict_types = 1);
|
||||||
|
|
||||||
namespace Phpml\Dataset;
|
namespace Phpml\Dataset;
|
||||||
|
|
||||||
@ -26,7 +27,7 @@ class FilesDataset extends ArrayDataset
|
|||||||
*/
|
*/
|
||||||
private function scanRootPath(string $rootPath)
|
private function scanRootPath(string $rootPath)
|
||||||
{
|
{
|
||||||
foreach(glob($rootPath . DIRECTORY_SEPARATOR . '*', GLOB_ONLYDIR) as $dir) {
|
foreach (glob($rootPath.DIRECTORY_SEPARATOR.'*', GLOB_ONLYDIR) as $dir) {
|
||||||
$this->scanDir($dir);
|
$this->scanDir($dir);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -38,10 +39,9 @@ class FilesDataset extends ArrayDataset
|
|||||||
{
|
{
|
||||||
$target = basename($dir);
|
$target = basename($dir);
|
||||||
|
|
||||||
foreach(array_filter(glob($dir. DIRECTORY_SEPARATOR . '*'), 'is_file') as $file) {
|
foreach (array_filter(glob($dir.DIRECTORY_SEPARATOR.'*'), 'is_file') as $file) {
|
||||||
$this->samples[] = [file_get_contents($file)];
|
$this->samples[] = [file_get_contents($file)];
|
||||||
$this->targets[] = $target;
|
$this->targets[] = $target;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -31,8 +31,13 @@ class FilesDatasetTest extends \PHPUnit_Framework_TestCase
|
|||||||
$firstSample = file_get_contents($rootPath.'/business/001.txt');
|
$firstSample = file_get_contents($rootPath.'/business/001.txt');
|
||||||
$this->assertEquals($firstSample, $dataset->getSamples()[0][0]);
|
$this->assertEquals($firstSample, $dataset->getSamples()[0][0]);
|
||||||
|
|
||||||
|
$firstTarget = 'business';
|
||||||
|
$this->assertEquals($firstTarget, $dataset->getTargets()[0]);
|
||||||
|
|
||||||
$lastSample = file_get_contents($rootPath.'/tech/010.txt');
|
$lastSample = file_get_contents($rootPath.'/tech/010.txt');
|
||||||
$this->assertEquals($lastSample, $dataset->getSamples()[49][0]);
|
$this->assertEquals($lastSample, $dataset->getSamples()[49][0]);
|
||||||
}
|
|
||||||
|
|
||||||
|
$lastTarget = 'tech';
|
||||||
|
$this->assertEquals($lastTarget, $dataset->getTargets()[49]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user