2016-07-16 21:29:40 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-07-16 21:29:40 +00:00
|
|
|
|
2018-01-06 12:09:33 +00:00
|
|
|
namespace Phpml\Tests\Dataset;
|
2016-07-16 21:29:40 +00:00
|
|
|
|
|
|
|
use Phpml\Dataset\FilesDataset;
|
2017-11-28 07:00:13 +00:00
|
|
|
use Phpml\Exception\DatasetException;
|
2017-02-03 11:58:25 +00:00
|
|
|
use PHPUnit\Framework\TestCase;
|
2016-07-16 21:29:40 +00:00
|
|
|
|
2017-02-03 11:58:25 +00:00
|
|
|
/**
 * Tests for FilesDataset construction from a root directory path.
 *
 * The loading test reads the fixture tree under Resources/bbc, located next
 * to this test file.
 */
class FilesDatasetTest extends TestCase
{
    /**
     * Constructing a dataset from a non-existent root path must throw a
     * DatasetException.
     */
    public function testThrowExceptionOnMissingRootFolder(): void
    {
        // The expectation has to be registered before the throwing call.
        $this->expectException(DatasetException::class);

        new FilesDataset('some/not/existed/path');
    }

    /**
     * Loads the BBC fixture and checks the sample/target counts, the distinct
     * targets in order of first appearance, and the first and last
     * sample/target pairs.
     */
    public function testLoadFilesDatasetWithBBCData(): void
    {
        $rootPath = __DIR__.'/Resources/bbc';

        $dataset = new FilesDataset($rootPath);

        // Read each collection once instead of calling the getter per assertion.
        $samples = $dataset->getSamples();
        $targets = $dataset->getTargets();

        self::assertCount(50, $samples);
        self::assertCount(50, $targets);

        // array_unique() keeps the original keys, so reindex before comparing lists.
        $expectedTargets = ['business', 'entertainment', 'politics', 'sport', 'tech'];
        self::assertSame($expectedTargets, array_values(array_unique($targets)));

        // assertSame() is strict: if file_get_contents() returns false for a
        // missing fixture, the comparison fails instead of loosely matching
        // an empty sample.
        $firstSample = file_get_contents($rootPath.'/business/001.txt');
        self::assertSame($firstSample, $samples[0]);
        self::assertSame('business', $targets[0]);

        $lastSample = file_get_contents($rootPath.'/tech/010.txt');
        self::assertSame($lastSample, $samples[49]);
        self::assertSame('tech', $targets[49]);
    }
}
|