2016-07-10 12:13:35 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-07-10 12:13:35 +00:00
|
|
|
|
2018-01-06 12:09:33 +00:00
|
|
|
namespace Phpml\Tests\CrossValidation;
|
2016-07-10 12:13:35 +00:00
|
|
|
|
|
|
|
use Phpml\CrossValidation\StratifiedRandomSplit;
|
|
|
|
use Phpml\Dataset\ArrayDataset;
|
2017-02-03 11:58:25 +00:00
|
|
|
use PHPUnit\Framework\TestCase;
|
2016-07-10 12:13:35 +00:00
|
|
|
|
2017-02-03 11:58:25 +00:00
|
|
|
/**
 * Tests for StratifiedRandomSplit: checks how many labels of each class end up
 * in the test partition for evenly distributed datasets.
 */
class StratifiedRandomSplitTest extends TestCase
{
    /**
     * Eight samples with string labels, four per class: splitting with 0.5 must
     * put two labels of each class into the test labels, and 0.25 must put one.
     */
    public function testDatasetStratifiedRandomSplitWithEvenDistribution(): void
    {
        $dataset = new ArrayDataset(
            // samples
            [[1], [2], [3], [4], [5], [6], [7], [8]],
            // labels
            ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b']
        );

        // NOTE(review): the second constructor argument is presumably the
        // test-set fraction; only the resulting test labels are checked here.
        $split = new StratifiedRandomSplit($dataset, 0.5);
        $testLabels = $split->getTestLabels();

        self::assertSame(2, $this->countSamplesByTarget($testLabels, 'a'));
        self::assertSame(2, $this->countSamplesByTarget($testLabels, 'b'));

        $split = new StratifiedRandomSplit($dataset, 0.25);
        $testLabels = $split->getTestLabels();

        self::assertSame(1, $this->countSamplesByTarget($testLabels, 'a'));
        self::assertSame(1, $this->countSamplesByTarget($testLabels, 'b'));
    }

    /**
     * Same scenario as above but with integer labels interleaved (1, 2, 1, 2, ...),
     * so the per-class counts must hold for numeric targets as well.
     */
    public function testDatasetStratifiedRandomSplitWithEvenDistributionAndNumericTargets(): void
    {
        $dataset = new ArrayDataset(
            // samples
            [[1], [2], [3], [4], [5], [6], [7], [8]],
            // labels
            [1, 2, 1, 2, 1, 2, 1, 2]
        );

        $split = new StratifiedRandomSplit($dataset, 0.5);
        $testLabels = $split->getTestLabels();

        self::assertSame(2, $this->countSamplesByTarget($testLabels, 1));
        self::assertSame(2, $this->countSamplesByTarget($testLabels, 2));

        $split = new StratifiedRandomSplit($dataset, 0.25);
        $testLabels = $split->getTestLabels();

        self::assertSame(1, $this->countSamplesByTarget($testLabels, 1));
        self::assertSame(1, $this->countSamplesByTarget($testLabels, 2));
    }

    /**
     * Counts how many entries of $splitTargets are identical (===) to $countTarget.
     *
     * @param array      $splitTargets labels to scan
     * @param string|int $countTarget  label value to count
     *
     * @return int number of strictly matching entries (0 for an empty array)
     */
    private function countSamplesByTarget(array $splitTargets, $countTarget): int
    {
        // Strict mode keeps the original === semantics, so 1 never matches '1'.
        return count(array_keys($splitTargets, $countTarget, true));
    }
}
|