2016-07-10 12:13:35 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-07-10 12:13:35 +00:00
|
|
|
|
|
|
|
namespace Phpml\CrossValidation;
|
|
|
|
|
|
|
|
use Phpml\Dataset\Dataset;
|
|
|
|
use Phpml\Exception\InvalidArgumentException;
|
|
|
|
|
|
|
|
/**
 * Base class for strategies that divide a dataset into a training part and a
 * testing part.
 *
 * The constructor validates the requested test ratio, seeds PHP's Mersenne
 * Twister generator and then delegates the actual partitioning to
 * {@see Split::splitDataset()}, which concrete subclasses must implement.
 */
abstract class Split
{
    /**
     * Samples belonging to the training part.
     *
     * Starts empty; this class never writes to it, so it is presumably filled
     * by splitDataset() implementations.
     *
     * @var array
     */
    protected $trainSamples = [];

    /**
     * Samples belonging to the testing part.
     *
     * Starts empty; presumably filled by splitDataset() implementations.
     *
     * @var array
     */
    protected $testSamples = [];

    /**
     * Labels belonging to the training part.
     *
     * Starts empty; presumably filled by splitDataset() implementations.
     *
     * @var array
     */
    protected $trainLabels = [];

    /**
     * Labels belonging to the testing part.
     *
     * Starts empty; presumably filled by splitDataset() implementations.
     *
     * @var array
     */
    protected $testLabels = [];

    /**
     * Validates the arguments, seeds the random generator and runs the split.
     *
     * @param Dataset  $dataset  Dataset handed to splitDataset().
     * @param float    $testSize Ratio forwarded to splitDataset(); must lie
     *                           strictly between 0 and 1.
     * @param int|null $seed     Seed for mt_srand(); null requests a random seed.
     *
     * @throws InvalidArgumentException When $testSize is not strictly inside (0, 1),
     *                                  including when it is NAN.
     */
    public function __construct(Dataset $dataset, float $testSize = 0.3, ?int $seed = null)
    {
        // Phrased as "not inside the range" rather than "below or above it":
        // every ordered comparison with NAN is false, so this form rejects NAN
        // as well as values that are <= 0 or >= 1.
        if (!($testSize > 0 && $testSize < 1)) {
            throw InvalidArgumentException::percentNotInRange('testSize');
        }

        // Seed first so that any mt_rand()-based work in splitDataset() sees it.
        $this->seedGenerator($seed);
        $this->splitDataset($dataset, $testSize);
    }

    /**
     * Returns the current contents of the training-samples property.
     */
    public function getTrainSamples(): array
    {
        return $this->trainSamples;
    }

    /**
     * Returns the current contents of the testing-samples property.
     */
    public function getTestSamples(): array
    {
        return $this->testSamples;
    }

    /**
     * Returns the current contents of the training-labels property.
     */
    public function getTrainLabels(): array
    {
        return $this->trainLabels;
    }

    /**
     * Returns the current contents of the testing-labels property.
     */
    public function getTestLabels(): array
    {
        return $this->testLabels;
    }

    /**
     * Performs the actual partitioning; called once from the constructor after
     * validation and seeding.
     *
     * NOTE(review): the return type is left undeclared on purpose so that the
     * signature stays compatible with subclasses defined outside this file.
     *
     * @param Dataset $dataset  Dataset received by the constructor.
     * @param float   $testSize Validated ratio, strictly between 0 and 1.
     */
    abstract protected function splitDataset(Dataset $dataset, float $testSize);

    /**
     * Seeds PHP's Mersenne Twister generator.
     *
     * @param int|null $seed Explicit seed, or null to call mt_srand() without
     *                       arguments so that PHP chooses the seed itself.
     */
    protected function seedGenerator(?int $seed = null): void
    {
        if ($seed === null) {
            mt_srand();

            return;
        }

        mt_srand($seed);
    }
}
|