2016-07-10 12:13:35 +00:00
|
|
|
<?php
|
|
|
|
|
2016-11-20 21:53:17 +00:00
|
|
|
declare(strict_types=1);
|
2016-07-10 12:13:35 +00:00
|
|
|
|
|
|
|
namespace Phpml\CrossValidation;
|
|
|
|
|
|
|
|
use Phpml\Dataset\Dataset;
|
|
|
|
use Phpml\Exception\InvalidArgumentException;
|
|
|
|
|
|
|
|
/**
 * Base class for strategies that divide a dataset into a training part and a
 * test part.
 *
 * The constructor validates the requested test size, seeds PHP's Mersenne
 * Twister generator and then hands the dataset to splitDataset(), which every
 * concrete subclass has to implement. The four protected arrays start empty;
 * this base class never writes to them itself (presumably splitDataset()
 * implementations fill them).
 */
abstract class Split
{
    /**
     * Samples exposed through getTrainSamples().
     *
     * @var array
     */
    protected $trainSamples = [];

    /**
     * Samples exposed through getTestSamples().
     *
     * @var array
     */
    protected $testSamples = [];

    /**
     * Labels exposed through getTrainLabels().
     *
     * @var array
     */
    protected $trainLabels = [];

    /**
     * Labels exposed through getTestLabels().
     *
     * @var array
     */
    protected $testLabels = [];

    /**
     * Validates the arguments, seeds the random generator and performs the split.
     *
     * @param Dataset  $dataset  dataset forwarded unchanged to splitDataset()
     * @param float    $testSize must lie strictly between 0 and 1
     * @param int|null $seed     seed passed to mt_srand(); null lets PHP choose a random seed
     *
     * @throws InvalidArgumentException when $testSize is NAN or outside the open interval (0, 1)
     */
    public function __construct(Dataset $dataset, float $testSize = 0.3, int $seed = null)
    {
        // Expressed as a negated "inside the range" test: every ordered
        // comparison against NAN is false, so the former
        // "0 >= x || 1 <= x" form let NAN pass validation unnoticed.
        if (!($testSize > 0 && $testSize < 1)) {
            throw InvalidArgumentException::percentNotInRange('testSize');
        }

        // Seed first so that any mt_rand()-based shuffling done by
        // splitDataset() is reproducible for a given seed.
        $this->seedGenerator($seed);
        $this->splitDataset($dataset, $testSize);
    }

    /**
     * Performs the actual split; called once from the constructor after the
     * test size has been validated and the generator has been seeded.
     *
     * @param Dataset $dataset  dataset given to the constructor
     * @param float   $testSize validated value strictly between 0 and 1
     */
    abstract protected function splitDataset(Dataset $dataset, float $testSize);

    /**
     * @return array current content of the training samples property
     */
    public function getTrainSamples() : array
    {
        return $this->trainSamples;
    }

    /**
     * @return array current content of the test samples property
     */
    public function getTestSamples() : array
    {
        return $this->testSamples;
    }

    /**
     * @return array current content of the training labels property
     */
    public function getTrainLabels() : array
    {
        return $this->trainLabels;
    }

    /**
     * @return array current content of the test labels property
     */
    public function getTestLabels() : array
    {
        return $this->testLabels;
    }

    /**
     * Seeds PHP's global Mersenne Twister generator.
     *
     * @param int|null $seed explicit seed, or null to let mt_srand() pick one
     */
    protected function seedGenerator(int $seed = null)
    {
        if (null === $seed) {
            // Called without an argument on purpose: passing null explicitly
            // is a TypeError under strict_types on PHP versions where
            // mt_srand() declares a non-nullable int parameter.
            mt_srand();

            return;
        }

        mt_srand($seed);
    }
}
|