prepare cross validation random splitter

This commit is contained in:
Arkadiusz Kondas 2016-04-06 22:38:08 +02:00
parent e521fb8f80
commit 649cbdb9a6
6 changed files with 84 additions and 61 deletions

View File

@ -0,0 +1,65 @@
<?php
declare (strict_types = 1);
namespace Phpml\Dataset;
use Phpml\Exception\DatasetException;
/**
 * Base class for datasets loaded from a CSV file.
 *
 * A subclass sets $filepath to a file name; the constructor resolves it against the
 * "data" directory three levels above this file and reads it once. The first row is
 * skipped as a header. For every following row the first four columns become one
 * sample and the fifth column becomes its label.
 */
abstract class CsvDataset implements Dataset
{
    /**
     * File name of the CSV file, relative to the data directory.
     *
     * @var string
     */
    protected $filepath;

    /**
     * One entry per data row, each holding the first four columns of that row.
     *
     * @var array
     */
    private $samples = [];

    /**
     * One entry per data row, holding the fifth column of that row.
     *
     * @var array
     */
    private $labels = [];

    /**
     * Reads the CSV file named by $filepath into the samples and labels arrays.
     *
     * @throws DatasetException when the file does not exist or cannot be opened
     */
    public function __construct()
    {
        $filepath = __DIR__.'/../../../data/'.$this->filepath;

        if (!file_exists($filepath)) {
            throw DatasetException::missingFile(basename($filepath));
        }

        $handle = fopen($filepath, 'r');
        if ($handle === false) {
            throw DatasetException::cantOpenFile(basename($filepath));
        }

        $row = 0;
        // Length 0 means "no line length limit", so long rows are never split into chunks.
        while (($data = fgetcsv($handle, 0, ',')) !== false) {
            ++$row;

            // The first row is the header.
            if ($row === 1) {
                continue;
            }

            // fgetcsv() reports a blank line as an array holding a single null field;
            // such a line carries no sample and no label, so it is ignored.
            if ($data === [null]) {
                continue;
            }

            $this->samples[] = array_slice($data, 0, 4);
            $this->labels[] = $data[4];
        }

        fclose($handle);
    }

    /**
     * Returns the samples read from the file, in file order.
     *
     * @return array
     */
    public function getSamples(): array
    {
        return $this->samples;
    }

    /**
     * Returns the labels read from the file, in the same order as the samples.
     *
     * @return array
     */
    public function getLabels(): array
    {
        return $this->labels;
    }
}

View File

@ -4,64 +4,15 @@ declare (strict_types = 1);
namespace Phpml\Dataset;
use Phpml\Exception\DatasetException;
abstract class Dataset
interface Dataset
{
/**
* @var string
* @return array
*/
protected $filepath;
/**
* @var array
*/
private $samples = [];
/**
* @var array
*/
private $lables = [];
public function __construct()
{
$filepath = dirname(__FILE__) . '/../../../data/' . $this->filepath;
if(!file_exists($filepath)) {
throw DatasetException::missingFile(basename($filepath));
}
$row = 0;
if (($handle = fopen($filepath, "r")) !== FALSE) {
while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) {
$row++;
if($row==1) {
continue;
}
$this->samples[] = array_slice($data, 0, 4);
$this->lables[] = $data[4];
}
fclose($handle);
} else {
throw DatasetException::cantOpenFile(basename($filepath));
}
}
public function getSamples(): array;
/**
* @return array
*/
public function getSamples()
{
return $this->samples;
}
/**
* @return array
*/
public function getLabels()
{
return $this->lables;
}
public function getLabels(): array;
}

View File

@ -8,13 +8,12 @@ namespace Phpml\Dataset;
* Classes: 3
* Samples per class: 50
* Samples total: 150
* Features per sample: 4
* Features per sample: 4.
*/
class Iris extends Dataset
class Iris extends CsvDataset
{
/**
* @var string
*/
protected $filepath = 'iris.csv';
}

View File

@ -18,5 +18,4 @@ class DatasetException extends \Exception
{
return new self(sprintf('Dataset file %s can\'t be open.', $filepath));
}
}

View File

@ -13,4 +13,14 @@ class InvalidArgumentException extends \Exception
{
return new self('Size of given arguments not match');
}
/**
 * Creates the exception reported when a named value is not between 0.0 and 1.0.
 *
 * @param $name label placed at the start of the message
 *
 * @return InvalidArgumentException
 */
public static function percentNotInRange($name)
{
    $message = sprintf('%s must be between 0.0 and 1.0', $name);

    return new self($message);
}
}

View File

@ -1,5 +1,6 @@
<?php
declare(strict_types = 1);
declare (strict_types = 1);
namespace tests\Phpml\Dataset;
@ -7,7 +8,6 @@ use Phpml\Dataset\Iris;
class IrisTest extends \PHPUnit_Framework_TestCase
{
public function testLoadingIrisDataset()
{
$iris = new Iris();
@ -19,5 +19,4 @@ class IrisTest extends \PHPUnit_Framework_TestCase
// one sample features count
$this->assertEquals(4, count($iris->getSamples()[0]));
}
}
}