Persistence class to save and restore models (#37)

* Models manager with save/restore capabilities

* Refactoring dataset exceptions

* Persistency layer docs

* New tests for serializable estimators

* ModelManager static methods to instance methods
This commit is contained in:
David Monllaó 2017-02-02 09:03:09 +01:00 committed by Arkadiusz Kondas
parent c1b1a5d6ac
commit 8f122fde90
17 changed files with 361 additions and 24 deletions

View File

@ -84,6 +84,8 @@ Example scripts are available in a separate repository [php-ai/php-ml-examples](
* [Iris](machine-learning/datasets/demo/iris/)
* [Wine](machine-learning/datasets/demo/wine/)
* [Glass](machine-learning/datasets/demo/glass/)
* Models management
* [Persistency](machine-learning/model-manager/persistency/)
* Math
* [Distance](math/distance/)
* [Matrix](math/matrix/)

View File

@ -0,0 +1,24 @@
# Persistency
You can save trained models for future use. Persistency across requests is achieved by saving serialized estimators to files and restoring them from those files.
### Example
```
use Phpml\Classification\KNearestNeighbors;
use Phpml\ModelManager;
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
$labels = ['a', 'a', 'a', 'b', 'b', 'b'];
$classifier = new KNearestNeighbors();
$classifier->train($samples, $labels);
$filepath = '/path/to/store/the/model'; // must be an existing file inside a writable directory
$modelManager = new ModelManager();
$modelManager->saveToFile($classifier, $filepath);
$restoredClassifier = $modelManager->restoreFromFile($filepath);
$restoredClassifier->predict([3, 2]);
// return 'b'
```

View File

@ -40,6 +40,8 @@ pages:
- Iris: machine-learning/datasets/demo/iris.md
- Wine: machine-learning/datasets/demo/wine.md
- Glass: machine-learning/datasets/demo/glass.md
- Models management:
- Persistency: machine-learning/model-manager/persistency.md
- Math:
- Distance: math/distance.md
- Matrix: math/matrix.md

View File

@ -4,7 +4,7 @@ declare(strict_types=1);
namespace Phpml\Dataset;
use Phpml\Exception\DatasetException;
use Phpml\Exception\FileException;
class CsvDataset extends ArrayDataset
{
@ -13,16 +13,16 @@ class CsvDataset extends ArrayDataset
* @param int $features
* @param bool $headingRow
*
* @throws DatasetException
* @throws FileException
*/
public function __construct(string $filepath, int $features, bool $headingRow = true)
{
if (!file_exists($filepath)) {
throw DatasetException::missingFile(basename($filepath));
throw FileException::missingFile(basename($filepath));
}
if (false === $handle = fopen($filepath, 'rb')) {
throw DatasetException::cantOpenFile(basename($filepath));
throw FileException::cantOpenFile(basename($filepath));
}
if ($headingRow) {

View File

@ -6,15 +6,6 @@ namespace Phpml\Exception;
class DatasetException extends \Exception
{
/**
* @param string $filepath
*
* @return DatasetException
*/
public static function missingFile(string $filepath)
{
return new self(sprintf('Dataset file "%s" missing.', $filepath));
}
/**
* @param string $path
@ -25,14 +16,4 @@ class DatasetException extends \Exception
{
return new self(sprintf('Dataset root folder "%s" missing.', $path));
}
/**
* @param string $filepath
*
* @return DatasetException
*/
public static function cantOpenFile(string $filepath)
{
return new self(sprintf('Dataset file "%s" can\'t be open.', $filepath));
}
}

View File

@ -0,0 +1,39 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
class FileException extends \Exception
{
    /**
     * Exception for a file that does not exist.
     *
     * @param string $filepath File name or path placed verbatim into the message
     *
     * @return FileException
     */
    public static function missingFile(string $filepath)
    {
        return self::withMessage('File "%s" missing.', $filepath);
    }

    /**
     * Exception for a file that could not be opened.
     *
     * @param string $filepath File name or path placed verbatim into the message
     *
     * @return FileException
     */
    public static function cantOpenFile(string $filepath)
    {
        return self::withMessage('File "%s" can\'t be open.', $filepath);
    }

    /**
     * Exception for a file that could not be written.
     *
     * @param string $filepath File name or path placed verbatim into the message
     *
     * @return FileException
     */
    public static function cantSaveFile(string $filepath)
    {
        return self::withMessage('File "%s" can\'t be saved.', $filepath);
    }

    /**
     * Formats the given message template with the file path and wraps it in an exception.
     *
     * @param string $template sprintf() template containing a single %s placeholder
     * @param string $filepath Value substituted for the placeholder
     *
     * @return FileException
     */
    private static function withMessage(string $template, string $filepath)
    {
        return new self(sprintf($template, $filepath));
    }
}

View File

@ -0,0 +1,30 @@
<?php
declare(strict_types=1);
namespace Phpml\Exception;
class SerializeException extends \Exception
{
    /**
     * Exception for stored content that could not be turned back into a value.
     *
     * @param string $filepath Name or path of the source file, placed verbatim into the message
     *
     * @return SerializeException
     */
    public static function cantUnserialize(string $filepath)
    {
        $message = sprintf('"%s" can not be unserialized.', $filepath);

        return new self($message);
    }

    /**
     * Exception for an object that could not be serialized.
     *
     * @param string $classname Name of the offending class, placed verbatim into the message
     *
     * @return SerializeException
     */
    public static function cantSerialize(string $classname)
    {
        $message = sprintf('Class "%s" can not be serialized.', $classname);

        return new self($message);
    }
}

View File

@ -0,0 +1,52 @@
<?php
declare(strict_types=1);
namespace Phpml;
use Phpml\Estimator;
use Phpml\Exception\SerializeException;
use Phpml\Exception\FileException;
class ModelManager
{
    /**
     * Serializes an estimator and writes the result to a file.
     *
     * The target file must already exist and its directory must be writable;
     * otherwise nothing is written and a FileException is thrown.
     *
     * @param Estimator $object   Estimator to persist
     * @param string    $filepath Path of the existing file to overwrite
     *
     * @throws FileException      If the file is missing, its directory is not writable, or the write fails
     * @throws SerializeException If serialization produces an empty result
     */
    public function saveToFile(Estimator $object, string $filepath)
    {
        if (!file_exists($filepath) || !is_writable(dirname($filepath))) {
            throw FileException::cantSaveFile(basename($filepath));
        }

        $serialized = serialize($object);
        if (empty($serialized)) {
            // Report the concrete estimator class in the error message.
            throw SerializeException::cantSerialize(get_class($object));
        }

        // LOCK_EX takes an exclusive lock on the file while writing.
        $result = file_put_contents($filepath, $serialized, LOCK_EX);
        if ($result === false) {
            throw FileException::cantSaveFile(basename($filepath));
        }
    }

    /**
     * Reads a file written by saveToFile() and rebuilds the stored value.
     *
     * @param string $filepath Path of the file to read
     *
     * @return Estimator
     *
     * @throws FileException      If the file is missing, unreadable, or cannot be read
     * @throws SerializeException If the file content cannot be unserialized
     */
    public function restoreFromFile(string $filepath)
    {
        if (!file_exists($filepath) || !is_readable($filepath)) {
            throw FileException::cantOpenFile(basename($filepath));
        }

        // A read can still fail after the checks above; never hand a non-string to unserialize().
        $contents = file_get_contents($filepath);
        if ($contents === false) {
            throw FileException::cantOpenFile(basename($filepath));
        }

        // NOTE(review): unserialize() can instantiate arbitrary classes; only restore files from trusted sources.
        $object = unserialize($contents);
        if ($object === false) {
            throw SerializeException::cantUnserialize(basename($filepath));
        }

        return $object;
    }
}

View File

@ -5,6 +5,7 @@ declare(strict_types=1);
namespace tests\Classification;
use Phpml\Association\Apriori;
use Phpml\ModelManager;
class AprioriTest extends \PHPUnit_Framework_TestCase
{
@ -184,4 +185,22 @@ class AprioriTest extends \PHPUnit_Framework_TestCase
return $method->invokeArgs($object, $params);
}
/**
 * A trained Apriori model must survive a save/restore round trip unchanged.
 */
public function testSaveAndRestore()
{
    $classifier = new Apriori(0.5, 0.5);
    $classifier->train($this->sampleGreek, []);

    $testSamples = [['alpha', 'epsilon'], ['beta', 'theta']];
    $predicted = $classifier->predict($testSamples);

    $filename = 'apriori-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk, so remove it even when an assertion fails.
        if (is_file($filepath)) {
            unlink($filepath);
        }
    }
}
}

View File

@ -5,6 +5,7 @@ declare(strict_types=1);
namespace tests\Classification;
use Phpml\Classification\DecisionTree;
use Phpml\ModelManager;
class DecisionTreeTest extends \PHPUnit_Framework_TestCase
{
@ -55,6 +56,26 @@ class DecisionTreeTest extends \PHPUnit_Framework_TestCase
return $classifier;
}
/**
 * A trained DecisionTree must survive a save/restore round trip unchanged.
 */
public function testSaveAndRestore()
{
    list($data, $targets) = $this->getData($this->data);
    $classifier = new DecisionTree(5);
    $classifier->train($data, $targets);

    $testSamples = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false']];
    $predicted = $classifier->predict($testSamples);

    $filename = 'decision-tree-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk, so remove it even when an assertion fails.
        if (is_file($filepath)) {
            unlink($filepath);
        }
    }
}
public function testTreeDepth()
{
list($data, $targets) = $this->getData($this->data);

View File

@ -6,6 +6,7 @@ namespace tests\Classification;
use Phpml\Classification\KNearestNeighbors;
use Phpml\Math\Distance\Chebyshev;
use Phpml\ModelManager;
class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase
{
@ -57,4 +58,27 @@ class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($testLabels, $predicted);
}
/**
 * A trained KNearestNeighbors model must survive a save/restore round trip unchanged.
 */
public function testSaveAndRestore()
{
    $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
    $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b'];

    $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]];

    // Using non-default constructor parameters to check that their values are restored.
    $classifier = new KNearestNeighbors(3, new Chebyshev());
    $classifier->train($trainSamples, $trainLabels);
    $predicted = $classifier->predict($testSamples);

    $filename = 'knearest-neighbors-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk, so remove it even when an assertion fails.
        if (is_file($filepath)) {
            unlink($filepath);
        }
    }
}
}

View File

@ -5,6 +5,7 @@ declare(strict_types=1);
namespace tests\Classification;
use Phpml\Classification\NaiveBayes;
use Phpml\ModelManager;
class NaiveBayesTest extends \PHPUnit_Framework_TestCase
{
@ -45,4 +46,27 @@ class NaiveBayesTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($testLabels, $classifier->predict($testSamples));
}
/**
 * A trained NaiveBayes model must survive a save/restore round trip unchanged.
 */
public function testSaveAndRestore()
{
    $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
    $trainLabels = ['a', 'b', 'c'];

    $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8]];

    $classifier = new NaiveBayes();
    $classifier->train($trainSamples, $trainLabels);
    $predicted = $classifier->predict($testSamples);

    $filename = 'naive-bayes-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk, so remove it even when an assertion fails.
        if (is_file($filepath)) {
            unlink($filepath);
        }
    }
}
}

View File

@ -6,6 +6,7 @@ namespace tests\Classification;
use Phpml\Classification\SVC;
use Phpml\SupportVectorMachine\Kernel;
use Phpml\ModelManager;
class SVCTest extends \PHPUnit_Framework_TestCase
{
@ -42,4 +43,26 @@ class SVCTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($testLabels, $predictions);
}
/**
 * A trained SVC model must survive a save/restore round trip unchanged.
 */
public function testSaveAndRestore()
{
    $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
    $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b'];

    $testSamples = [[3, 2], [5, 1], [4, 3]];

    // Second constructor argument is the cost parameter.
    $classifier = new SVC(Kernel::LINEAR, 1000);
    $classifier->train($trainSamples, $trainLabels);
    $predicted = $classifier->predict($testSamples);

    $filename = 'svc-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk, so remove it even when an assertion fails.
        if (is_file($filepath)) {
            unlink($filepath);
        }
    }
}
}

View File

@ -9,7 +9,7 @@ use Phpml\Dataset\CsvDataset;
class CsvDatasetTest extends \PHPUnit_Framework_TestCase
{
/**
* @expectedException \Phpml\Exception\DatasetException
* @expectedException \Phpml\Exception\FileException
*/
public function testThrowExceptionOnMissingFile()
{

View File

@ -0,0 +1,47 @@
<?php
declare(strict_types=1);
namespace tests;
use Phpml\ModelManager;
use Phpml\Regression\LeastSquares;
class ModelManagerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * An estimator written with saveToFile() must be restored as an equal object.
     */
    public function testSaveAndRestore()
    {
        $filename = 'test-save-to-file-'.rand(100, 999).'-'.uniqid();
        $filepath = tempnam(sys_get_temp_dir(), $filename);

        try {
            $obj = new LeastSquares();
            $modelManager = new ModelManager();
            $modelManager->saveToFile($obj, $filepath);

            $restored = $modelManager->restoreFromFile($filepath);
            $this->assertEquals($obj, $restored);
        } finally {
            // tempnam() creates the file on disk, so remove it even when an assertion fails.
            if (is_file($filepath)) {
                unlink($filepath);
            }
        }
    }

    /**
     * Saving to a path that does not exist must be rejected.
     *
     * @expectedException \Phpml\Exception\FileException
     */
    public function testSaveToWrongFile()
    {
        $filepath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'unexisting';

        $obj = new LeastSquares();
        $modelManager = new ModelManager();
        $modelManager->saveToFile($obj, $filepath);
    }

    /**
     * Restoring from a path that does not exist must be rejected.
     *
     * @expectedException \Phpml\Exception\FileException
     */
    public function testRestoreWrongFile()
    {
        $filepath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'unexisting';

        $modelManager = new ModelManager();
        $modelManager->restoreFromFile($filepath);
    }
}

View File

@ -5,6 +5,7 @@ declare(strict_types=1);
namespace tests\Regression;
use Phpml\Regression\LeastSquares;
use Phpml\ModelManager;
class LeastSquaresTest extends \PHPUnit_Framework_TestCase
{
@ -65,4 +66,28 @@ class LeastSquaresTest extends \PHPUnit_Framework_TestCase
$this->assertEquals(4094.82, $regression->predict([60000, 1996]), '', $delta);
$this->assertEquals(5711.40, $regression->predict([60000, 2000]), '', $delta);
}
/**
 * A trained LeastSquares model must survive a save/restore round trip unchanged.
 */
public function testSaveAndRestore()
{
    //https://www.easycalculation.com/analytical/learn-least-square-regression.php
    $samples = [[60], [61], [62], [63], [65]];
    $targets = [[3.1], [3.6], [3.8], [4], [4.1]];

    $regression = new LeastSquares();
    $regression->train($samples, $targets);

    //http://www.stat.wmich.edu/s216/book/node127.html
    $testSamples = [[9300], [10565], [15000]];
    $predicted = $regression->predict($testSamples);

    $filename = 'least-squares-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($regression, $filepath);

        $restoredRegression = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($regression, $restoredRegression);
        $this->assertEquals($predicted, $restoredRegression->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk, so remove it even when an assertion fails.
        if (is_file($filepath)) {
            unlink($filepath);
        }
    }
}
}

View File

@ -6,6 +6,7 @@ namespace tests\Regression;
use Phpml\Regression\SVR;
use Phpml\SupportVectorMachine\Kernel;
use Phpml\ModelManager;
class SVRTest extends \PHPUnit_Framework_TestCase
{
@ -34,4 +35,27 @@ class SVRTest extends \PHPUnit_Framework_TestCase
$this->assertEquals([4109.82, 4112.28], $regression->predict([[60000, 1996], [60000, 2000]]), '', $delta);
}
/**
 * A trained SVR model must survive a save/restore round trip unchanged.
 */
public function testSaveAndRestore()
{
    $samples = [[60], [61], [62], [63], [65]];
    $targets = [3.1, 3.6, 3.8, 4, 4.1];

    $regression = new SVR(Kernel::LINEAR);
    $regression->train($samples, $targets);

    $testSamples = [64];
    $predicted = $regression->predict($testSamples);

    $filename = 'svr-test'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($regression, $filepath);

        $restoredRegression = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($regression, $restoredRegression);
        $this->assertEquals($predicted, $restoredRegression->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk, so remove it even when an assertion fails.
        if (is_file($filepath)) {
            unlink($filepath);
        }
    }
}
}