mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-21 20:45:10 +00:00
Persistence class to save and restore models (#37)
* Models manager with save/restore capabilities * Refactoring dataset exceptions * Persistency layer docs * New tests for serializable estimators * ModelManager static methods to instance methods
This commit is contained in:
parent
c1b1a5d6ac
commit
8f122fde90
@ -84,6 +84,8 @@ Example scripts are available in a separate repository [php-ai/php-ml-examples](
|
||||
* [Iris](machine-learning/datasets/demo/iris/)
|
||||
* [Wine](machine-learning/datasets/demo/wine/)
|
||||
* [Glass](machine-learning/datasets/demo/glass/)
|
||||
* Models management
|
||||
* [Persistency](machine-learning/model-manager/persistency/)
|
||||
* Math
|
||||
* [Distance](math/distance/)
|
||||
* [Matrix](math/matrix/)
|
||||
|
24
docs/machine-learning/model-manager/persistency.md
Normal file
24
docs/machine-learning/model-manager/persistency.md
Normal file
@ -0,0 +1,24 @@
|
||||
# Persistency
|
||||
|
||||
You can save trained models for future use. Persistency across requests is achieved by saving serialized estimators to files and restoring them from those files.
|
||||
|
||||
### Example
|
||||
|
||||
```
|
||||
use Phpml\Classification\KNearestNeighbors;
|
||||
use Phpml\ModelManager;
|
||||
|
||||
$samples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
|
||||
$labels = ['a', 'a', 'a', 'b', 'b', 'b'];
|
||||
|
||||
$classifier = new KNearestNeighbors();
|
||||
$classifier->train($samples, $labels);
|
||||
|
||||
$filepath = '/path/to/store/the/model';
|
||||
$modelManager = new ModelManager();
|
||||
$modelManager->saveToFile($classifier, $filepath);
|
||||
|
||||
$restoredClassifier = $modelManager->restoreFromFile($filepath);
|
||||
$restoredClassifier->predict([3, 2]);
|
||||
// return 'b'
|
||||
```
|
@ -40,6 +40,8 @@ pages:
|
||||
- Iris: machine-learning/datasets/demo/iris.md
|
||||
- Wine: machine-learning/datasets/demo/wine.md
|
||||
- Glass: machine-learning/datasets/demo/glass.md
|
||||
- Models management:
|
||||
- Persistency: machine-learning/model-manager/persistency.md
|
||||
- Math:
|
||||
- Distance: math/distance.md
|
||||
- Matrix: math/matrix.md
|
||||
|
@ -4,7 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Dataset;
|
||||
|
||||
use Phpml\Exception\DatasetException;
|
||||
use Phpml\Exception\FileException;
|
||||
|
||||
class CsvDataset extends ArrayDataset
|
||||
{
|
||||
@ -13,16 +13,16 @@ class CsvDataset extends ArrayDataset
|
||||
* @param int $features
|
||||
* @param bool $headingRow
|
||||
*
|
||||
* @throws DatasetException
|
||||
* @throws FileException
|
||||
*/
|
||||
public function __construct(string $filepath, int $features, bool $headingRow = true)
|
||||
{
|
||||
if (!file_exists($filepath)) {
|
||||
throw DatasetException::missingFile(basename($filepath));
|
||||
throw FileException::missingFile(basename($filepath));
|
||||
}
|
||||
|
||||
if (false === $handle = fopen($filepath, 'rb')) {
|
||||
throw DatasetException::cantOpenFile(basename($filepath));
|
||||
throw FileException::cantOpenFile(basename($filepath));
|
||||
}
|
||||
|
||||
if ($headingRow) {
|
||||
|
@ -6,15 +6,6 @@ namespace Phpml\Exception;
|
||||
|
||||
class DatasetException extends \Exception
|
||||
{
|
||||
/**
|
||||
* @param string $filepath
|
||||
*
|
||||
* @return DatasetException
|
||||
*/
|
||||
public static function missingFile(string $filepath)
|
||||
{
|
||||
return new self(sprintf('Dataset file "%s" missing.', $filepath));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $path
|
||||
@ -25,14 +16,4 @@ class DatasetException extends \Exception
|
||||
{
|
||||
return new self(sprintf('Dataset root folder "%s" missing.', $path));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $filepath
|
||||
*
|
||||
* @return DatasetException
|
||||
*/
|
||||
public static function cantOpenFile(string $filepath)
|
||||
{
|
||||
return new self(sprintf('Dataset file "%s" can\'t be open.', $filepath));
|
||||
}
|
||||
}
|
||||
|
39
src/Phpml/Exception/FileException.php
Normal file
39
src/Phpml/Exception/FileException.php
Normal file
@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class FileException extends \Exception
{
    /**
     * Creates an exception reporting that a file is missing.
     *
     * @param string $filepath file name or path, embedded verbatim in the message
     *
     * @return FileException
     */
    public static function missingFile(string $filepath): self
    {
        return new self(sprintf('File "%s" missing.', $filepath));
    }

    /**
     * Creates an exception reporting that a file could not be opened.
     *
     * @param string $filepath file name or path, embedded verbatim in the message
     *
     * @return FileException
     */
    public static function cantOpenFile(string $filepath): self
    {
        return new self(sprintf('File "%s" can\'t be open.', $filepath));
    }

    /**
     * Creates an exception reporting that a file could not be saved.
     *
     * @param string $filepath file name or path, embedded verbatim in the message
     *
     * @return FileException
     */
    public static function cantSaveFile(string $filepath): self
    {
        return new self(sprintf('File "%s" can\'t be saved.', $filepath));
    }
}
|
30
src/Phpml/Exception/SerializeException.php
Normal file
30
src/Phpml/Exception/SerializeException.php
Normal file
@ -0,0 +1,30 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml\Exception;
|
||||
|
||||
class SerializeException extends \Exception
{
    /**
     * Creates an exception reporting that stored data could not be unserialized.
     *
     * @param string $filepath name or path of the source file, embedded verbatim in the message
     *
     * @return SerializeException
     */
    public static function cantUnserialize(string $filepath): self
    {
        return new self(sprintf('"%s" can not be unserialized.', $filepath));
    }

    /**
     * Creates an exception reporting that an object of the given class could not be serialized.
     *
     * @param string $classname class name, embedded verbatim in the message
     *
     * @return SerializeException
     */
    public static function cantSerialize(string $classname): self
    {
        return new self(sprintf('Class "%s" can not be serialized.', $classname));
    }
}
|
52
src/Phpml/ModelManager.php
Normal file
52
src/Phpml/ModelManager.php
Normal file
@ -0,0 +1,52 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Phpml;
|
||||
|
||||
use Phpml\Estimator;
|
||||
use Phpml\Exception\SerializeException;
|
||||
use Phpml\Exception\FileException;
|
||||
|
||||
class ModelManager
{
    /**
     * Serializes an estimator and writes the result to a file.
     *
     * The target file must already exist (for example one created with
     * tempnam()) and its directory must be writable; otherwise nothing is
     * written and a FileException is thrown.
     *
     * @param Estimator $object   estimator to persist
     * @param string    $filepath path of the existing file that receives the serialized data
     *
     * @throws FileException      if the file does not exist, its directory is not writable, or the write fails
     * @throws SerializeException if serialize() produces an empty result
     */
    public function saveToFile(Estimator $object, string $filepath)
    {
        if (!file_exists($filepath) || !is_writable(dirname($filepath))) {
            throw FileException::cantSaveFile(basename($filepath));
        }

        $serialized = serialize($object);
        if (empty($serialized)) {
            // get_class() supplies the class name that the error message reports.
            throw SerializeException::cantSerialize(get_class($object));
        }

        // LOCK_EX: hold an exclusive lock on the file while writing.
        $result = file_put_contents($filepath, $serialized, LOCK_EX);
        if ($result === false) {
            throw FileException::cantSaveFile(basename($filepath));
        }
    }

    /**
     * Reads a file and unserializes its content.
     *
     * @param string $filepath path of the file holding the serialized data
     *
     * @return Estimator
     *
     * @throws FileException      if the file does not exist or cannot be read
     * @throws SerializeException if unserialize() returns false for the file content
     */
    public function restoreFromFile(string $filepath)
    {
        if (!file_exists($filepath) || !is_readable($filepath)) {
            throw FileException::cantOpenFile(basename($filepath));
        }

        // A failed read returns false; report it as a file error instead of
        // handing a non-string to unserialize().
        $contents = file_get_contents($filepath);
        if ($contents === false) {
            throw FileException::cantOpenFile(basename($filepath));
        }

        // NOTE(review): unserialize() can instantiate arbitrary classes; only
        // restore files that come from a trusted source.
        $object = unserialize($contents);
        if ($object === false) {
            throw SerializeException::cantUnserialize(basename($filepath));
        }

        return $object;
    }
}
|
@ -5,6 +5,7 @@ declare(strict_types=1);
|
||||
namespace tests\Classification;
|
||||
|
||||
use Phpml\Association\Apriori;
|
||||
use Phpml\ModelManager;
|
||||
|
||||
class AprioriTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
@ -184,4 +185,22 @@ class AprioriTest extends \PHPUnit_Framework_TestCase
|
||||
|
||||
return $method->invokeArgs($object, $params);
|
||||
}
|
||||
|
||||
/**
 * A trained Apriori model saved to a temporary file is restored as an equal
 * object that yields the same predictions.
 */
public function testSaveAndRestore()
{
    $classifier = new Apriori(0.5, 0.5);
    $classifier->train($this->sampleGreek, []);

    $testSamples = [['alpha', 'epsilon'], ['beta', 'theta']];
    $predicted = $classifier->predict($testSamples);

    $filename = 'apriori-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk; remove it even when an assertion fails.
        if (is_string($filepath) && file_exists($filepath)) {
            unlink($filepath);
        }
    }
}
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ declare(strict_types=1);
|
||||
namespace tests\Classification;
|
||||
|
||||
use Phpml\Classification\DecisionTree;
|
||||
use Phpml\ModelManager;
|
||||
|
||||
class DecisionTreeTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
@ -55,6 +56,26 @@ class DecisionTreeTest extends \PHPUnit_Framework_TestCase
|
||||
return $classifier;
|
||||
}
|
||||
|
||||
/**
 * A trained DecisionTree saved to a temporary file is restored as an equal
 * object that yields the same predictions.
 */
public function testSaveAndRestore()
{
    list($data, $targets) = $this->getData($this->data);
    $classifier = new DecisionTree(5);
    $classifier->train($data, $targets);

    $testSamples = [['sunny', 78, 72, 'false'], ['overcast', 60, 60, 'false']];
    $predicted = $classifier->predict($testSamples);

    $filename = 'decision-tree-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk; remove it even when an assertion fails.
        if (is_string($filepath) && file_exists($filepath)) {
            unlink($filepath);
        }
    }
}
|
||||
|
||||
public function testTreeDepth()
|
||||
{
|
||||
list($data, $targets) = $this->getData($this->data);
|
||||
|
@ -6,6 +6,7 @@ namespace tests\Classification;
|
||||
|
||||
use Phpml\Classification\KNearestNeighbors;
|
||||
use Phpml\Math\Distance\Chebyshev;
|
||||
use Phpml\ModelManager;
|
||||
|
||||
class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
@ -57,4 +58,27 @@ class KNearestNeighborsTest extends \PHPUnit_Framework_TestCase
|
||||
|
||||
$this->assertEquals($testLabels, $predicted);
|
||||
}
|
||||
|
||||
/**
 * A trained KNearestNeighbors model saved to a temporary file is restored as
 * an equal object that yields the same predictions.
 */
public function testSaveAndRestore()
{
    $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
    $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b'];

    $testSamples = [[3, 2], [5, 1], [4, 3], [4, -5], [2, 3], [1, 2], [1, 5], [3, 10]];

    // Using non-default constructor parameters to check that their values are restored.
    $classifier = new KNearestNeighbors(3, new Chebyshev());
    $classifier->train($trainSamples, $trainLabels);
    $predicted = $classifier->predict($testSamples);

    $filename = 'knearest-neighbors-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk; remove it even when an assertion fails.
        if (is_string($filepath) && file_exists($filepath)) {
            unlink($filepath);
        }
    }
}
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ declare(strict_types=1);
|
||||
namespace tests\Classification;
|
||||
|
||||
use Phpml\Classification\NaiveBayes;
|
||||
use Phpml\ModelManager;
|
||||
|
||||
class NaiveBayesTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
@ -45,4 +46,27 @@ class NaiveBayesTest extends \PHPUnit_Framework_TestCase
|
||||
$this->assertEquals($testLabels, $classifier->predict($testSamples));
|
||||
|
||||
}
|
||||
|
||||
/**
 * A trained NaiveBayes model saved to a temporary file is restored as an
 * equal object that yields the same predictions.
 */
public function testSaveAndRestore()
{
    $trainSamples = [[5, 1, 1], [1, 5, 1], [1, 1, 5]];
    $trainLabels = ['a', 'b', 'c'];

    $testSamples = [[3, 1, 1], [5, 1, 1], [4, 3, 8]];

    $classifier = new NaiveBayes();
    $classifier->train($trainSamples, $trainLabels);
    $predicted = $classifier->predict($testSamples);

    $filename = 'naive-bayes-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk; remove it even when an assertion fails.
        if (is_string($filepath) && file_exists($filepath)) {
            unlink($filepath);
        }
    }
}
|
||||
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ namespace tests\Classification;
|
||||
|
||||
use Phpml\Classification\SVC;
|
||||
use Phpml\SupportVectorMachine\Kernel;
|
||||
use Phpml\ModelManager;
|
||||
|
||||
class SVCTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
@ -42,4 +43,26 @@ class SVCTest extends \PHPUnit_Framework_TestCase
|
||||
|
||||
$this->assertEquals($testLabels, $predictions);
|
||||
}
|
||||
|
||||
/**
 * A trained SVC model saved to a temporary file is restored as an equal
 * object that yields the same predictions.
 */
public function testSaveAndRestore()
{
    $trainSamples = [[1, 3], [1, 4], [2, 4], [3, 1], [4, 1], [4, 2]];
    $trainLabels = ['a', 'a', 'a', 'b', 'b', 'b'];

    $testSamples = [[3, 2], [5, 1], [4, 3]];

    $classifier = new SVC(Kernel::LINEAR, $cost = 1000);
    $classifier->train($trainSamples, $trainLabels);
    $predicted = $classifier->predict($testSamples);

    $filename = 'svc-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($classifier, $filepath);

        $restoredClassifier = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($classifier, $restoredClassifier);
        $this->assertEquals($predicted, $restoredClassifier->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk; remove it even when an assertion fails.
        if (is_string($filepath) && file_exists($filepath)) {
            unlink($filepath);
        }
    }
}
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ use Phpml\Dataset\CsvDataset;
|
||||
class CsvDatasetTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
/**
|
||||
* @expectedException \Phpml\Exception\DatasetException
|
||||
* @expectedException \Phpml\Exception\FileException
|
||||
*/
|
||||
public function testThrowExceptionOnMissingFile()
|
||||
{
|
||||
|
47
tests/Phpml/ModelManagerTest.php
Normal file
47
tests/Phpml/ModelManagerTest.php
Normal file
@ -0,0 +1,47 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace tests;
|
||||
|
||||
use Phpml\ModelManager;
|
||||
use Phpml\Regression\LeastSquares;
|
||||
|
||||
class ModelManagerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * An estimator saved to a temporary file is restored as an equal object.
     */
    public function testSaveAndRestore()
    {
        $filename = 'test-save-to-file-'.rand(100, 999).'-'.uniqid();
        $filepath = tempnam(sys_get_temp_dir(), $filename);

        try {
            $obj = new LeastSquares();
            $modelManager = new ModelManager();
            $modelManager->saveToFile($obj, $filepath);

            $restored = $modelManager->restoreFromFile($filepath);
            $this->assertEquals($obj, $restored);
        } finally {
            // tempnam() creates the file on disk; remove it even when an assertion fails.
            if (is_string($filepath) && file_exists($filepath)) {
                unlink($filepath);
            }
        }
    }

    /**
     * Saving to a path in the temp directory that is expected not to exist
     * must raise a FileException.
     *
     * @expectedException \Phpml\Exception\FileException
     */
    public function testSaveToWrongFile()
    {
        $filepath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'unexisting';

        $obj = new LeastSquares();
        $modelManager = new ModelManager();
        $modelManager->saveToFile($obj, $filepath);
    }

    /**
     * Restoring from a path in the temp directory that is expected not to
     * exist must raise a FileException.
     *
     * @expectedException \Phpml\Exception\FileException
     */
    public function testRestoreWrongFile()
    {
        $filepath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 'unexisting';
        $modelManager = new ModelManager();
        $modelManager->restoreFromFile($filepath);
    }
}
|
@ -5,6 +5,7 @@ declare(strict_types=1);
|
||||
namespace tests\Regression;
|
||||
|
||||
use Phpml\Regression\LeastSquares;
|
||||
use Phpml\ModelManager;
|
||||
|
||||
class LeastSquaresTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
@ -65,4 +66,28 @@ class LeastSquaresTest extends \PHPUnit_Framework_TestCase
|
||||
$this->assertEquals(4094.82, $regression->predict([60000, 1996]), '', $delta);
|
||||
$this->assertEquals(5711.40, $regression->predict([60000, 2000]), '', $delta);
|
||||
}
|
||||
|
||||
/**
 * A trained LeastSquares regression saved to a temporary file is restored as
 * an equal object that yields the same predictions.
 */
public function testSaveAndRestore()
{
    //https://www.easycalculation.com/analytical/learn-least-square-regression.php
    $samples = [[60], [61], [62], [63], [65]];
    $targets = [[3.1], [3.6], [3.8], [4], [4.1]];

    $regression = new LeastSquares();
    $regression->train($samples, $targets);

    //http://www.stat.wmich.edu/s216/book/node127.html
    $testSamples = [[9300], [10565], [15000]];
    $predicted = $regression->predict($testSamples);

    $filename = 'least-squares-test-'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($regression, $filepath);

        $restoredRegression = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($regression, $restoredRegression);
        $this->assertEquals($predicted, $restoredRegression->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk; remove it even when an assertion fails.
        if (is_string($filepath) && file_exists($filepath)) {
            unlink($filepath);
        }
    }
}
|
||||
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ namespace tests\Regression;
|
||||
|
||||
use Phpml\Regression\SVR;
|
||||
use Phpml\SupportVectorMachine\Kernel;
|
||||
use Phpml\ModelManager;
|
||||
|
||||
class SVRTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
@ -34,4 +35,27 @@ class SVRTest extends \PHPUnit_Framework_TestCase
|
||||
|
||||
$this->assertEquals([4109.82, 4112.28], $regression->predict([[60000, 1996], [60000, 2000]]), '', $delta);
|
||||
}
|
||||
|
||||
/**
 * A trained SVR regression saved to a temporary file is restored as an equal
 * object that yields the same predictions.
 */
public function testSaveAndRestore()
{
    $samples = [[60], [61], [62], [63], [65]];
    $targets = [3.1, 3.6, 3.8, 4, 4.1];

    $regression = new SVR(Kernel::LINEAR);
    $regression->train($samples, $targets);

    $testSamples = [64];
    $predicted = $regression->predict($testSamples);

    $filename = 'svr-test'.rand(100, 999).'-'.uniqid();
    $filepath = tempnam(sys_get_temp_dir(), $filename);

    try {
        $modelManager = new ModelManager();
        $modelManager->saveToFile($regression, $filepath);

        $restoredRegression = $modelManager->restoreFromFile($filepath);
        $this->assertEquals($regression, $restoredRegression);
        $this->assertEquals($predicted, $restoredRegression->predict($testSamples));
    } finally {
        // tempnam() creates the file on disk; remove it even when an assertion fails.
        if (is_string($filepath) && file_exists($filepath)) {
            unlink($filepath);
        }
    }
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user