From 9f140d5b6ff611b36a33af620d156b9f5c317cda Mon Sep 17 00:00:00 2001 From: Arkadiusz Kondas Date: Thu, 14 Jul 2016 13:25:11 +0200 Subject: [PATCH] fix problem with token count vectorizer array order --- .../FeatureExtraction/TokenCountVectorizer.php | 2 ++ .../FeatureExtraction/TokenCountVectorizerTest.php | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Phpml/FeatureExtraction/TokenCountVectorizer.php b/src/Phpml/FeatureExtraction/TokenCountVectorizer.php index 3ec6af1..56d63e2 100644 --- a/src/Phpml/FeatureExtraction/TokenCountVectorizer.php +++ b/src/Phpml/FeatureExtraction/TokenCountVectorizer.php @@ -116,6 +116,8 @@ class TokenCountVectorizer implements Transformer } } + ksort($counts); + $sample = $counts; } diff --git a/tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php b/tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php index b18db60..22ff1a9 100644 --- a/tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php +++ b/tests/Phpml/FeatureExtraction/TokenCountVectorizerTest.php @@ -40,10 +40,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer()); $vectorizer->fit($samples); - $this->assertEquals($vocabulary, $vectorizer->getVocabulary()); + $this->assertSame($vocabulary, $vectorizer->getVocabulary()); $vectorizer->transform($samples); - $this->assertEquals($tokensCounts, $samples); + $this->assertSame($tokensCounts, $samples); } public function testTransformationWithMinimumDocumentTokenCountFrequency() @@ -74,10 +74,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 0.5); $vectorizer->fit($samples); - $this->assertEquals($vocabulary, $vectorizer->getVocabulary()); + $this->assertSame($vocabulary, $vectorizer->getVocabulary()); $vectorizer->transform($samples); - $this->assertEquals($tokensCounts, $samples); + $this->assertSame($tokensCounts, $samples); // word at least once in all samples $samples = [ @@ -96,7 +96,7 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase $vectorizer->fit($samples); $vectorizer->transform($samples); - $this->assertEquals($tokensCounts, $samples); + $this->assertSame($tokensCounts, $samples); } public function testTransformationWithStopWords() @@ -131,9 +131,9 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), $stopWords); $vectorizer->fit($samples); - $this->assertEquals($vocabulary, $vectorizer->getVocabulary()); + $this->assertSame($vocabulary, $vectorizer->getVocabulary()); $vectorizer->transform($samples); - $this->assertEquals($tokensCounts, $samples); + $this->assertSame($tokensCounts, $samples); } }