Fix the array-order problem in the token count vectorizer

This commit is contained in:
Arkadiusz Kondas 2016-07-14 13:25:11 +02:00
parent 7c0767c15a
commit 9f140d5b6f
2 changed files with 9 additions and 7 deletions

View File

@@ -116,6 +116,8 @@ class TokenCountVectorizer implements Transformer
} }
} }
ksort($counts);
$sample = $counts; $sample = $counts;
} }

View File

@@ -40,10 +40,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer()); $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
$vectorizer->fit($samples); $vectorizer->fit($samples);
$this->assertEquals($vocabulary, $vectorizer->getVocabulary()); $this->assertSame($vocabulary, $vectorizer->getVocabulary());
$vectorizer->transform($samples); $vectorizer->transform($samples);
$this->assertEquals($tokensCounts, $samples); $this->assertSame($tokensCounts, $samples);
} }
public function testTransformationWithMinimumDocumentTokenCountFrequency() public function testTransformationWithMinimumDocumentTokenCountFrequency()
@@ -74,10 +74,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 0.5); $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 0.5);
$vectorizer->fit($samples); $vectorizer->fit($samples);
$this->assertEquals($vocabulary, $vectorizer->getVocabulary()); $this->assertSame($vocabulary, $vectorizer->getVocabulary());
$vectorizer->transform($samples); $vectorizer->transform($samples);
$this->assertEquals($tokensCounts, $samples); $this->assertSame($tokensCounts, $samples);
// word at least once in all samples // word at least once in all samples
$samples = [ $samples = [
@@ -96,7 +96,7 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
$vectorizer->fit($samples); $vectorizer->fit($samples);
$vectorizer->transform($samples); $vectorizer->transform($samples);
$this->assertEquals($tokensCounts, $samples); $this->assertSame($tokensCounts, $samples);
} }
public function testTransformationWithStopWords() public function testTransformationWithStopWords()
@@ -131,9 +131,9 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), $stopWords); $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), $stopWords);
$vectorizer->fit($samples); $vectorizer->fit($samples);
$this->assertEquals($vocabulary, $vectorizer->getVocabulary()); $this->assertSame($vocabulary, $vectorizer->getVocabulary());
$vectorizer->transform($samples); $vectorizer->transform($samples);
$this->assertEquals($tokensCounts, $samples); $this->assertSame($tokensCounts, $samples);
} }
} }