fix problem with token count vectorizer array order

This commit is contained in:
Arkadiusz Kondas 2016-07-14 13:25:11 +02:00
parent 7c0767c15a
commit 9f140d5b6f
2 changed files with 9 additions and 7 deletions

View File

@@ -116,6 +116,8 @@ class TokenCountVectorizer implements Transformer
}
}
ksort($counts);
$sample = $counts;
}

View File

@@ -40,10 +40,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
$vectorizer->fit($samples);
$this->assertEquals($vocabulary, $vectorizer->getVocabulary());
$this->assertSame($vocabulary, $vectorizer->getVocabulary());
$vectorizer->transform($samples);
$this->assertEquals($tokensCounts, $samples);
$this->assertSame($tokensCounts, $samples);
}
public function testTransformationWithMinimumDocumentTokenCountFrequency()
@@ -74,10 +74,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 0.5);
$vectorizer->fit($samples);
$this->assertEquals($vocabulary, $vectorizer->getVocabulary());
$this->assertSame($vocabulary, $vectorizer->getVocabulary());
$vectorizer->transform($samples);
$this->assertEquals($tokensCounts, $samples);
$this->assertSame($tokensCounts, $samples);
// word at least once in all samples
$samples = [
@@ -96,7 +96,7 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
$vectorizer->fit($samples);
$vectorizer->transform($samples);
$this->assertEquals($tokensCounts, $samples);
$this->assertSame($tokensCounts, $samples);
}
public function testTransformationWithStopWords()
@@ -131,9 +131,9 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), $stopWords);
$vectorizer->fit($samples);
$this->assertEquals($vocabulary, $vectorizer->getVocabulary());
$this->assertSame($vocabulary, $vectorizer->getVocabulary());
$vectorizer->transform($samples);
$this->assertEquals($tokensCounts, $samples);
$this->assertSame($tokensCounts, $samples);
}
}