mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2024-11-04 20:47:53 +00:00
fix problem with token count vectorizer array order
This commit is contained in:
parent
7c0767c15a
commit
9f140d5b6f
@ -116,6 +116,8 @@ class TokenCountVectorizer implements Transformer
|
||||
}
|
||||
}
|
||||
|
||||
ksort($counts);
|
||||
|
||||
$sample = $counts;
|
||||
}
|
||||
|
||||
|
@ -40,10 +40,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
|
||||
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
|
||||
|
||||
$vectorizer->fit($samples);
|
||||
$this->assertEquals($vocabulary, $vectorizer->getVocabulary());
|
||||
$this->assertSame($vocabulary, $vectorizer->getVocabulary());
|
||||
|
||||
$vectorizer->transform($samples);
|
||||
$this->assertEquals($tokensCounts, $samples);
|
||||
$this->assertSame($tokensCounts, $samples);
|
||||
}
|
||||
|
||||
public function testTransformationWithMinimumDocumentTokenCountFrequency()
|
||||
@ -74,10 +74,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
|
||||
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 0.5);
|
||||
|
||||
$vectorizer->fit($samples);
|
||||
$this->assertEquals($vocabulary, $vectorizer->getVocabulary());
|
||||
$this->assertSame($vocabulary, $vectorizer->getVocabulary());
|
||||
|
||||
$vectorizer->transform($samples);
|
||||
$this->assertEquals($tokensCounts, $samples);
|
||||
$this->assertSame($tokensCounts, $samples);
|
||||
|
||||
// word at least once in all samples
|
||||
$samples = [
|
||||
@ -96,7 +96,7 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
|
||||
$vectorizer->fit($samples);
|
||||
$vectorizer->transform($samples);
|
||||
|
||||
$this->assertEquals($tokensCounts, $samples);
|
||||
$this->assertSame($tokensCounts, $samples);
|
||||
}
|
||||
|
||||
public function testTransformationWithStopWords()
|
||||
@ -131,9 +131,9 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
|
||||
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), $stopWords);
|
||||
|
||||
$vectorizer->fit($samples);
|
||||
$this->assertEquals($vocabulary, $vectorizer->getVocabulary());
|
||||
$this->assertSame($vocabulary, $vectorizer->getVocabulary());
|
||||
|
||||
$vectorizer->transform($samples);
|
||||
$this->assertEquals($tokensCounts, $samples);
|
||||
$this->assertSame($tokensCounts, $samples);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user