mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2025-01-23 15:18:24 +00:00
Fix TokenCountVectorizer array order: sort token counts by key (ksort) before assigning them to the sample
This commit is contained in:
parent
7c0767c15a
commit
9f140d5b6f
@ -116,6 +116,8 @@ class TokenCountVectorizer implements Transformer
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ksort($counts);
|
||||||
|
|
||||||
$sample = $counts;
|
$sample = $counts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -40,10 +40,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
|
|||||||
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
|
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
|
||||||
|
|
||||||
$vectorizer->fit($samples);
|
$vectorizer->fit($samples);
|
||||||
$this->assertEquals($vocabulary, $vectorizer->getVocabulary());
|
$this->assertSame($vocabulary, $vectorizer->getVocabulary());
|
||||||
|
|
||||||
$vectorizer->transform($samples);
|
$vectorizer->transform($samples);
|
||||||
$this->assertEquals($tokensCounts, $samples);
|
$this->assertSame($tokensCounts, $samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testTransformationWithMinimumDocumentTokenCountFrequency()
|
public function testTransformationWithMinimumDocumentTokenCountFrequency()
|
||||||
@ -74,10 +74,10 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
|
|||||||
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 0.5);
|
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), null, 0.5);
|
||||||
|
|
||||||
$vectorizer->fit($samples);
|
$vectorizer->fit($samples);
|
||||||
$this->assertEquals($vocabulary, $vectorizer->getVocabulary());
|
$this->assertSame($vocabulary, $vectorizer->getVocabulary());
|
||||||
|
|
||||||
$vectorizer->transform($samples);
|
$vectorizer->transform($samples);
|
||||||
$this->assertEquals($tokensCounts, $samples);
|
$this->assertSame($tokensCounts, $samples);
|
||||||
|
|
||||||
// word at least once in all samples
|
// word at least once in all samples
|
||||||
$samples = [
|
$samples = [
|
||||||
@ -96,7 +96,7 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
|
|||||||
$vectorizer->fit($samples);
|
$vectorizer->fit($samples);
|
||||||
$vectorizer->transform($samples);
|
$vectorizer->transform($samples);
|
||||||
|
|
||||||
$this->assertEquals($tokensCounts, $samples);
|
$this->assertSame($tokensCounts, $samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function testTransformationWithStopWords()
|
public function testTransformationWithStopWords()
|
||||||
@ -131,9 +131,9 @@ class TokenCountVectorizerTest extends \PHPUnit_Framework_TestCase
|
|||||||
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), $stopWords);
|
$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer(), $stopWords);
|
||||||
|
|
||||||
$vectorizer->fit($samples);
|
$vectorizer->fit($samples);
|
||||||
$this->assertEquals($vocabulary, $vectorizer->getVocabulary());
|
$this->assertSame($vocabulary, $vectorizer->getVocabulary());
|
||||||
|
|
||||||
$vectorizer->transform($samples);
|
$vectorizer->transform($samples);
|
||||||
$this->assertEquals($tokensCounts, $samples);
|
$this->assertSame($tokensCounts, $samples);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user