mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2025-01-10 09:02:15 +00:00
create docs for tf-idf transformer
This commit is contained in:
parent
ba8927459c
commit
7c0767c15a
@ -9,7 +9,7 @@ This changelog references the relevant changes done in PHP-ML library.
|
|||||||
|
|
||||||
* 0.1.1 (2016-07-12)
|
* 0.1.1 (2016-07-12)
|
||||||
* feature [Cross Validation] Stratified Random Split - equal distribution for targets in split
|
* feature [Cross Validation] Stratified Random Split - equal distribution for targets in split
|
||||||
* feature [General] Documentation - add missing pages and fix links
|
* feature [General] Documentation - add missing pages (Pipeline, ConfusionMatrix and TfIdfTransformer) and fix links
|
||||||
|
|
||||||
* 0.1.0 (2016-07-08)
|
* 0.1.0 (2016-07-08)
|
||||||
* first develop release
|
* first develop release
|
||||||
|
@ -59,6 +59,7 @@ composer require php-ai/php-ml
|
|||||||
* [Imputation missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values/)
|
* [Imputation missing values](http://php-ml.readthedocs.io/en/latest/machine-learning/preprocessing/imputation-missing-values/)
|
||||||
* Feature Extraction
|
* Feature Extraction
|
||||||
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/)
|
* [Token Count Vectorizer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/token-count-vectorizer/)
|
||||||
|
* [Tf-idf Transformer](http://php-ml.readthedocs.io/en/latest/machine-learning/feature-extraction/tf-idf-transformer/)
|
||||||
* Datasets
|
* Datasets
|
||||||
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/)
|
* [CSV](http://php-ml.readthedocs.io/en/latest/machine-learning/datasets/csv-dataset/)
|
||||||
* Ready to use:
|
* Ready to use:
|
||||||
|
@ -59,6 +59,7 @@ composer require php-ai/php-ml
|
|||||||
* [Imputation missing values](machine-learning/preprocessing/imputation-missing-values/)
|
* [Imputation missing values](machine-learning/preprocessing/imputation-missing-values/)
|
||||||
* Feature Extraction
|
* Feature Extraction
|
||||||
* [Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)
|
* [Token Count Vectorizer](machine-learning/feature-extraction/token-count-vectorizer/)
|
||||||
|
* [Tf-idf Transformer](machine-learning/feature-extraction/tf-idf-transformer/)
|
||||||
* Datasets
|
* Datasets
|
||||||
* [CSV](machine-learning/datasets/csv-dataset/)
|
* [CSV](machine-learning/datasets/csv-dataset/)
|
||||||
* Ready to use:
|
* Ready to use:
|
||||||
|
@ -0,0 +1,42 @@
|
|||||||
|
# Tf-idf Transformer
|
||||||
|
|
||||||
|
Tf–idf, short for term frequency–inverse document frequency, is a numerical statistic that is intended to reflect how important a word is to a document in a collection or corpus.
|
||||||
|
|
||||||
|
### Constructor Parameters
|
||||||
|
|
||||||
|
* $samples (array) - samples used to fit the tf-idf model
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\FeatureExtraction\TfIdfTransformer;
|
||||||
|
|
||||||
|
$samples = [
|
||||||
|
[1, 2, 4],
|
||||||
|
[0, 2, 1]
|
||||||
|
];
|
||||||
|
|
||||||
|
$transformer = new TfIdfTransformer($samples);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Transformation
|
||||||
|
|
||||||
|
To transform a collection of text samples, use the `transform` method. Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
use Phpml\FeatureExtraction\TfIdfTransformer;
|
||||||
|
|
||||||
|
$samples = [
|
||||||
|
[0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 0, 5 => 0],
|
||||||
|
[0 => 1, 1 => 1, 2 => 0, 3 => 0, 4 => 2, 5 => 3],
|
||||||
|
];
|
||||||
|
|
||||||
|
$transformer = new TfIdfTransformer($samples);
|
||||||
|
$transformer->transform($samples);
|
||||||
|
|
||||||
|
/*
|
||||||
|
$samples = [
|
||||||
|
[0 => 0, 1 => 0, 2 => 0.602, 3 => 0.301, 4 => 0, 5 => 0],
|
||||||
|
[0 => 0, 1 => 0, 2 => 0, 3 => 0, 4 => 0.602, 5 => 0.903],
|
||||||
|
];
|
||||||
|
*/
|
||||||
|
|
||||||
|
```
|
@ -25,6 +25,7 @@ pages:
|
|||||||
- Imputation missing values: machine-learning/preprocessing/imputation-missing-values.md
|
- Imputation missing values: machine-learning/preprocessing/imputation-missing-values.md
|
||||||
- Feature Extraction:
|
- Feature Extraction:
|
||||||
- Token Count Vectorizer: machine-learning/feature-extraction/token-count-vectorizer.md
|
- Token Count Vectorizer: machine-learning/feature-extraction/token-count-vectorizer.md
|
||||||
|
- Tf-idf Transformer: machine-learning/feature-extraction/tf-idf-transformer.md
|
||||||
- Datasets:
|
- Datasets:
|
||||||
- Array Dataset: machine-learning/datasets/array-dataset.md
|
- Array Dataset: machine-learning/datasets/array-dataset.md
|
||||||
- CSV Dataset: machine-learning/datasets/csv-dataset.md
|
- CSV Dataset: machine-learning/datasets/csv-dataset.md
|
||||||
|
@ -10,7 +10,7 @@ class TfIdfTransformerTest extends \PHPUnit_Framework_TestCase
|
|||||||
{
|
{
|
||||||
public function testTfIdfTransformation()
|
public function testTfIdfTransformation()
|
||||||
{
|
{
|
||||||
//https://en.wikipedia.org/wiki/Tf%E2%80%93idf
|
// https://en.wikipedia.org/wiki/Tf-idf
|
||||||
|
|
||||||
$samples = [
|
$samples = [
|
||||||
[0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 0, 5 => 0],
|
[0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 0, 5 => 0],
|
||||||
|
Loading…
Reference in New Issue
Block a user