From f7537c049af8ecf77f317b1ed29af49791731eca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Monlla=C3=B3?=
Date: Thu, 16 Nov 2017 21:40:11 +0100
Subject: [PATCH] documentation add tokenizer->fit required to build the dictionary (#155)

---
 .../feature-extraction/token-count-vectorizer.md | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/docs/machine-learning/feature-extraction/token-count-vectorizer.md b/docs/machine-learning/feature-extraction/token-count-vectorizer.md
index 83c6aaa..c4ede68 100644
--- a/docs/machine-learning/feature-extraction/token-count-vectorizer.md
+++ b/docs/machine-learning/feature-extraction/token-count-vectorizer.md
@@ -26,13 +26,18 @@ $samples = [
 ];
 
 $vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());
-$vectorizer->transform($samples)
-// return $vector = [
+
+// Build the dictionary.
+$vectorizer->fit($samples);
+
+// Transform the provided text samples into a vectorized list.
+$vectorizer->transform($samples);
+// return $samples = [
 // [0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 1],
 // [5 => 1, 6 => 1, 1 => 1, 2 => 1],
 // [5 => 1, 7 => 2, 8 => 1, 9 => 1],
 //];
-
+ 
 ```
 
 ### Vocabulary
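
For context, here is a minimal self-contained sketch of the fit/transform flow this patch documents. The `$samples` strings are not part of the hunk; they are reconstructed so that the token indices match the output shown in the diff, and the actual docs page may use different text. `TokenCountVectorizer`, `WhitespaceTokenizer`, `fit()`, `transform()`, and `getVocabulary()` are real php-ml APIs.

```php
<?php

require_once 'vendor/autoload.php';

use Phpml\FeatureExtraction\TokenCountVectorizer;
use Phpml\Tokenization\WhitespaceTokenizer;

// Sample texts reconstructed to reproduce the token counts in the hunk.
$samples = [
    'Lorem ipsum dolor sit amet dolor',
    'Mauris placerat ipsum dolor',
    'Mauris diam eros fringilla diam',
];

$vectorizer = new TokenCountVectorizer(new WhitespaceTokenizer());

// fit() builds the token dictionary; without this call, transform()
// has no vocabulary to map tokens onto -- the point of the patch.
$vectorizer->fit($samples);

// transform() replaces each text sample in place (the argument is
// passed by reference) with a token-index => count map.
$vectorizer->transform($samples);
// $samples[0] === [0 => 1, 1 => 1, 2 => 2, 3 => 1, 4 => 1]

// The learned dictionary is available afterwards.
var_dump($vectorizer->getVocabulary());
```

Note that `transform()` returns nothing; the vectorized result lands in `$samples` itself, which is why the patch also changes the expected-output comment from `$vector` to `$samples`.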