mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2025-02-04 21:18:33 +00:00
22 lines
347 B
PHP
22 lines
347 B
PHP
<?php
|
|
|
|
declare (strict_types = 1);
|
|
|
|
namespace Phpml\Tokenization;
|
|
|
|
class WordTokenizer implements Tokenizer
{
    /**
     * Splits a text into word tokens.
     *
     * A token is a run of two or more consecutive word characters (`\w`).
     * The pattern carries the `u` modifier, so pattern and subject are
     * handled as UTF-8. Because at least two word characters are required,
     * single-character words never match and are left out of the result.
     *
     * @param string $text Text to split into tokens.
     *
     * @return array Matched tokens, in the order they occur in $text.
     */
    public function tokenize(string $text): array
    {
        // PREG_PATTERN_ORDER is the default layout: index 0 collects every
        // full-pattern match. The pattern has no capture groups, so that
        // entry is the complete token list.
        preg_match_all('/\w\w+/u', $text, $matches, PREG_PATTERN_ORDER);

        return $matches[0];
    }
}
|