mirror of
https://github.com/Llewellynvdm/php-ml.git
synced 2025-01-26 16:48:25 +00:00
22 lines
344 B
PHP
22 lines
344 B
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace Phpml\Tokenization;
|
|
|
|
class WordTokenizer implements Tokenizer
{
    /**
     * Splits a text into word tokens.
     *
     * A token is a run of two or more consecutive word characters (`\w`),
     * matched with the `u` modifier so that pattern and subject are handled
     * as UTF-8. Single-character words therefore never appear in the result.
     *
     * @param string $text text to split into tokens
     *
     * @return array the matched tokens, in the order they occur in $text
     */
    public function tokenize(string $text): array
    {
        $pattern = '/\w\w+/u';

        // PREG_PATTERN_ORDER (the default ordering) stores every full-pattern
        // match under index 0 of the by-reference result array.
        // The return value of the call itself is not inspected.
        preg_match_all($pattern, $text, $matches, PREG_PATTERN_ORDER);

        return $matches[0];
    }
}
|