[PHP] Add EregToPregMatchRector

This commit is contained in:
Tomas Votruba 2018-10-08 13:19:10 +08:00
parent ae308689f3
commit 39c9c6982a
16 changed files with 596 additions and 0 deletions

View File

@ -96,6 +96,8 @@ parameters:
Symplify\CodingStandard\Sniffs\CleanCode\CognitiveComplexitySniff:
# tough logic
- 'src/Rector/MethodBody/NormalToFluentRector.php'
# copied 3rd party logic
- 'packages/Php/src/EregToPcreTransformer.php'
SlevomatCodingStandard\Sniffs\Functions\UnusedParameterSniff.UnusedParameter:
# enforced by interface

View File

@ -0,0 +1,244 @@
<?php declare(strict_types=1);
namespace Rector\Php;
use Nette\Utils\Strings;
use Rector\Php\Exception\InvalidEregException;
/**
 * Translates POSIX extended regular expressions (ERE), as accepted by the
 * ereg*() and split*() function family, into delimited PCRE patterns.
 *
 * @author Kang Seonghoon <public+ere2pcre@mearie.org>
 * @source https://gist.github.com/lifthrasiir/704754/7e486f43e62fd1c9d3669330c251f8ca4a59a3f8
 */
final class EregToPcreTransformer
{
    /**
     * Characters that are always backslash-escaped when emitted as a literal.
     * The configured delimiter is escaped in addition to these.
     *
     * @var string
     */
    private const PCRE_SPECIAL_CHARACTERS = '\^$.[]|()?*+{}-/';

    /**
     * POSIX character class name (including the colons) => text emitted inside the PCRE class.
     *
     * @var string[]
     */
    private const CHARACTER_CLASS_MAP = [
        ':alnum:' => '[:alnum:]',
        ':alpha:' => '[:alpha:]',
        ':blank:' => '[:blank:]',
        ':cntrl:' => '[:cntrl:]',
        ':digit:' => '\d',
        ':graph:' => '[:graph:]',
        ':lower:' => '[:lower:]',
        ':print:' => '[:print:]',
        ':punct:' => '[:punct:]',
        ':space:' => '\013\s', // should include VT
        ':upper:' => '[:upper:]',
        ':xdigit:' => '[:xdigit:]',
    ];

    /**
     * @var string
     */
    private $pcreDelimiter;

    /**
     * Change the delimiter via services configuration in rector.yml if you need it.
     * One delimiter is used for every generated pattern so the converted code stays uniform.
     *
     * @param string $pcreDelimiter expected to be a single character that PCRE accepts as a delimiter
     */
    public function __construct(string $pcreDelimiter = '#')
    {
        $this->pcreDelimiter = $pcreDelimiter;
    }

    /**
     * Converts an ERE into a PCRE pattern wrapped in the configured delimiter.
     *
     * Occurrences of the delimiter inside the pattern are escaped during the
     * conversion, so no pre-processing of the input is needed.
     *
     * @throws InvalidEregException when $ereg is not a valid (or supported) ERE
     */
    public function transform(string $ereg, bool $isCaseInsensitive): string
    {
        return $this->ere2pcre($ereg, $isCaseInsensitive);
    }

    /**
     * Converts the ERE $s into a delimited PCRE pattern with the "m" modifier
     * (plus "i" when $ignorecase is true).
     *
     * Results are memoised in a static cache shared by all instances; the cache
     * is keyed by delimiter and modifiers so differently configured instances
     * never see each other's results.
     *
     * @throws InvalidEregException on any invalid input
     */
    public function ere2pcre(string $s, bool $ignorecase): string
    {
        static $cache = [];

        $modifiers = $ignorecase ? 'mi' : 'm';
        if (isset($cache[$this->pcreDelimiter][$modifiers][$s])) {
            return $cache[$this->pcreDelimiter][$modifiers][$s];
        }

        [$r, $i] = $this->_ere2pcre($s, 0);
        // the top-level parse only stops early on a ")" that nobody opened
        if ($i !== strlen($s)) {
            throw new InvalidEregException('unescaped metacharacter ")"');
        }

        $pcre = $this->pcreDelimiter . $r . $this->pcreDelimiter . $modifiers;
        $cache[$this->pcreDelimiter][$modifiers][$s] = $pcre;

        return $pcre;
    }

    /**
     * Returns $c as a PCRE literal: PCRE metacharacters and the configured
     * delimiter get a leading backslash, everything else is returned as is.
     *
     * @throws InvalidEregException for a NUL byte
     */
    private function _ere2pcre_escape(string $c): string
    {
        if ($c === "\0") {
            throw new InvalidEregException('a literal null byte in the regex');
        }

        if ($c !== '' && strpos(self::PCRE_SPECIAL_CHARACTERS . $this->pcreDelimiter, $c) !== false) {
            return '\\' . $c;
        }

        return $c;
    }

    /**
     * Recursively converts ERE into PCRE, starting at the position $i.
     *
     * Parsing stops at the end of the string or at an unmatched ")", which the
     * caller (a group, or ere2pcre() at top level) has to account for.
     *
     * @return mixed[] [converted pattern (string), position where parsing stopped (int)]
     */
    private function _ere2pcre(string $s, int $i): array
    {
        // one entry per "|" alternative; $rr is the index of the current one
        $r = [''];
        $rr = 0;
        $l = strlen($s);
        while ($i < $l) {
            // atom
            $c = $s[$i];
            if ($c === '(') {
                if ($i + 1 < $l && $s[$i + 1] === ')') { // special case
                    $r[$rr] .= '()';
                    ++$i;
                } else {
                    [$t, $ii] = $this->_ere2pcre($s, $i + 1);
                    if ($ii >= $l || $s[$ii] !== ')') {
                        throw new InvalidEregException('"(" does not have a matching ")"');
                    }
                    $r[$rr] .= '(' . $t . ')';
                    $i = $ii;
                }
            } elseif ($c === '[') {
                // $i ends up on the closing "]", which the shared ++$i below consumes
                [$cls, $i] = $this->parseBracketExpression($s, $i + 1);
                $r[$rr] .= '[' . $cls . ']';
            } elseif ($c === ')') {
                break;
            } elseif ($c === '*' || $c === '+' || $c === '?') {
                throw new InvalidEregException('unescaped metacharacter "' . $c . '"');
            } elseif ($c === '{') {
                // behaviour kept exactly as in the upstream implementation
                if ($i + 1 < $l && strpos('0123456789', $s[$i + 1]) !== false) {
                    $r[$rr] .= '\{';
                } else {
                    throw new InvalidEregException('unescaped metacharacter "' . $c . '"');
                }
            } elseif ($c === '.') {
                $r[$rr] .= $c;
            } elseif ($c === '^' || $c === '$') {
                // anchors cannot take a repetition, so skip the "piece" step
                $r[$rr] .= $c;
                ++$i;
                continue;
            } elseif ($c === '|') {
                if ($r[$rr] === '') {
                    throw new InvalidEregException('empty branch');
                }
                $r[] = '';
                ++$rr;
                ++$i;
                continue;
            } elseif ($c === '\\') {
                if (++$i >= $l) {
                    throw new InvalidEregException('an invalid escape sequence at the end');
                }
                $r[$rr] .= $this->_ere2pcre_escape($s[$i]);
            } else { // including ] and } which are allowed as a literal character
                $r[$rr] .= $this->_ere2pcre_escape($c);
            }

            ++$i;
            if ($i >= $l) {
                break;
            }

            // piece after the atom (only ONE of them is possible)
            $c = $s[$i];
            if ($c === '*' || $c === '+' || $c === '?') {
                $r[$rr] .= $c;
                ++$i;
            } elseif ($c === '{') {
                [$quantifier, $i] = $this->parseBound($s, $i);
                $r[$rr] .= $quantifier;
            }
        }

        if ($r[$rr] === '') {
            throw new InvalidEregException('empty regular expression or branch');
        }

        return [implode('|', $r), $i];
    }

    /**
     * Converts the inside of a bracket expression.
     *
     * @param int $i position of the first character after the opening "["
     * @return mixed[] [PCRE class body without the surrounding brackets (string), position of the closing "]" (int)]
     */
    private function parseBracketExpression(string $s, int $i): array
    {
        $l = strlen($s);
        $cls = '';
        if ($i < $l && $s[$i] === '^') {
            $cls .= '^';
            ++$i;
        }
        if ($i >= $l) {
            throw new InvalidEregException('"[" does not have a matching "]"');
        }

        // do-while: the very first character is always a member, even if it is "]"
        $start = true;
        do {
            if ($s[$i] === '[' && $i + 1 < $l && strpos('.=:', $s[$i + 1]) !== false) {
                // [:class:], [.coll.] or [=equiv=]; only named classes are supported
                $ii = strpos($s, ']', $i);
                if ($ii === false) {
                    throw new InvalidEregException('"[" does not have a matching ' . '"]"');
                }
                $ccls = substr($s, $i + 1, $ii - ($i + 1));
                $pcreClass = self::CHARACTER_CLASS_MAP[$ccls] ?? null;
                if ($pcreClass === null) {
                    throw new InvalidEregException(
                        'an invalid or unsupported ' .
                        'character class [' . $ccls . ']'
                    );
                }
                $cls .= $pcreClass;
                $i = $ii + 1;
            } else {
                $a = $s[$i++];
                if ($a === '-' && ! $start && ! ($i < $l && $s[$i] === ']')) {
                    throw new InvalidEregException(
                        '"-" is invalid for the start ' .
                        'character in the brackets'
                    );
                }
                if ($i < $l && $s[$i] === '-') {
                    ++$i;
                    if ($i >= $l) {
                        // e.g. "[a-": nothing left to read as the end of the range
                        throw new InvalidEregException('"[" does not have a matching "]"');
                    }
                    $b = $s[$i];
                    if ($b === ']') {
                        // trailing "-" is a literal; leave $i on "]" like the regular loop exit does
                        $cls .= $this->_ere2pcre_escape($a) . '\-';
                        break;
                    }
                    if (ord($a) > ord($b)) {
                        throw new InvalidEregException(sprintf('an invalid character range "%s-%s"', $a, $b));
                    }
                    $cls .= $this->_ere2pcre_escape($a) . '-' . $this->_ere2pcre_escape($b);
                    ++$i;
                } else {
                    $cls .= $this->_ere2pcre_escape($a);
                }
            }
            $start = false;
        } while ($i < $l && $s[$i] !== ']');

        if ($i >= $l) {
            throw new InvalidEregException('"[" does not have a matching "]"');
        }

        return [$cls, $i];
    }

    /**
     * Converts an interval expression such as {3}, {3,} or {3,5}; each number
     * must be in the range 0-255.
     *
     * @param int $i position of the opening "{"
     * @return mixed[] [PCRE quantifier (string), position right after the closing "}" (int)]
     */
    private function parseBound(string $s, int $i): array
    {
        $ii = strpos($s, '}', $i);
        if ($ii === false) {
            throw new InvalidEregException('"{" does not have a matching "}"');
        }

        $bound = substr($s, $i + 1, $ii - ($i + 1));
        $number = '([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])';
        if (! preg_match('/^' . $number . '(,' . $number . '?)?$/', $bound, $m)) {
            throw new InvalidEregException('an invalid bound');
        }

        if (isset($m[3])) {
            if ((int) $m[1] > (int) $m[3]) {
                throw new InvalidEregException('an invalid bound');
            }
            $quantifier = '{' . $m[1] . ',' . $m[3] . '}';
        } elseif (isset($m[2])) {
            $quantifier = '{' . $m[1] . ',}';
        } else {
            $quantifier = '{' . $m[1] . '}';
        }

        return [$quantifier, $ii + 1];
    }
}

View File

@ -0,0 +1,9 @@
<?php declare(strict_types=1);
namespace Rector\Php\Exception;
use Exception;
/**
 * Signals that a POSIX extended regular expression is invalid or uses a
 * construct the ERE to PCRE conversion does not support.
 */
final class InvalidEregException extends Exception
{
}

View File

@ -0,0 +1,175 @@
<?php declare(strict_types=1);
namespace Rector\Php\Rector\FuncCall;
use Nette\Utils\Strings;
use PhpParser\Node;
use PhpParser\Node\Arg;
use PhpParser\Node\Expr\ArrayDimFetch;
use PhpParser\Node\Expr\Assign;
use PhpParser\Node\Expr\BinaryOp\Concat;
use PhpParser\Node\Expr\ConstFetch;
use PhpParser\Node\Expr\FuncCall;
use PhpParser\Node\Expr\Ternary;
use PhpParser\Node\Expr\Variable;
use PhpParser\Node\Name;
use PhpParser\Node\Scalar\LNumber;
use PhpParser\Node\Scalar\String_;
use Rector\NodeTypeResolver\Node\Attribute;
use Rector\Php\EregToPcreTransformer;
use Rector\Rector\AbstractRector;
use Rector\RectorDefinition\CodeSample;
use Rector\RectorDefinition\RectorDefinition;
/**
 * Rewrites calls of the POSIX regex functions (ereg, eregi, ereg_replace,
 * eregi_replace, split, spliti) to their preg_*() counterparts.
 *
 * @see http://php.net/reference.pcre.pattern.posix
 * @see https://stackoverflow.com/a/17033826/1348344
 * @see https://docstore.mik.ua/orelly/webprog/pcook/ch13_02.htm
 */
final class EregToPregMatchRector extends AbstractRector
{
    /**
     * Lower-cased POSIX function name => PCRE replacement.
     *
     * @var string[]
     */
    private $oldNamesToNewOnes = [
        'ereg' => 'preg_match',
        'eregi' => 'preg_match',
        'ereg_replace' => 'preg_replace',
        'eregi_replace' => 'preg_replace',
        'split' => 'preg_split',
        'spliti' => 'preg_split',
    ];

    /**
     * @var EregToPcreTransformer
     */
    private $eregToPcreTransformer;

    public function __construct(EregToPcreTransformer $eregToPcreTransformer)
    {
        $this->eregToPcreTransformer = $eregToPcreTransformer;
    }

    public function getDefinition(): RectorDefinition
    {
        return new RectorDefinition(
            'Changes ereg*() to preg*() calls',
            [new CodeSample('ereg("hi")', 'preg_match("#hi#");')]
        );
    }

    /**
     * @return string[]
     */
    public function getNodeTypes(): array
    {
        return [FuncCall::class];
    }

    /**
     * Converts a single ereg-family call. The node is returned untouched when
     * it is not one of the handled functions or when its pattern argument
     * cannot be converted (anything other than a string literal or a variable).
     *
     * @param FuncCall $funcCallNode
     */
    public function refactor(Node $funcCallNode): ?Node
    {
        // dynamic calls such as $callback(...) carry an expression, not a name
        if (! $funcCallNode->name instanceof Name) {
            return $funcCallNode;
        }

        // function names are case-insensitive in PHP
        $functionName = strtolower((string) $funcCallNode->name);
        if (! isset($this->oldNamesToNewOnes[$functionName])) {
            return $funcCallNode;
        }

        if (! isset($funcCallNode->args[0])) {
            return $funcCallNode;
        }

        $patternNode = $funcCallNode->args[0]->value;
        if ($patternNode instanceof String_) {
            $this->processStringPattern($funcCallNode, $patternNode, $functionName);
        } elseif ($patternNode instanceof Variable) {
            $this->processVariablePattern($funcCallNode, $patternNode, $functionName);
        } else {
            // renaming the function without converting the pattern would leave
            // an undelimited ERE as the PCRE pattern, so skip the call entirely
            return $funcCallNode;
        }

        $this->processSplitLimitArgument($funcCallNode, $functionName);

        $funcCallNode->name = new Name($this->oldNamesToNewOnes[$functionName]);

        // ereg|eregi 3rd argument return value fix
        if (in_array($functionName, ['ereg', 'eregi'], true) && isset($funcCallNode->args[2])) {
            $parentNode = $funcCallNode->getAttribute(Attribute::PARENT_NODE);
            if ($parentNode instanceof Assign) {
                return $this->createTernaryWithStrlenOfFirstMatch($funcCallNode);
            }
        }

        return $funcCallNode;
    }

    /**
     * Replaces a literal ERE pattern by its PCRE translation.
     */
    private function processStringPattern(FuncCall $funcCallNode, String_ $patternNode, string $functionName): void
    {
        $pattern = $this->eregToPcreTransformer->transform(
            $patternNode->value,
            $this->isCaseInsensitiveFunction($functionName)
        );

        $funcCallNode->args[0]->value = new String_($pattern);
    }

    /**
     * Wraps a variable pattern as '#' . preg_quote($pattern, '#') . '#m' (or '#mi'
     * for the case-insensitive functions), since its value is only known at runtime.
     */
    private function processVariablePattern(FuncCall $funcCallNode, Variable $patternNode, string $functionName): void
    {
        $pregQuotePatternNode = new FuncCall(new Name('preg_quote'), [
            new Arg($patternNode),
            new Arg(new String_('#')),
        ]);

        $startConcat = new Concat(new String_('#'), $pregQuotePatternNode);

        $endDelimiter = $this->isCaseInsensitiveFunction($functionName) ? '#mi' : '#m';
        $concat = new Concat($startConcat, new String_($endDelimiter));

        $funcCallNode->args[0]->value = $concat;
    }

    /**
     * True for the "i" variants: eregi, eregi_replace and spliti.
     */
    private function isCaseInsensitiveFunction(string $functionName): bool
    {
        return Strings::contains($functionName, 'eregi')
            || Strings::contains($functionName, 'spliti');
    }

    /**
     * Equivalent of:
     * split(' ', 'hey Tom', 0);
     * ↓
     * preg_split('# #', 'hey Tom', 1);
     */
    private function processSplitLimitArgument(FuncCall $funcCallNode, string $functionName): void
    {
        if (! Strings::startsWith($functionName, 'split')) {
            return;
        }

        // 3rd argument - $limit, 0 → 1
        if (! isset($funcCallNode->args[2])) {
            return;
        }

        // only a literal integer limit can be inspected statically
        $limitNode = $funcCallNode->args[2]->value;
        if (! $limitNode instanceof LNumber) {
            return;
        }

        if ($limitNode->value !== 0) {
            return;
        }

        $limitNode->value = 1;
    }

    /**
     * Builds `preg_match(...) ? strlen($matches[0]) : false`, where $matches is
     * the call's 3rd argument, to be used in place of an assigned ereg() result.
     */
    private function createTernaryWithStrlenOfFirstMatch(FuncCall $funcCallNode): Ternary
    {
        $arrayDimFetch = new ArrayDimFetch($funcCallNode->args[2]->value, new LNumber(0));
        $strlenFuncCall = new FuncCall(new Name('strlen'), [new Arg($arrayDimFetch)]);

        return new Ternary($funcCallNode, $strlenFuncCall, new ConstFetch(new Name('false')));
    }
}

View File

@ -0,0 +1,72 @@
<?php declare(strict_types=1);
namespace Rector\Php\Tests;
use Iterator;
use PHPUnit\Framework\TestCase;
use Rector\Php\EregToPcreTransformer;
final class EregToPcreTransformerTest extends TestCase
{
    /**
     * @var EregToPcreTransformer
     */
    private $eregToPcreTransformer;

    /**
     * Creates a fresh transformer with the default delimiter for every test.
     */
    protected function setUp(): void
    {
        $this->eregToPcreTransformer = new EregToPcreTransformer();
    }

    /**
     * @dataProvider provideDataDroping()
     * @dataProvider provideDataCaseSensitive()
     */
    public function testCaseSensitive(string $ereg, string $expectedPcre): void
    {
        $this->assertSame($expectedPcre, $this->eregToPcreTransformer->transform($ereg, false));
    }

    /**
     * @dataProvider provideDataCaseInsensitive()
     */
    public function testCaseInsensitive(string $ereg, string $expectedPcre): void
    {
        $this->assertSame($expectedPcre, $this->eregToPcreTransformer->transform($ereg, true));
    }

    /**
     * Each data set is [ERE input, expected case-sensitive PCRE].
     */
    public function provideDataCaseSensitive(): Iterator
    {
        yield from [
            ['hi', '#hi#m'],
        ];
    }

    /**
     * Each data set is [ERE input, expected case-insensitive PCRE].
     */
    public function provideDataCaseInsensitive(): Iterator
    {
        yield from [
            ['hi', '#hi#mi'],
        ];
    }

    /**
     * Escaping, bracket expression, alternation and anchor cases;
     * each data set is [ERE input, expected case-sensitive PCRE].
     */
    public function provideDataDroping(): Iterator
    {
        yield from [
            ['mearie\.org', '#mearie\.org#m'],
            ['mearie[.,]org', '#mearie[\.,]org#m'],
            ['[a-z]+[.,][a-z]+', '#[a-z]+[\.,][a-z]+#m'],
            ['^[a-z]+[.,][a-z]+$', '#^[a-z]+[\.,][a-z]+$#m'],
            ['^[a-z]+[.,][a-z]{3,}$', '#^[a-z]+[\.,][a-z]{3,}$#m'],
            ['a|b|(c|d)|e', '#a|b|(c|d)|e#m'],
            ['a|b|()|c', '#a|b|()|c#m'],
            ['[[:alnum:][:punct:]]', '#[[:alnum:][:punct:]]#m'],
            ['[]-z]', '#[\]-z]#m'],
            ['[[a]]', '#[\[a]\]#m'],
            ['[---]', '#[\--\-]#m'],
            ['[a\z]', '#[a\\\z]#m'],
            ['[^^]', '#[^\^]#m'],
            ['^$^$^$^$', '#^$^$^$^$#m'],
            ['\([^>]*\"?[^)]*\)', '#\([^>]*"?[^\)]*\)#m'],
            [
                '^(http(s?):\/\/|ftp:\/\/)*([[:alpha:]][-[:alnum:]]*[[:alnum:]])(\.[[:alpha:]][-[:alnum:]]*[[:alpha:]])+(/[[:alpha:]][-[:alnum:]]*[[:alnum:]])*(\/?)(/[[:alpha:]][-[:alnum:]]*\.[[:alpha:]]{3,5})?(\?([[:alnum:]][-_%[:alnum:]]*=[-_%[:alnum:]]+)(&([[:alnum:]][-_%[:alnum:]]*=[-_%[:alnum:]]+))*)?$',
                '#^(http(s?):\/\/|ftp:\/\/)*([[:alpha:]][\-[:alnum:]]*[[:alnum:]])(\.[[:alpha:]][\-[:alnum:]]*[[:alpha:]])+(\/[[:alpha:]][\-[:alnum:]]*[[:alnum:]])*(\/?)(\/[[:alpha:]][\-[:alnum:]]*\.[[:alpha:]]{3,5})?(\?([[:alnum:]][\-_%[:alnum:]]*=[\-_%[:alnum:]]+)(&([[:alnum:]][\-_%[:alnum:]]*=[\-_%[:alnum:]]+))*)?$#m',
            ],
        ];
    }
}

View File

@ -0,0 +1,8 @@
<?php declare(strict_types=1);
preg_match('#hi#m', 'hi, she said');
preg_match('#hi#mi', 'hi, she said');
$pattern = 'hi';
preg_match('#' . preg_quote($pattern, '#') . '#m', 'hi, she said');

View File

@ -0,0 +1,5 @@
<?php declare(strict_types=1);
preg_replace('#hi#m', '\\1', 'hi, she said');
preg_replace('#hi#mi', '\\1', 'hi, she said');

View File

@ -0,0 +1,7 @@
<?php declare(strict_types=1);
preg_split('#hi#m', 'hi, she said');
preg_split('#hi#m', 'hi, she said', 1);
preg_split('#hi#mi', 'hi, she said', 1);

View File

@ -0,0 +1,7 @@
<?php declare(strict_types=1);
$result = preg_match('#hitom#m', '...hitom...', $matches) ? strlen($matches[0]) : false;
var_dump($result); // 5
// no return value
preg_match('#hitom#m', '...hitom...', $matches);

View File

@ -0,0 +1,36 @@
<?php declare(strict_types=1);
namespace Rector\Php\Tests\Rector\FuncCall\EregToPregMatchRector;
use Iterator;
use Rector\Testing\PHPUnit\AbstractRectorTestCase;
/**
 * @covers \Rector\Php\Rector\FuncCall\EregToPregMatchRector
 *
 * @see https://stackoverflow.com/a/35355700/1348344
 */
final class EregToPregMatchRectorTest extends AbstractRectorTestCase
{
    /**
     * @dataProvider provideWrongToFixedFiles()
     */
    public function test(string $wrong, string $fixed): void
    {
        $this->doTestFileMatchesExpectedContent($wrong, $fixed);
    }

    /**
     * Each data set is [path of the input fixture, path of the expected output fixture].
     */
    public function provideWrongToFixedFiles(): Iterator
    {
        yield from [
            [__DIR__ . '/Wrong/wrong.php.inc', __DIR__ . '/Correct/correct.php.inc'],
            [__DIR__ . '/Wrong/wrong2.php.inc', __DIR__ . '/Correct/correct2.php.inc'],
            [__DIR__ . '/Wrong/wrong3.php.inc', __DIR__ . '/Correct/correct3.php.inc'],
            // see https://3v4l.org/TcqvH for the differing return values
            [__DIR__ . '/Wrong/wrong4.php.inc', __DIR__ . '/Correct/correct4.php.inc'],
        ];
    }

    /**
     * Path of the configuration file that lives next to this test.
     */
    protected function provideConfig(): string
    {
        return __DIR__ . '/config.yml';
    }
}

View File

@ -0,0 +1,8 @@
<?php declare(strict_types=1);
ereg('hi', 'hi, she said');
eregi('hi', 'hi, she said');
$pattern = 'hi';
ereg($pattern, 'hi, she said');

View File

@ -0,0 +1,5 @@
<?php declare(strict_types=1);
ereg_replace('hi', '\\1', 'hi, she said');
eregi_replace('hi', '\\1', 'hi, she said');

View File

@ -0,0 +1,7 @@
<?php declare(strict_types=1);
split('hi', 'hi, she said');
split('hi', 'hi, she said', 0);
spliti('hi', 'hi, she said', 1);

View File

@ -0,0 +1,7 @@
<?php declare(strict_types=1);
$result = ereg('hitom', '...hitom...', $matches);
var_dump($result); // 5
// no return value
ereg('hitom', '...hitom...', $matches);

View File

@ -0,0 +1,2 @@
services:
Rector\Php\Rector\FuncCall\EregToPregMatchRector: ~

View File

@ -14,6 +14,8 @@ parameters:
- '*tests/**/Wrong/*'
- '*tests/**/Correct/*'
- '*tests/**/Source/*'
# intentionally original
- 'packages/Php/src/EregToPcreTransformer.php'
ignoreErrors:
# misuse of interface and class