Skip to content

Commit

Permalink
Strings::matchAll(): added option 'lazy'
Browse files Browse the repository at this point in the history
  • Loading branch information
dg committed Aug 7, 2024
1 parent 31f4684 commit 736c567
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 9 deletions.
32 changes: 26 additions & 6 deletions src/Utils/Strings.php
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ public static function match(
/**
* Searches the string for all occurrences matching the regular expression and
* returns an array of arrays containing the found expression and each subexpression.
* @return ($lazy is true ? \Generator<int, array> : array[])
*/
public static function matchAll(
string $subject,
Expand All @@ -599,21 +600,41 @@ public static function matchAll(
bool $unmatchedAsNull = false,
bool $patternOrder = false,
bool $utf8 = false,
): array
bool $lazy = false,
): array|\Generator
{
$flags = is_int($captureOffset) // back compatibility
? $captureOffset
: ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0) | ($patternOrder ? PREG_PATTERN_ORDER : 0);

if ($utf8) {
$offset = strlen(self::substring($subject, 0, $offset));
$pattern .= 'u';
}

if ($lazy) {
$flags = PREG_OFFSET_CAPTURE | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0);
return (function () use ($utf8, $captureOffset, $flags, $subject, $pattern, $offset) {
$counter = 0;
while (
$offset <= strlen($subject) - ($counter ? 1 : 0)
&& self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset])
) {
$offset = $m[0][1] + max(1, strlen($m[0][0]));
if (!$captureOffset) {
$m = array_map(fn($item) => $item[0], $m);
} elseif ($utf8) {
$m = self::bytesToChars($subject, [$m])[0];
}
yield $counter++ => $m;
}
})();
}

if ($offset > strlen($subject)) {
return [];
}

$flags = is_int($captureOffset) // back compatibility
? $captureOffset
: ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0) | ($patternOrder ? PREG_PATTERN_ORDER : 0);

self::pcre('preg_match_all', [
$pattern, $subject, &$m,
($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
Expand All @@ -622,7 +643,6 @@ public static function matchAll(
return $utf8 && $captureOffset
? self::bytesToChars($subject, $m)
: $m;

}


Expand Down
5 changes: 3 additions & 2 deletions tests/Utils/Strings.match().phpt
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,6 @@ Assert::same([['k', 7]], Strings::match('žluťoučký kůň', '#[e-l]+#u', capt


// right edge
Assert::null(Strings::match('hello world!', '', offset: 50));
Assert::null(Strings::match('', '', offset: 1));
Assert::same([''], Strings::match('he', '#(?<=e)#', offset: 2));
Assert::same(null, Strings::match('he', '#(?<=x)#', offset: 2));
Assert::same(null, Strings::match('he', '##', offset: 3));
10 changes: 9 additions & 1 deletion tests/Utils/Strings.matchAll().phpt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ require __DIR__ . '/../bootstrap.php';
Assert::same([], Strings::matchAll('hello world!', '#([E-L])+#'));


// sentinel
Assert::same([
[''], [''], [''],
], Strings::matchAll('he', '##'));


// capturing
Assert::same([
['hell', 'l'],
Expand Down Expand Up @@ -81,4 +87,6 @@ Assert::same([['e', null]], Strings::matchAll('hello world!', '#e(x)*#', unmatch


// right edge
Assert::same([], Strings::matchAll('hello world!', '', offset: 50));
Assert::same([['']], Strings::matchAll('he', '#(?<=e)#', offset: 2));
Assert::same([], Strings::matchAll('he', '#(?<=x)#', offset: 2));
Assert::same([], Strings::matchAll('he', '##', offset: 3));
89 changes: 89 additions & 0 deletions tests/Utils/Strings.matchAll.lazy().phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<?php

/**
* Test: Nette\Utils\Strings::matchAll() in lazy mode (lazy: true)
*/

declare(strict_types=1);

use Nette\Utils\Strings;
use Tester\Assert;

require __DIR__ . '/../bootstrap.php';


// Runs matchAll() in lazy mode and drains the returned generator into a plain array,
// so that every case below can be compared with Assert::same().
// Extra options (offset, utf8, captureOffset, ...) are forwarded as named arguments.
$lazyMatches = static function (string $subject, string $pattern, mixed ...$options): array {
	$options['lazy'] = true;
	return iterator_to_array(Strings::matchAll($subject, $pattern, ...$options));
};


// not matched: a Generator is still returned, it simply yields nothing
$unmatched = Strings::matchAll('hello world!', '#([E-L])+#', lazy: true);
Assert::type(Generator::class, $unmatched);
Assert::same(0, iterator_count(Strings::matchAll('hello world!', '#([E-L])+#', lazy: true)));


// sentinel: iteration has to stop at the end of the subject
$everyChar = $lazyMatches('he', '#.#');
Assert::same([['h'], ['e']], $everyChar);

$emptyPattern = $lazyMatches('he', '##');
Assert::same([[''], ['']], $emptyPattern);


// right edge: starting offset at, and past, the end of the subject
$atEndMatching = $lazyMatches('he', '#(?<=e)#', offset: 2);
Assert::same([['']], $atEndMatching);

$atEndNotMatching = $lazyMatches('he', '#(?<=x)#', offset: 2);
Assert::same([], $atEndNotMatching);

$pastEnd = $lazyMatches('he', '##', offset: 3);
Assert::same([], $pastEnd);


// capturing: with and without a subpattern
$withGroup = $lazyMatches('hello world!', '#([e-l])+#');
Assert::same(
	[
		['hell', 'l'],
		['l', 'l'],
	],
	$withGroup,
);

$withoutGroup = $lazyMatches('hello world!', '#[e-l]+#');
Assert::same(
	[
		['hell'],
		['l'],
	],
	$withoutGroup,
);


// options: captureOffset alone — the expected positions count bytes
$byteOffsets = $lazyMatches('žluťoučký kůň!', '#([a-z])([a-z]*)#u', captureOffset: true);
Assert::same(
	[
		[['lu', 2], ['l', 2], ['u', 3]],
		[['ou', 6], ['o', 6], ['u', 7]],
		[['k', 10], ['k', 10], ['', 11]],
		[['k', 14], ['k', 14], ['', 15]],
	],
	$byteOffsets,
);

// options: captureOffset together with utf8 — the expected positions count characters
$charOffsets = $lazyMatches('žluťoučký kůň!', '#([a-z])([a-z]*)#u', captureOffset: true, utf8: true);
Assert::same(
	[
		[['lu', 1], ['l', 1], ['u', 2]],
		[['ou', 4], ['o', 4], ['u', 5]],
		[['k', 7], ['k', 7], ['', 8]],
		[['k', 10], ['k', 10], ['', 11]],
	],
	$charOffsets,
);

// options: the same starting offset gives different results without and with utf8
$fromByteOffset = $lazyMatches('žluťoučký kůň', '#[e-l]+#u', offset: 2);
Assert::same([['l'], ['k'], ['k']], $fromByteOffset);

$fromCharOffset = $lazyMatches('žluťoučký kůň', '#[e-l]+#u', offset: 2, utf8: true);
Assert::same([['k'], ['k']], $fromCharOffset);

// options: a subpattern that did not participate is reported as null
$nullForUnmatched = $lazyMatches('hello world!', '#e(x)*#', unmatchedAsNull: true);
Assert::same([['e', null]], $nullForUnmatched);

0 comments on commit 736c567

Please sign in to comment.