From 51695333b7f1c94144eaee595e700556b6e6e454 Mon Sep 17 00:00:00 2001 From: Camille Hodoul Date: Sun, 5 Jul 2020 18:33:05 +0200 Subject: [PATCH] benchmark against native functions and improve readme --- README.md | 77 ++++++++++++++++++++++--- benchmarks/MapBench.php | 46 +++++++++++++++ benchmarks/PipeFilterMapReduceBench.php | 65 +++++++++++++++++++++ benchmarks/PipeMapFilterTakeBench.php | 61 ++++++++++++++++++++ composer.json | 6 ++ phpbench.json | 3 + 6 files changed, 251 insertions(+), 7 deletions(-) create mode 100644 benchmarks/MapBench.php create mode 100644 benchmarks/PipeFilterMapReduceBench.php create mode 100644 benchmarks/PipeMapFilterTakeBench.php create mode 100644 phpbench.json diff --git a/README.md b/README.md index 4920146..c297446 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,32 @@ [![Coverage Status][badge-coverage]][coverage] [![Total Downloads][badge-downloads]][downloads] -Lazy list processing functions. +LazyLists is a toolbox for iterating over a `Traversable` and transforming it. This is your typical `map`, `filter`, `pipe` library, but with a twist: you only ever iterate at most once. -This project adheres to a [Contributor Code of Conduct][conduct]. By -participating in this project and its community, you are expected to uphold this -code. +Using `\LazyLists\pipe()`, you can compose `filter`, `map`, `reduce`, `each`, `until`(...) into a single function that will iterate over your input only once (even less than once if you use `take` or `until`), thus, "Lazy". 
+ +For example, let's say we want to compute something about the first 50 products ordered in the Technology category of an online store: +```php +$getUsefulInsight = pipe( + map($getProductsInOrder), + flatten(1), + filter($isTechnologyRelated), + take(50), + reduce($computeInsight, $initialValue) +); +$insight = $getUsefulInsight($orders); +``` +Even if `$orders` is very large, `$getUsefulInsight` will only step through it until `$isTechnologyRelated` has let 50 items through (or `$orders` runs out before that), then stop the iteration early and return the final result of `$computeInsight`. +`$getProductsInOrder` and `$isTechnologyRelated` will be called only as long as they are needed. +This is particularly useful if the cost of iteration is high. + + +Alternatively, you can just use the functions directly: `$output = \LazyLists\map($transformation, $input)`. + +You can use these features on arrays or `Traversable`s such as iterators. + +See the examples below. ## Installation @@ -26,7 +46,7 @@ composer require camille-hdl/lazy-lists ``` -## Documentation +## Usage You can use the functions directly on arrays or `Traversable`s @@ -58,7 +78,10 @@ use LazyLists\flatten; use LazyLists\take; use LazyLists\until; -$pipeline = pipe( +/** + * Compose steps together into a single function + */ +$computeFinalResult = pipe( flatten(1), filter($myPredicate), map($myTransformation), @@ -67,7 +90,7 @@ $pipeline = pipe( reduce($myAggregator, 0) ); // returns an array -$result = $pipeline($myArrayOrIterator); +$result = $computeFinalResult($myArrayOrIterator); // returns an iterator $filterIterator = iterate( @@ -89,8 +112,48 @@ foreach ($reduceIterator([1, 5, 10]) as $reduction) { // 1, 5 ``` +### Gotchas + +* When iterating over arrays, keys are *never* kept, by design. 
+* The return value of `pipe(...)($input)` depends on the last function in the composition, according to the following heuristic, which is intuitive to me but maybe not to you: + * if the last function is one that returns a single value, such as `reduce()`, the return value will be this value; + * if the last function is one that transforms an input set into an output set (such as `map` or `flatten`), the return value will be the output set (as a flat array); + * if the last function is one that restricts the output set, such as `filter`, `take` or `until`, the return value will be the output set (as a flat array); +* `iterate(...)($input)` behaves in the same way, but returns an iterator instead of an array. + +### Extending LazyLists + +* You can create your own function usable with `pipe()` and `iterate()` by having it return a `\LazyLists\Transducer\TransducerInterface`. +* You can create your own composition function (to use instead of `pipe()` or `iterate()`). It should be relatively easy to implement something that can take a stream as input, for example. Look up the source code of `pipe()` (it's very short!) and `\LazyLists\LazyWorker` (it's less short!). + +## Performance considerations + +LazyLists is optimized for minimizing the number of iterations while (hopefully) allowing for API elegance when transforming an input set. + +Thus, the lower the cost of iteration, the less incentive there is to use this library. + +If your program is only iterating over fast, in-memory data structures, performance will almost always be worse than using the built-in `array_*` functions. That said, if you use `\LazyLists\pipe()` to compose your functions, the performance gap reduces as the number of iterations increases. + +You can see for yourself by running `composer run phpbench`. 
+ +However, using `\LazyLists\pipe()` will probably be beneficial in terms of performance if you either: + +* use I/O during iteration, or +* use `\LazyLists\take()` and `\LazyLists\until()` to restrict your output set. + +## Inspiration + +This library attempts a subset of what transducers can do. There are transducer libraries out there, but I hope to bring a simpler API. + +* [Watch a talk on transducers](https://www.youtube.com/watch?v=6mTbuzafcII) +* https://github.com/mtdowling/transducers.php + ## Contributing +This project adheres to a [Contributor Code of Conduct][conduct]. By +participating in this project and its community, you are expected to uphold this +code. + Contributions are welcome! Please read [CONTRIBUTING][] for details. diff --git a/benchmarks/MapBench.php b/benchmarks/MapBench.php new file mode 100644 index 0000000..82a50a0 --- /dev/null +++ b/benchmarks/MapBench.php @@ -0,0 +1,46 @@ +items = self::getItems(); + } + + private static function getItems() + { + $numberOfItems = 10000; + $items = []; + for($i = 0; $i < $numberOfItems; $i++) { + $object = new stdClass; + $object->number = $i; + $items[] = $object; + } + return $items; + } + /** + * @Revs(1000) + * @Iterations(5) + */ + public function benchNative() + { + $items = $this->items; + $numbers = \array_map(static function($item) { + return $item->number; + }, $items); + } + /** + * @Revs(1000) + * @Iterations(5) + */ + public function benchLazy() + { + $items = $this->items; + $numbers = \LazyLists\map(static function($item) { + return $item->number; + }, $items); + } +} \ No newline at end of file diff --git a/benchmarks/PipeFilterMapReduceBench.php b/benchmarks/PipeFilterMapReduceBench.php new file mode 100644 index 0000000..06dd848 --- /dev/null +++ b/benchmarks/PipeFilterMapReduceBench.php @@ -0,0 +1,65 @@ +pipe = \LazyLists\pipe( + \LazyLists\filter(static function($item) { + return $item->number % 5 === 0; + }), + \LazyLists\map(static function($item) { + return $item->number; + 
}), + \LazyLists\reduce(static function($sum, $number) { + return $sum + $number; + }, 0) + ); + } + public function init() + { + $this->items = self::getItems(); + } + + private static function getItems() + { + $numberOfItems = 10000; + $items = []; + for($i = 0; $i < $numberOfItems; $i++) { + $object = new stdClass; + $object->number = $i; + $items[] = $object; + } + return $items; + } + /** + * @Revs(400) + * @Iterations(5) + */ + public function benchNative() + { + $items = $this->items; + $divisibleBy5 = \array_filter($items, static function($item) { + return $item->number % 5 === 0; + }); + $numbers = \array_map(static function($item) { + return $item->number; + }, $divisibleBy5); + $sum = \array_reduce($numbers, static function($sum, $number) { + return $sum + $number; + }, 0); + } + /** + * @Revs(400) + * @Iterations(5) + */ + public function benchLazy() + { + $items = $this->items; + $pipe = $this->pipe; + $sum = $pipe($items); + } +} \ No newline at end of file diff --git a/benchmarks/PipeMapFilterTakeBench.php b/benchmarks/PipeMapFilterTakeBench.php new file mode 100644 index 0000000..d367576 --- /dev/null +++ b/benchmarks/PipeMapFilterTakeBench.php @@ -0,0 +1,61 @@ +pipe = \LazyLists\pipe( + \LazyLists\map(static function($item) { + return $item->number; + }), + \LazyLists\filter(static function($number) { + return $number % 5 === 0; + }), + \LazyLists\take(50) + ); + } + public function init() + { + $this->items = self::getItems(); + } + + private static function getItems() + { + $numberOfItems = 10000; + $items = []; + for($i = 0; $i < $numberOfItems; $i++) { + $object = new stdClass; + $object->number = $i; + $items[] = $object; + } + return $items; + } + /** + * @Revs(400) + * @Iterations(5) + */ + public function benchNative() + { + $items = $this->items; + $numbers = \array_map(static function($item) { + return $item->number; + }, $items); + $divisibleBy5 = \array_filter($numbers, static function($number) { + return $number % 5 === 0; + }); + 
$first50 = \array_slice($divisibleBy5, 0, 50); + } + /** + * @Revs(400) + * @Iterations(5) + */ + public function benchLazy() + { + $items = $this->items; + $pipe = $this->pipe; + $first50 = $pipe($items); + } +} \ No newline at end of file diff --git a/composer.json b/composer.json index 8a662cd..54d64e2 100644 --- a/composer.json +++ b/composer.json @@ -24,6 +24,7 @@ "require-dev": { "jakub-onderka/php-parallel-lint": "^1", "mockery/mockery": "^1", + "phpbench/phpbench": "^0.17.1", "phpstan/phpstan": "^0.11", "phpstan/phpstan-mockery": "^0.11", "phpunit/phpunit": "^8", @@ -59,6 +60,11 @@ "phpstan analyse src -c phpstan.neon --level max --no-progress --memory-limit=256M", "phpstan analyse tests -c phpstan.neon --level 4 --no-progress --memory-limit=256M" ], + "phpbench": [ + "phpbench run benchmarks/MapBench.php --report=aggregate", + "phpbench run benchmarks/PipeFilterMapReduceBench.php --report=aggregate", + "phpbench run benchmarks/PipeMapFilterTakeBench.php --report=aggregate" + ], "phpunit": "phpunit --verbose --colors=always", "phpunit-ci": "phpunit --verbose --coverage-clover build/logs/clover.xml", "phpunit-coverage": "phpunit --verbose --colors=always --coverage-html build/coverage", diff --git a/phpbench.json b/phpbench.json new file mode 100644 index 0000000..466f9e5 --- /dev/null +++ b/phpbench.json @@ -0,0 +1,3 @@ +{ + "bootstrap": "vendor/autoload.php" +} \ No newline at end of file