diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml new file mode 100644 index 0000000..30e5f77 --- /dev/null +++ b/.github/workflows/erlang.yml @@ -0,0 +1,65 @@ +name: CI + +on: + workflow_dispatch: + inputs: + erlang: + description: Erlang OTP version to test + default: '26.0.2.0-slim' + required: false + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + checks: + runs-on: [self-hosted, ubuntu-22.04-medium] + strategy: + matrix: + erlang: [26] + container: + image: erlang:${{ matrix.erlang }} + + steps: + - uses: actions/checkout@v4 + + - name: Cache + uses: actions/cache@v2 + env: + cache-name: rebar3 + with: + path: | + ~/.cache/rebar3 + _build + key: ci-${{runner.os}}-${{env.cache-name}}-erlang_${{matrix.erlang}}-rebar3-${{hashFiles('rebar.lock')}} + restore-keys: | + ci-${{runner.os}}-${{env.cache-name}}-erlang_${{matrix.erlang}}-rebar3 + ci-${{runner.os}}-${{env.cache-name}}-erlang_${{matrix.erlang}} + + - name: Get GitHub token + id: get_token + uses: getsentry/action-github-app-token@v2 + with: + private_key: ${{ secrets.PRIVATE_REPO_APP_PEM_RAW }} + app_id: ${{ secrets.PRIVATE_REPO_APP_ID }} + + - name: Setup private repo access + run: | + git config --global url."https://oauth2:${GITHUB_TOKEN}@github.com/adgear".insteadOf "https://github.com/adgear" + git config --global --add url."https://oauth2:${GITHUB_TOKEN}@github.com/".insteadOf "ssh://git@github.com:" + git config --global --add url."https://oauth2:${GITHUB_TOKEN}@github.com/".insteadOf "ssh://git@github.com/" + git config --global --add url."https://oauth2:${GITHUB_TOKEN}@github.com/".insteadOf "git@github.com:" + git config --global --add url."https://oauth2:${GITHUB_TOKEN}@github.com/".insteadOf "git@github.com/" + env: + GITHUB_TOKEN: ${{ steps.get_token.outputs.token }} + + - name: Compile + run: | + ./rebar3 compile + + - name: Check + run: | + ./rebar3 xref + ./rebar3 dialyzer + ./rebar3 eunit diff --git a/.gitignore b/.gitignore index f543359..24a51f1 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,10 @@ tags c_src/*.o c_src/compile_commands.json be-tree/* + +.idea +cmake-build-debug +**/*.txt +*.beam +be-tree*/ +saved diff --git a/Erlang_Scheduler_friendly_processing_with_BE_Tree.md b/Erlang_Scheduler_friendly_processing_with_BE_Tree.md new file mode 100644 index 0000000..147ec47 --- /dev/null +++ b/Erlang_Scheduler_friendly_processing_with_BE_Tree.md @@ -0,0 +1,466 @@ +# Erlang Scheduler-friendly Processing with BE-Tree + +[1] emphasizes that "it is of vital importance that a native function returns relatively fast". + +`erl_betree:betree_search/2` function might take long time to complete. + +This document describes an approach to represent the `erl_betree:betree_search/2` functionality +with shorter execution time calls to native functions. + +# Contents + +1. [Introduction](https://github.com/adgear/erl-be-tree/blob/PEDSP-2941/Erlang_Scheduler_friendly_processing_with_BE_Tree.md#introduction) + +2. [Implementation](https://github.com/adgear/erl-be-tree/blob/PEDSP-2941/Erlang_Scheduler_friendly_processing_with_BE_Tree.md#implementation) + +3. [Benchmarks](https://github.com/adgear/erl-be-tree/blob/PEDSP-2941/Erlang_Scheduler_friendly_processing_with_BE_Tree.md#benchmarks) + +4. [Resources](https://github.com/adgear/erl-be-tree/blob/PEDSP-2941/Erlang_Scheduler_friendly_processing_with_BE_Tree.md#resources) + + +# Introduction + +The implementation follows recommendations from [1], Section "Yielding NIF", +and project `jiffy, JSON NIFs for Erlang`, [2]. + +There are two essential goals to be achieved +by representing the `erl_betree:betree_search/2` functionality +with a series of shorter NIF calls: + +1. Provide information to the Eralng Runtime System (ERTS) + how many `reductions` were consumed during the NIF call; +2. Give the Erlang Scheduler an opportunity to schedule another process for execution + if the NIF call consumed all allocated `reductions`. + +The details about the Scheduler and Reductions are in [3] and [4]. + +# Implementation + +## Shorter NIF calls + +Currently `erl_betree:betree_search/2` performs matching of an `event` and `boolean expressions` +and returns list of matched `boolean expressions Ids`. + +[5], [9.7-10. Boolean Expressions: Evaluation](https://github.com/adgear/beval?tab=readme-ov-file#9-boolean-expressions-evaluation) +describes that the matching process consists of two steps: + +1. Select `boolean expression candidates` for evaluation. On this step some `boolean expressions` are rejected without evaluation; +2. Evaluate the selected `boolean expression candidates`. +``` + erl_betree: +------------+ +-----------+ + betree_search/2--->| |-->| |-->--+ select + (Betree, Event) | erl_betree | | be_tree | | expression + | NIF | | C library |<----+ candidates + | C code | | | + | | | |-->--+ evaluate + | | | | | candidate 1 + | | | |<----+ + | | | | ... + | | | |-->--+ evaluate + | | | | | candidate N + | | | |<----+ + Ids <--------------| |<--| | + +------------+ +-----------+ +``` + +The implementation extends `erl_betree` with the following functions: + +1. `search_iterator/2` - to perform selection `boolean expression candidates` for evaluation; +``` + erl_betree: +------------+ +-----------+ + search_iterator/2->| |-->| |-->--+ select + (Betree, Event) | erl_betree | | be_tree | | expression + | NIF | | C library |<----+ candidates + | C code | | | + Iterator <---------| |<--| | + +------------+ +-----------+ +``` +2. `search_all/1` - to evaluate the selected `boolean expression candidates` all at once; +``` + erl_betree: +------------+ +-----------+ + search_all/1------>| |-->| |-->--+ evaluate + (Iterator) | erl_betree | | be_tree | | candidate 1 + | NIF | | C library |<----+ + | C code | | | ... + | | | |-->--+ evaluate + | | | | | candidate N + | | | |<----+ + Ids <--------------| |<--| | + +------------+ +-----------+ +``` +3. `search_next/1` - to evaluate the selected `boolean expression candidates` one by one. +``` + erl_betree: +------------+ +-----------+ + search_next/1----->| |-->| |-->--+ evaluate + (Iterator) | erl_betree | | be_tree | | candidate + | NIF | | C library |<----+ + `continue` to | C code | | | + indicate a need<---| |<--| | + for another +------------+ +-----------+ + `search_next` + + ... + + erl_betree: +------------+ +-----------+ + search_next/1----->| |-->| |-->--+ evaluate + (Iterator) | erl_betree | | be_tree | | candidate + | NIF | | C library |<----+ + `ok` | C code | | | + Ids <--------------| |<--| | + +------------+ +-----------+ +``` + +## Reductions counting + +### Counting reductions for `search_iterator/2` + +`search_iterator/2` traverses nodes of BE-Tree Index. + +A visit to the BE-Tree Index node counts as `one` reduction. + +### Counting reductions for `search_all/1` and `search_next/1` + +`search_all/1` and `search_next/1` traverse nodes of `boolean expressions`. + +A visit to the `boolean expression` node counts as `one` reduction. + +Also `search_all/1` and `search_next/1` perform `boolean expressions` evaluations. + +Such evaluations can consist of a time-wise short operations, like comparing two numbers, +or a time-consuming operations, like search in a list. + +One time-wise short operation counts as `one` reduction. + +For the `boolean expression` evaluation requiring iterations each iteration counts as `one` reduction. + +# Benchmarks + +I will compare how yielding control back to the Erlang Scheduler +using `search_iterator/search_all` and `search_iterator/search_next` +affects `boolean expressions` evaluation. + +`betree_search` evaluation will be the baseline. + +## Benchmark data + +The following data for benchmarking will be generated: +1. 5,000 randomized `boolean expressions`, each `boolean expression` with 64 parameters; + + 100,000 randomized boolean events, each event with 64 parameters; +2. 5,000 randomized `boolean expressions`, each `boolean expression` with 96 parameters; + + 100,000 randomized boolean events, each event with 96 parameters; +3. 5,000 randomized `boolean expressions`, each `boolean expression` with 128 parameters; + + 100,000 randomized boolean events, each event with 128 parameters. + +Project `beval`, [5], is used for `boolean expressions` and `boolean events` generations. + +Examples: + +To generate 5,000 randomized `boolean expressions`, each `boolean expression` with 128 parameters run +```erlang +> be_tools:write_random_trees_to_file("par", 128, 5_000, "./P_128_Ex_5_000.txt"). +``` +It will +- create 128 boolean parameters `par1`, `par2`, ..., `par128`; +- create 5,000 boolean expressions with parameters `par1`, `par2`, ..., `par128`; +- write the boolean expressions to file `P_128_Ex_5_000.txt`. + +To generate 100,000 randomized boolean events corresponding to 128 parameters run +```erlang +> be_tools:write_random_0_1_events_to_file(128, 100_000, "./P_128_Ev_100_000.txt"). +``` +It will write 100,000 events to file `P_128_Ev_100_000.txt`. + +The following files were used for benchmarks and can be provided upon request (11 MB gzipped data): + +- `P_64_Ex_5_000.txt`, `P_64_Ev_100_000.txt`; +- `P_96_Ex_5_000.txt`, `P_96_Ev_100_000.txt`; +- `P_128_Ex_5_000.txt`, `P_128_Ev_100_000.txt`. + +## Correctness + +Correctness verifies that output from `betree_search` + +is equal to the `search_iterator/search_all` output and `search_iterator/search_next` output. + +`betree_search` evaluation of 100,000 events was recorded using +```erlang +> be_bm:std_output("benchmarks/data/P_128_Ex_5_000.txt", "benchmarks/data/P_128_Ev_100_000.txt", "benchmarks/data/output_std.txt"). +``` +`search_iterator/search_all` evaluation of 100,000 events was recorded using +```erlang +> be_bm:iterated_all_output("benchmarks/data/P_128_Ex_5_000.txt", "benchmarks/data/P_128_Ev_100_000.txt", "benchmarks/data/output_all.txt"). +``` +`search_iterator/search_next` evaluation of 100,000 events was recorded using +```erlang +> be_bm:iterated_next_output("benchmarks/data/P_128_Ex_5_000.txt", "benchmarks/data/P_128_Ev_100_000.txt", "benchmarks/data/output_next.txt"). +``` +Then, `betree_search` evaluation output was compared to `search_iterator/search_all` +```erlang +> be_bm:diff("benchmarks/data/output_std.txt", "benchmarks/data/output_all.txt"). +``` +and to `search_iterator/search_next` +```erlang +> be_bm:diff("benchmarks/data/output_std.txt", "benchmarks/data/output_next.txt"). +``` +No discrepancies were found. The correctness confirmed. + +## Performance statistics collection + +### How event evaluation duration is measured + +#### `betree_search` event evaluation duration +``` + erl_betree: +------------+ +-----------+ +T1 +-- betree_search/2--->| |-->| |-->--+ select + | (Betree, Event) | erl_betree | | be_tree | | expression + | | NIF | | C library |<----+ candidates + | | C code | | | + | | | | |-->--+ evaluate + | | | | | | candidate 1 + | | | | |<----+ + | | | | | ... + | | | | |-->--+ evaluate + | | | | | | candidate N + | | | | |<----+ +T2 +-- Ids <--------------| |<--| | + +------------+ +-----------+ +``` +`betree_search` event evaluation duration = `T2 - T1`. + +#### `search_iterator/search_all` event evaluation duration +``` + erl_betree: +------------+ +-----------+ +T1 +-- search_iterator/2->| |-->| |-->--+ select + | (Betree, Event) | erl_betree | | be_tree | | expression + | | NIF | | C library |<----+ candidates + | | C code | | | + | Iterator <---------| |<--| | + | +------------+ +-----------+ + | + | erl_betree: +------------+ +-----------+ + | search_all/1------>| |-->| |-->--+ evaluate + | (Iterator) | erl_betree | | be_tree | | candidate 1 + | | NIF | | C library |<----+ + | | C code | | | ... + | | | | |-->--+ evaluate + | | | | | | candidate N + | | | | |<----+ +T2 +-- Ids <--------------| |<--| | + +------------+ +-----------+ +``` +`search_iterator/search_all` event evaluation duration = `T2 - T1`. + +#### `search_iterator/search_next` event evaluation duration +``` + erl_betree: +------------+ +-----------+ +T1 +-- search_iterator/2->| |-->| |-->--+ select + | (Betree, Event) | erl_betree | | be_tree | | expression + | | NIF | | C library |<----+ candidates + | | C code | | | + | Iterator <---------| |<--| | + | +------------+ +-----------+ + | + | erl_betree: +------------+ +-----------+ + | search_next/1----->| |-->| |-->--+ evaluate + | (Iterator) | erl_betree | | be_tree | | candidate + | | NIF | | C library |<----+ + | `continue` to | C code | | | + | indicate a need<---| |<--| | + | for another +------------+ +-----------+ + | `search_next` + | + | ... + | + | erl_betree: +------------+ +-----------+ + | search_next/1----->| |-->| |-->--+ evaluate + | (Iterator) | erl_betree | | be_tree | | candidate + | | NIF | | C library |<----+ + | `ok` | C code | | | +T2 +-- Ids <--------------| |<--| | + +------------+ +-----------+ +``` +`search_iterator/search_next` event evaluation duration = `T2 - T1`. + +### How event evaluation durations are collected + +The event evaluation durations are collected by thousands events. + +Thus, for 100,000 events 100 numbers are collected, +where each number is a sum of durations of 1,000 events. + +### How to collect statistics + +#### To collect statistics for `betree_search` run: +```erlang +% 5,000 expressions, each expression with 64 parameters +> be_bm:std_stats("benchmarks/data/P_64_Ex_5_000.txt", "benchmarks/data/P_64_Ev_100_000.txt", "benchmarks/data/stats_P_64_std.txt"). +% 5,000 expressions, each expression with 96 parameters +> be_bm:std_stats("benchmarks/data/P_96_Ex_5_000.txt", "benchmarks/data/P_96_Ev_100_000.txt", "benchmarks/data/stats_P_96_std.txt"). +% 5,000 expressions, each expression with 128 parameters +> be_bm:std_stats("benchmarks/data/P_128_Ex_5_000.txt", "benchmarks/data/P_128_Ev_100_000.txt", "benchmarks/data/stats_P_128_std.txt"). +``` +The statistics outputs are in files `stats_P_64_std.txt`, `stats_P_96_std.txt`, `stats_P_128_std.txt`. + +#### To collect statistics for `search_iterator/search_all` run: +```erlang +% 5,000 expressions, each expression with 64 parameters +> be_bm:iterated_all_stats("benchmarks/data/P_64_Ex_5_000.txt", "benchmarks/data/P_64_Ev_100_000.txt", "benchmarks/data/stats_P_64_all.txt"). +% 5,000 expressions, each expression with 96 parameters +> be_bm:iterated_all_stats("benchmarks/data/P_96_Ex_5_000.txt", "benchmarks/data/P_96_Ev_100_000.txt", "benchmarks/data/stats_P_96_all.txt"). +% 5,000 expressions, each expression with 128 parameters +> be_bm:iterated_all_stats("benchmarks/data/P_128_Ex_5_000.txt", "benchmarks/data/P_128_Ev_100_000.txt", "benchmarks/data/stats_P_128_all.txt"). +``` +The statistics outputs are in files `stats_P_64_all.txt`, `stats_P_96_all.txt`, `stats_P_128_all.txt`. + +#### To collect statistics for `search_iterator/search_next` run: +```erlang +% 5,000 expressions, each expression with 64 parameters +> be_bm:iterated_next_stats("benchmarks/data/P_64_Ex_5_000.txt", "benchmarks/data/P_64_Ev_100_000.txt", "benchmarks/data/stats_P_64_next.txt"). +% 5,000 expressions, each expression with 96 parameters +> be_bm:iterated_next_stats("benchmarks/data/P_96_Ex_5_000.txt", "benchmarks/data/P_96_Ev_100_000.txt", "benchmarks/data/stats_P_96_next.txt"). +% 5,000 expressions, each expression with 128 parameters +> be_bm:iterated_next_stats("benchmarks/data/P_128_Ex_5_000.txt", "benchmarks/data/P_128_Ev_100_000.txt", "benchmarks/data/stats_P_128_next.txt"). +``` +The statistics outputs are in files `stats_P_64_all.txt`, `stats_P_96_all.txt`, `stats_P_128_all.txt`. + +### Compare with baseline + +#### Compare `search_iterator/search_all` with baseline +```erlang +% Compare stats for expressions with 64 parameters +> be_bm:compare_stats("benchmarks/data/stats_P_64_std.txt", "benchmarks/data/stats_P_64_all.txt"). +% Compare stats for expressions with 96 parameters +> be_bm:compare_stats("benchmarks/data/stats_P_96_std.txt", "benchmarks/data/stats_P_96_all.txt"). +% Compare stats for expressions with 128 parameters +> be_bm:compare_stats("benchmarks/data/stats_P_128_std.txt", "benchmarks/data/stats_P_128_all.txt"). +``` + +#### Compare `search_iterator/search_next` with baseline +```erlang +% Compare stats for expressions with 64 parameters +> be_bm:compare_stats("benchmarks/data/stats_P_64_std.txt", "benchmarks/data/stats_P_64_next.txt"). +% Compare stats for expressions with 96 parameters +> be_bm:compare_stats("benchmarks/data/stats_P_96_std.txt", "benchmarks/data/stats_P_96_next.txt"). +% Compare stats for expressions with 128 parameters +> be_bm:compare_stats("benchmarks/data/stats_P_128_std.txt", "benchmarks/data/stats_P_128_next.txt"). +``` + +## Benchmark results + +### Explanations of numbers in Table 1. and Table 2. + +Imagine, that we have a `baseline` that is 100 numbers `10`, i.e. +``` +10, 10, ..., 10 +\ / + -----100----- +``` +Also, we have a sequence of other 100 numbers, will call them `target`, that we want to compare with `baseline` such that + +first 20 of them are 9, next 20 are 10, next 50 are 14, and the rest are 16, i.e. +``` +9, ..., 9, 10, ..., 10, 14, ..., 14, 16, ..., 16 +\ / \ / \ / \ / + --20--- ---20--- ---50--- ---10---- +``` +Then, the subtraction of the baseline from the `target` will give us sequence +``` +-1, ...,-1, 0, ..., 0, 4, ..., 4, 6, ..., 6 +\ / \ / \ / \ / + --20--- ---20--- ---50--- ---10-- +``` +Further, the division by the `baseline` values and expressing the result in percentages will produce +``` +-10%, ...,-10%, 0, ..., 0, 40%, ..., 40%, 60%, ..., 60% +\ / \ / \ / \ / + ----20----- ---20--- ----50---- -----10---- +``` +Such representation gives better picture how the `target` behaves with comparison to the `baseline`. + +In this case we can say that +- the `target` deviates from the `baseline` in the `range` (-10%, 60%); +- the `median` is 40%; +- the `99th` is 60%; +- `better` is 20%, because 20 numbers out 100 are less than 0. + +### Conclusions + +Conclusion for benchmark results on Xeon, Table 1.: + +`search_iterator/search_all` comparison to `betree_search`: +- Overall, penalty to use `search_iterator/search_all` instead of `betree_search` is not bigger than `11%`; +- penalty to use `search_iterator/search_all` for the expressions with 128 parameters is not bigger than `5%`; +- `search_iterator/search_all` produces better results on 85% of events for the expressions with 96 parameters. + +`search_iterator/search_next` comparison to `betree_search`: +- Penalty to use `search_iterator/search_next` instead of `betree_search` is not bigger than `20%`. + +``` +Table 1. +Xeon CPU E5-2630 v4 @ 2.20GHz, Cores: 20, RAM: 128 GB ++=========+=====================+====================+ +| | search_iterator/ | search_iterator/ | +| Params | search_all | search_next | ++---------+---------------------+--------------------+ +| | range: (0%, 11%) | range: (7%, 20%) | +| 64 | median: 2% | median: 10% | +| | 99th: 11% | 99th: 19% | +| | better: 0% | better: 0% | ++---------+---------------------+--------------------+ +| | range: (-8%, 11%) | range: (-5%, 15%) | +| 96 | median: -4% | median: 0% | +| | 99th: 9% | 99th: 13% | +| | better: 85% | better: 38% | ++---------+---------------------+--------------------+ +| | range: (-11%, 5%) | range: (-4%, 16%) | +| 128 | median: 2% | median: 2% | +| | 99th: 5% | 99th: 14% | +| | better: 40% | better: 18% | ++=========+=====================+====================+ +``` + +Conclusion for benchmark results on Intel, Table 2.: + +`search_iterator/search_all` comparison to `betree_search`: +- Overall, penalty to use `search_iterator/search_all` instead of `betree_search` is not bigger than `23%`; +- penalty to use `search_iterator/search_all` for the expressions with 128 parameters is not bigger than `7%`; +- `search_iterator/search_all` produces better results on 94% of events for the expressions with 128 parameters. + +`search_iterator/search_next` comparison to `betree_search`: +- Penalty to use `search_iterator/search_next` instead of `betree_search` is not bigger than `25%`. + +``` +Table 2. +Intel, 2.4 GHz Quad-Core Intel Core i5, RAM 16 GB 2133 MHz LPDDR3 ++=========+=====================+====================+ +| | search_iterator/ | search_iterator/ | +| Params | search_all | search_next | ++---------+---------------------+--------------------+ +| | range: (-11%, 23%) | range: (-5%, 25%) | +| 64 | median: 1% | median: 10% | +| | 99th: 20% | 99th: 22% | +| | better: 31% | better: 4% | ++---------+---------------------+--------------------+ +| | range: (1%, 20%) | range: (3%, 23%) | +| 96 | median: 9% | median: 21% | +| | 99th: 17% | 99th: 23% | +| | better: 0% | better: 0% | ++---------+---------------------+--------------------+ +| | range: (-10%, 7%) | range: (-3%, 20%) | +| 128 | median: -2% | median: 5% | +| | 99th: 1% | 99th: 19% | +| | better: 94% | better: 3% | ++=========+=====================+====================+ +``` + +# Resources + +###### 1. [erl_nif, Section Long-running NIFs](https://www.erlang.org/doc/man/erl_nif) +###### 2. [jiffy, JSON NIFs for Erlang](https://github.com/davisp/jiffy) +###### 3. [The Erlang Runtime System, 11. Scheduling](https://blog.stenmans.org/theBeamBook/#CH-Scheduling) +###### 4. [The Erlang Runtime System, 11.3. Reductions](https://blog.stenmans.org/theBeamBook/#_reductions) +###### 5. [Boolean Expression Evaluator](https://github.com/adgear/beval) +###### 6. [An Approach To Make an Erlang NIF the Erlang Scheduler Friendly](https://adgear.atlassian.net/wiki/spaces/ENG/pages/19922256160/An+Approach+To+Make+an+Erlang+NIF+the+Erlang+Scheduler+Friendly) diff --git a/benchmarks/P_20.tar.gz b/benchmarks/P_20.tar.gz new file mode 100644 index 0000000..a88d3b9 Binary files /dev/null and b/benchmarks/P_20.tar.gz differ diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000..2050dbd --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,56 @@ +# erl-be-tree benchmarks + +From `erl-be-tree` project root directory run +```shell +$ rebar3 shell +``` +To compile the benchmark runner +```erlang +> c("benchmarks/make.erl"). +> make:all(). +[term_reader,term_writer,term_eval,be_loader,be_eval,be_bm_utils,be_bm] +``` + +`be_bm` is a module containing benchmark runner functions. + +Unpack files 'P_20_Ex_300.txt' and 'P_20_Ev_3_000.txt' from `P_20.tar.gz`: +- 'P_20_Ex_300.txt' contains 300 `boolean expressions` with 20 parameters; +- 'P_20_Ev_3_000.txt' contains 3,000 events. + +## To run benchmarks using `erl_betree:betree_search/2`: +```erlang +> be_bm:std("benchmarks/P_20_Ex_300.txt", "benchmarks/P_20_Ev_3_000.txt"). +``` +### save statistics into a file: +```erlang +> be_bm:std_stats("benchmarks/P_20_Ex_300.txt", "benchmarks/P_20_Ev_3_000.txt", "benchmarks/stats_std.txt"). +``` +### save Ids into a file: +```erlang +> be_bm:std_output("benchmarks/P_20_Ex_300.txt", "benchmarks/P_20_Ev_3_000.txt", "benchmarks/output_std.txt"). +``` +## To run benchmarks using `erl_betree:search_iterator/search_all`: +```erlang +> be_bm:iterated_all("benchmarks/P_20_Ex_300.txt", "benchmarks/P_20_Ev_3_000.txt"). +``` +### save statistics into a file: +```erlang +> be_bm:iterated_all_stats("benchmarks/P_20_Ex_300.txt", "benchmarks/P_20_Ev_3_000.txt", "benchmarks/stats_all.txt"). +``` +### save Ids into a file: +```erlang +> be_bm:iterated_all_output("benchmarks/P_20_Ex_300.txt", "benchmarks/P_20_Ev_3_000.txt", "benchmarks/output_all.txt"). +``` + +## To verify that `erl_betree:betree_search/2` and `erl_betree:search_iterator/search_all` produce the same output: +```erlang +> be_bm:diff("benchmarks/output_std.txt", "benchmarks/output_all.txt"). +{3000,[]} +``` +`{3000,[]}` indicates that the results of evaluations of 3,000 events were compared, no differences were found. + + +## To compare statistics for `erl_betree:betree_search/2` and `erl_betree:search_iterator/search_all`: +```erlang +> be_bm:compare_stats("benchmarks/stats_std.txt", "benchmarks/stats_all.txt"). +``` diff --git a/benchmarks/be_bm.erl b/benchmarks/be_bm.erl new file mode 100644 index 0000000..01157dd --- /dev/null +++ b/benchmarks/be_bm.erl @@ -0,0 +1,307 @@ +-module(be_bm). + +-include("be_eval.hrl"). + +%% +%% Boolean Expression benchmark runner +%% + +%% Usage: + +%% API +-export([ + std/2, + std_stats/3, + std_output/3, + iterated_all/2, + iterated_all_stats/3, + iterated_all_output/3, + iterated_next/2, + iterated_next_stats/3, + iterated_next_output/3, + + std_betree_search/2, + iterated_betree_search_all/2, + iterated_betree_search_next/2, + stats_collector/2, + + % compare evaluation outputs + diff/2, + + compare_stats/2 +]). + +std(BetreeFile, EventsFile) -> + {ok, _} = be_eval:start_link(be_eval_std), + Ret = be_eval:run(be_eval_std, "BE-Tree search event", + BetreeFile, EventsFile, + fun std_betree_search/2, fun stats_collector/2), + be_eval:stop(be_eval_std), + Ret. + +std_stats(BetreeFile, EventsFile, StatsFile) -> + {ok, _} = be_eval:start_link(be_eval_std), + Ret = be_eval:run(be_eval_std, "BE-Tree search event", + BetreeFile, + EventsFile, fun std_betree_search/2, _EventEvalOutputFile = undefined, + fun stats_collector/2, StatsFile), + be_eval:stop(be_eval_std), + Ret. + +std_output(BetreeFile, EventsFile, EventEvalOutputFile) -> + {ok, _} = be_eval:start_link(be_eval_std), + Ret = be_eval:run(be_eval_std, "BE-Tree search event", + BetreeFile, + EventsFile, fun std_betree_search/2, EventEvalOutputFile, + fun stats_collector/2, undefined), + be_eval:stop(be_eval_std), + Ret. + +iterated_all(BetreeFile, EventsFile) -> + {ok, _} = be_eval:start_link(be_eval_iterated_all), + Ret = be_eval:run(be_eval_iterated_all, "BE-Tree search event with iterator, all subscriptions", + BetreeFile, EventsFile, + fun iterated_betree_search_all/2, fun stats_collector/2), + be_eval:stop(be_eval_iterated_all), + Ret. + +iterated_all_stats(BetreeFile, EventsFile, StatsFile) -> + {ok, _} = be_eval:start_link(be_eval_iterated_all), + Ret = be_eval:run(be_eval_iterated_all, "BE-Tree search event with iterator, all subscriptions", + BetreeFile, + EventsFile, fun iterated_betree_search_all/2, _EventEvalOutputFile = undefined, + fun stats_collector/2, StatsFile), + be_eval:stop(be_eval_iterated_all), + Ret. + +iterated_all_output(BetreeFile, EventsFile, EventEvalOutputFile) -> + {ok, _} = be_eval:start_link(be_eval_iterated_all), + Ret = be_eval:run(be_eval_iterated_all, "BE-Tree search event with iterator, all subscriptions", + BetreeFile, + EventsFile, fun iterated_betree_search_all/2, EventEvalOutputFile, + fun stats_collector/2, undefined), + be_eval:stop(be_eval_iterated_all), + Ret. + +iterated_next(BetreeFile, EventsFile) -> + {ok, _} = be_eval:start_link(be_eval_iterated_next), + Ret = be_eval:run(be_eval_iterated_next, "BE-Tree search event with iterator, subscriptions one by one", + BetreeFile, EventsFile, + fun iterated_betree_search_next/2, fun stats_collector/2), + be_eval:stop(be_eval_iterated_next), + Ret. + +iterated_next_stats(BetreeFile, EventsFile, StatsFile) -> + {ok, _} = be_eval:start_link(be_eval_iterated_next), + Ret = be_eval:run(be_eval_iterated_next, "BE-Tree search event with iterator, subscriptions one by one", + BetreeFile, + EventsFile, fun iterated_betree_search_next/2, _EventEvalOutputFile = undefined, + fun stats_collector/2, StatsFile), + be_eval:stop(be_eval_iterated_next), + Ret. + +iterated_next_output(BetreeFile, EventsFile, EventEvalOutputFile) -> + {ok, _} = be_eval:start_link(be_eval_iterated_next), + Ret = be_eval:run(be_eval_iterated_next, "BE-Tree search event with iterator, subscriptions one by one", + BetreeFile, + EventsFile, fun iterated_betree_search_next/2, EventEvalOutputFile, + fun stats_collector/2, undefined), + be_eval:stop(be_eval_iterated_next), + Ret. + +std_betree_search(Term, #be_evaluator{betree = Betree} = Context) -> + Event = [list_to_tuple( + [event | [case N of 1 -> true; _ -> false end|| N <- Term]])], + BeginNano = erlang:monotonic_time(nanosecond), + SearchRet = erl_betree:betree_search(Betree, Event), + EndNano = erlang:monotonic_time(nanosecond), + CurrentAllocations = be_bm_utils:betree_allocations(), + DiffNano = EndNano - BeginNano, + case SearchRet of + {ok, Ids} -> {{ok, {Ids, {DiffNano, CurrentAllocations}}}, Context}; + X -> {{error, {betree_search, X}}, Context} + end. + +iterated_betree_search_all(Term, #be_evaluator{betree = Betree} = Context) -> + Event = [list_to_tuple( + [event | [case N of 1 -> true; _ -> false end|| N <- Term]])], + BeginNano = erlang:monotonic_time(nanosecond), + case erl_betree:search_iterator(Betree, Event) of + {ok, {Iterator, _, _}} -> + case erl_betree:search_all(Iterator) of + {ok, Ids} -> + EndNano = erlang:monotonic_time(nanosecond), + CurrentAllocations = be_bm_utils:betree_allocations(), + DiffNano = EndNano - BeginNano, + {{ok, {Ids, {DiffNano, CurrentAllocations}}}, Context}; + X -> {{error, {search_all, X}}, Context} + end; + X -> {{error, {search_iterator, X}}, Context} + end. + +iterated_betree_search_next(Term, #be_evaluator{betree = Betree} = Context) -> + Event = [list_to_tuple( + [event | [case N of 1 -> true; _ -> false end|| N <- Term]])], + BeginNano = erlang:monotonic_time(nanosecond), + case erl_betree:search_iterator(Betree, Event) of + {ok, {Iterator, _, _}} -> + case iterated_betree_search_next(Iterator) of + {ok, Ids} -> + EndNano = erlang:monotonic_time(nanosecond), + CurrentAllocations = be_bm_utils:betree_allocations(), + DiffNano = EndNano - BeginNano, + {{ok, {Ids, {DiffNano, CurrentAllocations}}}, Context}; + {error, Err} -> {{error, {search_all, Err}}, Context} + end; + X -> {{error, {search_iterator, X}}, Context} + end. + +iterated_betree_search_next(Iterator) -> + case erl_betree:search_next(Iterator) of + {continue, _} -> + iterated_betree_search_next(Iterator); + {ok, _Ids} = Ret -> + Ret; + X -> {error, {search_next, X}} + end. + +stats_collector({DiffNano, Allocations}, #be_evaluator_stats{ + info = Info, + index = Index, + snapshot_freq = SnapshotFreq, + snapshot_allocations = SnapshotAllocations, + allocation_diffs = AllocationsDiffs, + snapshot_nano_acc = SnapshotNanoAcc, + nano_diffs = NanoDiffs +} = Stats) -> + Index1 = Index + 1, + CurrentTime = calendar:universal_time_to_local_time(erlang:universaltime()), + SnapshotNanoAcc1 = SnapshotNanoAcc + DiffNano, + Stats1 = + case Index1 rem SnapshotFreq of + 0 -> + NanoPerEvent = SnapshotNanoAcc1 / SnapshotFreq, + MicroPerEvent = NanoPerEvent / 1_000, + io:format(Info ++ ": ~p, ~p microseconds/event~n", [Index1, ceil(MicroPerEvent)]), + AllocationDiff = be_bm_utils:betree_allocations_diff(SnapshotAllocations, Allocations), + io:format(Info ++ " allocations diff:~n~p~n", [AllocationDiff]), + Stats#be_evaluator_stats{ + current_time = CurrentTime, + index = Index1, + current_allocations = Allocations, + snapshot_allocations = Allocations, + allocation_diffs = [AllocationDiff | AllocationsDiffs], + snapshot_nano_acc = 0, + nano_diffs = [SnapshotNanoAcc1 | NanoDiffs]}; + _ -> + Stats#be_evaluator_stats{ + current_time = CurrentTime, + index = Index1, + current_allocations = Allocations, + snapshot_nano_acc = SnapshotNanoAcc1} + end, + {ok, Stats1}. + +diff(FileName1, FileName2) -> + {ok, _} = term_reader:start_link(in1, FileName1), + {ok, _} = term_reader:start_link(in2, FileName2), + diff(in1, in2, 0, []). + +diff(Reader1, Reader2, Index, Acc) -> + Index1 = Index + 1, + Ret1 = term_reader:read(Reader1), + case Ret1 of + eof -> + {Index, lists:reverse(Acc)}; + {error, Reason} -> + {Index1, lists:reverse([{error, {Reader1, Reason}} | Acc])}; + {ok, Term1} -> + Ret2 = term_reader:read(Reader2), + case Ret2 of + eof -> + {Index, lists:reverse(Acc)}; + {error, Reason} -> + {Index1, lists:reverse([{error, {Reader2, Reason}} | Acc])}; + {ok, Term2} -> + Sorted1 = lists:sort(Term1), + Sorted2 = lists:sort(Term2), + case Sorted1 =:= Sorted2 of + true -> + diff(Reader1, Reader2, Index1, Acc); + _ -> + diff(Reader1, Reader2, Index1, [{Index1, Term1, Term2} | Acc]) + end + end + end. + +compare_stats(BaseFileName, TargetFileName) -> + case file:consult(BaseFileName) of + {ok, BaseStats} -> + case file:consult(TargetFileName) of + {ok, TargetStats} -> + Base_n_events = proplists:get_value(n_events, BaseStats), + Target_n_events = proplists:get_value(n_events, TargetStats), + case Base_n_events =/= Target_n_events of + true -> + {error, {{base_n_events, Base_n_events}, {target_n_events, Target_n_events}}}; + _ -> + Base_snapshot_freq = proplists:get_value(snapshot_freq, BaseStats), + Target_snapshot_freq = proplists:get_value(snapshot_freq, TargetStats), + case Base_snapshot_freq =/= Target_snapshot_freq of + true -> + {error, {{base_snapshot_freq, Base_snapshot_freq}, {target_snapshot_freq, Target_snapshot_freq}}}; + _ -> + _Base_info = proplists:get_value(info, BaseStats), + _Target_info = proplists:get_value(info, TargetStats), + Base_nano_diffs = proplists:get_value(nano_diffs, BaseStats), + Target_nano_diffs = proplists:get_value(nano_diffs, TargetStats), + Diffs = lists:zip(Base_nano_diffs, Target_nano_diffs), + AbsRelDiffs = [{Target-Base, ceil((Target-Base)*100/Base)} || {Base, Target} <- Diffs], + {AbsDiffs, RelDiffs} = lists:unzip(AbsRelDiffs), + + SortedAbsDiffs = lists:sort(AbsDiffs), + SortedRelDiffs = lists:sort(RelDiffs), + + % min / max + [AbsMinDiff | _] = SortedAbsDiffs, + [RelMinDiff | _] = SortedRelDiffs, + AbsMaxDiff = lists:last(SortedAbsDiffs), + RelMaxDiff = lists:last(SortedRelDiffs), + AbsMinDiffPerEvent = AbsMinDiff / Base_snapshot_freq, + AbsMaxDiffPerEvent = AbsMaxDiff / Base_snapshot_freq, + + N_snapshots = ceil(Base_n_events / Base_snapshot_freq), + + % median + MedianIdx = ceil(N_snapshots/2), + AbsMedian = lists:nth(MedianIdx, SortedAbsDiffs), + RelMedian = lists:nth(MedianIdx, SortedRelDiffs), + AbsMedianPerEvent = AbsMedian / Base_snapshot_freq, + + % 99th percentile + NinetyNineIdx = ceil(N_snapshots * 99 / 100), + AbsNinetyNine = lists:nth(NinetyNineIdx, SortedAbsDiffs), + RelNinetyNine = lists:nth(NinetyNineIdx, SortedRelDiffs), + AbsNinetyNinePerEvent = AbsNinetyNine / Base_snapshot_freq, + + % average + AbsAvgPerEvent = lists:sum(AbsDiffs) / Base_n_events, + RelAvg = lists:sum(RelDiffs) / N_snapshots, + + % better + RelBetter = be_bm_utils:better_percentage(SortedRelDiffs), + + [ + {min, {relative, RelMinDiff, '%'}, {value, ceil(AbsMinDiffPerEvent/1_000), microsecond}}, + {max, {relative, RelMaxDiff, '%'}, {value, ceil(AbsMaxDiffPerEvent/1_000), microsecond}}, + {avg, {relative, ceil(RelAvg), '%'}, {value, ceil(AbsAvgPerEvent/1_000), microsecond}}, + {median, {relative, RelMedian, '%'}, {value, ceil(AbsMedianPerEvent/1_000), microsecond}}, + {'99th', {relative, RelNinetyNine, '%'}, {value, ceil(AbsNinetyNinePerEvent/1_000), microsecond}}, + {better, {RelBetter, '%'}} + ] + end + end; + X -> X + end; + X -> X + end. diff --git a/benchmarks/be_bm_utils.erl b/benchmarks/be_bm_utils.erl new file mode 100644 index 0000000..1afbc4e --- /dev/null +++ b/benchmarks/be_bm_utils.erl @@ -0,0 +1,93 @@ +-module(be_bm_utils). + +%% +%% Util functions for benchmarking +%% + +%% API +-export([ + better_percentage/1, + + file_exists/1, + + betree_allocations/0, + betree_allocations_diff/2, + + diff_tuple/2, + + write_terms/2 +]). + +%% Consider a sorted list of numbers where +%% - negative numbers indicates a 'better'; +%% - positive number indicates a 'worse'. +%% better_percentage finds how many 'betters' are in the list +%% expressed as percentage. +better_percentage([]) -> + not_applicable; +better_percentage([Min | _] = NsSorted) -> + case Min >= 0 of + true -> 0; + _ -> + Max = lists:last(NsSorted), + case Max =< 0 of + true -> 100; + _ -> + Enumed = lists:enumerate(NsSorted), + case lists:search(fun ({_N, Rel}) -> Rel >= 0 end, Enumed) of + false -> unknown; + {value, {N0, _Rel0}} -> + Total = length(NsSorted), + floor((N0/Total) * 100) + end + end + end. + +file_exists(FileName) -> + case file:open(FileName, [read]) of + {ok, Handle} -> + file:close(Handle), + true; + _ -> false + end. + +betree_allocations() -> + case instrument:allocations() of + {error, _Reason} = Err -> Err; + {ok, {_HistogramStart, _UnscannedSize, Allocations}} -> + case maps:find(erl_betree_nif, Allocations) of + error -> {error, betree_allocations_not_found}; + {ok, BetreeAllocations} -> BetreeAllocations + end + end. + +betree_allocations_diff(Alloc1, Alloc2) + when is_map(Alloc1) andalso is_map(Alloc2) -> + Binary1 = maps:get(binary, Alloc1, error), + Binary2 = maps:get(binary, Alloc2, error), + BinaryDiff = diff_tuple(Binary1, Binary2), + NifInternal1 = maps:get(nif_internal, Alloc1, error), + NifInternal2 = maps:get(nif_internal, Alloc2, error), + NifInternalDiff = diff_tuple(NifInternal1, NifInternal2), + {BinaryDiff, NifInternalDiff}; +betree_allocations_diff(_, _) -> + {[], []}. + +diff_tuple(error, error) -> + []; +diff_tuple(error, T2) -> + diff_tuple({}, T2); +diff_tuple(T1, error) -> + diff_tuple(T1, {}); +diff_tuple(T1, T2) -> + L1 = tuple_to_list(T1), + L2 = tuple_to_list(T2), + L = lists:zip(L1, L2, {pad, {0, 0}}), + Diff = [X2 - X1 || {X1, X2} <- L], + Diff. + +write_terms(undefined, _Ts) -> + ok; +write_terms(File, Ts) -> + Lines = [io_lib:format("~tp.~n", [T]) || T <- Ts], + file:write_file(File, Lines). diff --git a/benchmarks/be_eval.erl b/benchmarks/be_eval.erl new file mode 100644 index 0000000..3aaa4ab --- /dev/null +++ b/benchmarks/be_eval.erl @@ -0,0 +1,226 @@ +-module(be_eval). + +-behaviour(gen_server). + +-include("be_eval.hrl"). + +%% API +-export([ + start_link/1, + stop/1, + run/6, + run/8 +]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-define(SERVER, ?MODULE). + +-record(be_eval_state, { + id +}). + +%%%=================================================================== +%%% API +%%%=================================================================== + +%% @doc Spawns the server and registers the local name (unique) +-spec(start_link(Id :: atom()) -> + {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). +start_link(Id) -> + gen_server:start_link({local, Id}, ?MODULE, [Id], []). + +%% @doc Stops the server +stop(ServerRef) -> + gen_server:stop(ServerRef). + +%% @doc Loads a BE and evaluates events +run(ServerRef, Info, BetreeFile, EventsFile, EventEvalFunc, StatsFunc) -> + run(ServerRef, Info, BetreeFile, EventsFile, EventEvalFunc, _EventEvalOutputFile = undefined, + StatsFunc, _StatsOutputFile = undefined). + +run(ServerRef, Info, BetreeFile, EventsFile, EventEvalFunc, EventEvalOutputFile, StatsFunc, StatsOutputFile) -> + gen_server:call(ServerRef, + {run, Info, BetreeFile, EventsFile, EventEvalFunc, EventEvalOutputFile, StatsFunc, StatsOutputFile}, + infinity). + +%%%=================================================================== +%%% gen_server callbacks +%%%=================================================================== + +%% @private +%% @doc Initializes the server +-spec(init(Args :: term()) -> + {ok, State :: #be_eval_state{}} | {ok, State :: #be_eval_state{}, timeout() | hibernate} | + {stop, Reason :: term()} | ignore). +init([Id]) -> + {ok, #be_eval_state{id = Id}}. + +%% @private +%% @doc Handling call messages +-spec(handle_call(Request :: term(), From :: {pid(), Tag :: term()}, + State :: #be_eval_state{}) -> + {reply, Reply :: term(), NewState :: #be_eval_state{}} | + {reply, Reply :: term(), NewState :: #be_eval_state{}, timeout() | hibernate} | + {noreply, NewState :: #be_eval_state{}} | + {noreply, NewState :: #be_eval_state{}, timeout() | hibernate} | + {stop, Reason :: term(), Reply :: term(), NewState :: #be_eval_state{}} | + {stop, Reason :: term(), NewState :: #be_eval_state{}}). +handle_call({run, Info, BetreeFile, EventsFile, EventEvalFunc, EventEvalOutputFile, StatsFunc, StatsOutputFile}, + _From, State = #be_eval_state{id = Id}) -> + case be_bm_utils:file_exists(EventsFile) of + false -> + {reply, {error, {file_does_not_exist, EventsFile}}, State}; + true -> + StrId = atom_to_list(Id), + LoaderId = list_to_atom(StrId ++ "_loader"), + {ok, _} = be_loader:start_link(LoaderId), + io:format("Loading BE-Tree...~n"), + case be_loader:load(LoaderId, BetreeFile) of + {ok, {EvalId, StatsId} = _LoaderRet} -> + be_loader:stop(LoaderId), + ReaderId = list_to_atom(StrId ++ "_event_reader"), + case term_reader:start_link(ReaderId, EventsFile) of + {error, _Reason} = Err -> + {reply, Err, State}; + {ok, _ReaderState} -> + ok = term_eval:update_context(EvalId, EventEvalFunc), + EventEvalOutputId = case EventEvalOutputFile of + undefined -> undefined; + _ -> list_to_atom(StrId ++ "_event_writer") + end, + {ok, _WriterState} = term_writer:start_link(EventEvalOutputId, EventEvalOutputFile), + + StartTime = calendar:universal_time_to_local_time(erlang:universaltime()), + Index = 0, + SnapshotFreq = 1000, + Allocations = be_bm_utils:betree_allocations(), + AllocationDiffs = [], + SnapshotNanoAcc = 0, + NanoDiffs = [], + EvaluatorStats = #be_evaluator_stats{ + start_time = StartTime, + info = Info, + index = Index, + snapshot_freq = SnapshotFreq, + initial_allocations = Allocations, + current_allocations = Allocations, + snapshot_allocations = Allocations, + allocation_diffs = AllocationDiffs, + snapshot_nano_acc = SnapshotNanoAcc, + nano_diffs = NanoDiffs}, + + ok = term_eval:update_context(StatsId, StatsFunc, EvaluatorStats), + + io:format("Processing events...~n"), + case read_eval_loop(ReaderId, EvalId, EventEvalOutputId, StatsId) of + {error, _Reason1} = Err1 -> + term_writer:stop(EventEvalOutputId), + term_eval:stop(StatsId), + term_eval:stop(EvalId), + term_reader:stop(ReaderId), + {reply, Err1, State}; + + ok -> + StatsContext = term_eval:get_context(StatsId), + Stats = report_stats(StatsContext), + be_bm_utils:write_terms(StatsOutputFile, Stats), + term_writer:stop(EventEvalOutputId), + term_eval:stop(StatsId), + term_eval:stop(EvalId), + term_reader:stop(ReaderId), + {reply, ok, State} + end + end; + Err -> + be_loader:stop(LoaderId), + {reply, Err, State} + end + end; + +handle_call(_Request, _From, State = #be_eval_state{}) -> + {reply, ok, State}. + +%% @private +%% @doc Handling cast messages +-spec(handle_cast(Request :: term(), State :: #be_eval_state{}) -> + {noreply, NewState :: #be_eval_state{}} | + {noreply, NewState :: #be_eval_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #be_eval_state{}}). +handle_cast(_Request, State = #be_eval_state{}) -> + {noreply, State}. + +%% @private +%% @doc Handling all non call/cast messages +-spec(handle_info(Info :: timeout() | term(), State :: #be_eval_state{}) -> + {noreply, NewState :: #be_eval_state{}} | + {noreply, NewState :: #be_eval_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #be_eval_state{}}). +handle_info(_Info, State = #be_eval_state{}) -> + {noreply, State}. + +%% @private +%% @doc This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any +%% necessary cleaning up. When it returns, the gen_server terminates +%% with Reason. The return value is ignored. +-spec(terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), + State :: #be_eval_state{}) -> term()). +terminate(_Reason, _State = #be_eval_state{}) -> + ok. + +%% @private +%% @doc Convert process state when code is changed +-spec(code_change(OldVsn :: term() | {down, term()}, State :: #be_eval_state{}, + Extra :: term()) -> + {ok, NewState :: #be_eval_state{}} | {error, Reason :: term()}). +code_change(_OldVsn, State = #be_eval_state{}, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== + +read_eval_loop(ReaderId, EvalId, EventEvalOutputId, StatsId) -> + case term_reader:read(ReaderId) of % read a term + eof -> ok; + {error, _Reason} = Err -> Err; + + {ok, Term} -> + case term_eval:eval(EvalId, Term) of % evaluate the term + {error, _Reason} = Err -> Err; + + % an evaluated term and stats are produced + {ok, {EvaluatedTerm, TermEvaluationStats}} -> + ok = term_writer:write(EventEvalOutputId, EvaluatedTerm), % write the evaluated term + term_eval:eval(StatsId, TermEvaluationStats), % process the stats + read_eval_loop(ReaderId, EvalId, EventEvalOutputId, StatsId) % repeat + end + end. + +report_stats(#be_evaluator_stats{ + start_time = StartTime, current_time = CurrentTime, + info = Info, + index = Index, snapshot_freq = SnapshotFreq, + initial_allocations = InitialAllocations, + current_allocations = CurrentAllocations, + allocation_diffs = AllocationDifs, + nano_diffs = NanoDiffs +}) -> + StartSec = calendar:datetime_to_gregorian_seconds(StartTime), + CurrentSec = calendar:datetime_to_gregorian_seconds(CurrentTime), + {_Days, Duration} = calendar:seconds_to_daystime(CurrentSec - StartSec), + [ + {start_local_time, StartTime}, {end_local_time, CurrentTime}, + {duration, Duration}, + {info, Info}, + {n_events, Index}, {snapshot_freq, SnapshotFreq}, + {initial_allocations, InitialAllocations}, + {current_allocations, CurrentAllocations}, + {nano_diffs, lists:reverse(NanoDiffs)}, + {allocation_diffs, lists:reverse(AllocationDifs)} + ]; +report_stats(X) -> + [{error, {not_a_betree_event_processor_state, X}}]. diff --git a/benchmarks/be_eval.hrl b/benchmarks/be_eval.hrl new file mode 100644 index 0000000..99fad35 --- /dev/null +++ b/benchmarks/be_eval.hrl @@ -0,0 +1,35 @@ + +-record(be_evaluator, { + betree, + consts, + index +}). + +-record(be_loader_stats, { + start_time, + current_time, + index, + snapshot_freq, + initial_allocations, + current_allocations, + snapshot_allocations, + allocation_diffs, + initial_nano, + current_nano, + snapshot_nano, + nano_diffs +}). + +-record(be_evaluator_stats, { + start_time, + current_time, + info, + index, + snapshot_freq, + initial_allocations, + current_allocations, + snapshot_allocations, + allocation_diffs, + snapshot_nano_acc, + nano_diffs +}). diff --git a/benchmarks/be_loader.erl b/benchmarks/be_loader.erl new file mode 100644 index 0000000..2908984 --- /dev/null +++ b/benchmarks/be_loader.erl @@ -0,0 +1,254 @@ +-module(be_loader). + +-behaviour(gen_server). + +%% +%% Loads Boolean Expressions into BE-Tree +%% + +-include("be_eval.hrl"). + +%% API +-export([ + start_link/1, + stop/1, + load/2 +]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-define(SERVER, ?MODULE). + +-record(be_loader_state, { + id :: atom() +}). + +%%%=================================================================== +%%% API +%%%=================================================================== + +%% @doc Spawns the server and registers the local name (unique) +-spec(start_link(Id :: atom()) -> + {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). +start_link(Id) -> + gen_server:start_link({local, Id}, ?MODULE, [Id], []). + +%% @doc Stops the server +stop(ServerRef) -> + gen_server:stop(ServerRef). + +%% @doc Loads Boolean Expressions from file +load(ServerRef, FileName) -> + gen_server:call(ServerRef, {load, FileName}, infinity). + +%%%=================================================================== +%%% gen_server callbacks +%%%=================================================================== + +%% @private +%% @doc Initializes the server +-spec(init(Args :: term()) -> + {ok, State :: #be_loader_state{}} | {ok, State :: #be_loader_state{}, timeout() | hibernate} | + {stop, Reason :: term()} | ignore). +init([Id]) -> + {ok, #be_loader_state{id = Id}}. + +%% @private +%% @doc Handling call messages +-spec(handle_call(Request :: term(), From :: {pid(), Tag :: term()}, + State :: #be_loader_state{}) -> + {reply, Reply :: term(), NewState :: #be_loader_state{}} | + {reply, Reply :: term(), NewState :: #be_loader_state{}, timeout() | hibernate} | + {noreply, NewState :: #be_loader_state{}} | + {noreply, NewState :: #be_loader_state{}, timeout() | hibernate} | + {stop, Reason :: term(), Reply :: term(), NewState :: #be_loader_state{}} | + {stop, Reason :: term(), NewState :: #be_loader_state{}}). +handle_call({load, FileName}, _From, State = #be_loader_state{id = Id}) -> + case be_bm_utils:file_exists(FileName) of + false -> + {reply, {error, {file_does_not_exist, FileName}}, State}; + true -> + StrId = atom_to_list(Id), + ReaderId = list_to_atom(StrId ++ "_loader_reader"), + case term_reader:start_link(ReaderId, FileName) of + {error, _Reason} = Err -> + {reply, Err, State}; + {ok, _ReaderState} -> + case term_reader:read(ReaderId) of + eof -> + term_reader:stop(ReaderId), + {reply, {error, be_tree_parameters_not_provided}, State}; + {error, _Reason} = Err -> + term_reader:stop(ReaderId), + {reply, Err, State}; + {ok, Term} -> + Domains = [Term], + case erl_betree:betree_make(Domains) of + {ok, Betree} -> + EvalId = list_to_atom(StrId ++ "_evaluator"), + Consts = [], + Index = 0, + BeEvaluator = #be_evaluator{ + betree = Betree, + consts = Consts, + index = Index + }, + {ok, _StateEval} = term_eval:start_link(EvalId, fun add_expr/2, BeEvaluator), + + StatsId = list_to_atom(StrId ++ "_stats"), + StartTime = calendar:universal_time_to_local_time(erlang:universaltime()), + SnapshotFreq = 100, + Allocations = be_bm_utils:betree_allocations(), + AllocationDiffs = [], + NanoDiffs = [], + Nano = erlang:monotonic_time(nanosecond), + LoaderStats = #be_loader_stats{ + start_time = StartTime, + index = Index, + snapshot_freq = SnapshotFreq, + initial_allocations = Allocations, + current_allocations = Allocations, + snapshot_allocations = Allocations, + allocation_diffs = AllocationDiffs, + initial_nano = Nano, + current_nano = Nano, + snapshot_nano = Nano, + nano_diffs = NanoDiffs}, + {ok, _StateStats} = term_eval:start_link(StatsId, fun add_stats/2, LoaderStats), + + case read_eval_stats_loop(ReaderId, EvalId, StatsId) of + {error, _Reason} = Err -> + term_eval:stop(StatsId), + term_eval:stop(EvalId), + term_reader:stop(ReaderId), + {reply, Err, State}; + + ok -> + term_reader:stop(ReaderId), + {reply, {ok, {EvalId, StatsId}}, State} + end; + Err -> + term_reader:stop(ReaderId), + {reply, Err, State} + end + end + end + end; + +handle_call(_Request, _From, State = #be_loader_state{}) -> + {reply, ok, State}. + +%% @private +%% @doc Handling cast messages +-spec(handle_cast(Request :: term(), State :: #be_loader_state{}) -> + {noreply, NewState :: #be_loader_state{}} | + {noreply, NewState :: #be_loader_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #be_loader_state{}}). +handle_cast(_Request, State = #be_loader_state{}) -> + {noreply, State}. + +%% @private +%% @doc Handling all non call/cast messages +-spec(handle_info(Info :: timeout() | term(), State :: #be_loader_state{}) -> + {noreply, NewState :: #be_loader_state{}} | + {noreply, NewState :: #be_loader_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #be_loader_state{}}). +handle_info(_Info, State = #be_loader_state{}) -> + {noreply, State}. + +%% @private +%% @doc This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any +%% necessary cleaning up. When it returns, the gen_server terminates +%% with Reason. The return value is ignored. +-spec(terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), + State :: #be_loader_state{}) -> term()). +terminate(_Reason, _State = #be_loader_state{}) -> + ok. + +%% @private +%% @doc Convert process state when code is changed +-spec(code_change(OldVsn :: term() | {down, term()}, State :: #be_loader_state{}, + Extra :: term()) -> + {ok, NewState :: #be_loader_state{}} | {error, Reason :: term()}). +code_change(_OldVsn, State = #be_loader_state{}, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== + +add_expr(Expr, #be_evaluator{ + betree = Betree, + consts = Consts, + index = Index +} = State) -> + Index1 = Index + 1, + case erl_betree:betree_make_sub(Betree, Index1, Consts, Expr) of + {ok, Sub} -> + case erl_betree:betree_insert_sub(Betree, Sub) of + ok -> + {ok, State#be_evaluator{index = Index1}}; + X -> + {{error, {betree_insert_sub, X}}, State} + end; + X -> + {{error, {betree_make_sub, X}}, State} + end. + +add_stats(_Stats, #be_loader_stats{ + index = Index, + snapshot_freq = SnapshotFreq, + snapshot_allocations = SnapshotAllocations, + allocation_diffs = AllocationsDiffs, + snapshot_nano = SnapshotNano, + nano_diffs = NanoDiffs +} = Stats) -> + Index1 = Index + 1, + CurrentTime = calendar:universal_time_to_local_time(erlang:universaltime()), + CurrentAllocations = be_bm_utils:betree_allocations(), + CurrentNano = erlang:monotonic_time(nanosecond), + Stats1 = case Index1 rem SnapshotFreq of + 0 -> + NanoDiff = CurrentNano - SnapshotNano, + NanoPerIndex = NanoDiff / SnapshotFreq, + MilliPerIndex = ceil(NanoPerIndex / 1_000_000), + io:format("BE-Tree indexes: ~p, ~p milliseconds/index~n", [Index1, MilliPerIndex]), + AllocationDiff = be_bm_utils:betree_allocations_diff(SnapshotAllocations, CurrentAllocations), + Stats#be_loader_stats{ + current_time = CurrentTime, + index = Index1, + current_allocations = CurrentAllocations, + snapshot_allocations = CurrentAllocations, + allocation_diffs = [AllocationDiff | AllocationsDiffs], + current_nano = CurrentNano, + snapshot_nano = CurrentNano, + nano_diffs = [NanoDiff | NanoDiffs]}; + _ -> + Stats#be_loader_stats{ + current_time = CurrentTime, + index = Index1, + current_allocations = CurrentAllocations, + current_nano = CurrentNano} + end, + {ok, Stats1}. + +read_eval_stats_loop(ReaderId, EvalId, StatsId) -> + case term_reader:read(ReaderId) of + eof -> ok; + {error, _Reason} = Err -> Err; + + {ok, Term} -> + case term_eval:eval(EvalId, Term) of + {{error, _Reason} = Err, _EvaluatedState} -> + Err; + + ok -> + Allocations = be_bm_utils:betree_allocations(), + Nano = erlang:monotonic_time(nanosecond), + term_eval:eval(StatsId, {Allocations, Nano}), + read_eval_stats_loop(ReaderId, EvalId, StatsId) + end + end. diff --git a/benchmarks/make.erl b/benchmarks/make.erl new file mode 100644 index 0000000..c73c985 --- /dev/null +++ b/benchmarks/make.erl @@ -0,0 +1,32 @@ +-module(make). + +%% +%% Compile list of Erlang source files +%% + +%% API +-export([ + all/0 +]). + +all() -> + Files = [ + "benchmarks/term_reader.erl", + "benchmarks/term_writer.erl", + "benchmarks/term_eval.erl", + "benchmarks/be_loader.erl", + "benchmarks/be_eval.erl", + "benchmarks/be_bm_utils.erl", + "benchmarks/be_bm.erl" + ], + compile(Files, []). + +compile([], Acc) -> + lists:reverse(Acc); +compile([File | Rest], Acc) -> + case compile:file(File) of + {ok, Atom} -> + compile(Rest, [Atom | Acc]); + Err -> + lists:reverse([Err | Acc]) + end. diff --git a/benchmarks/term_eval.erl b/benchmarks/term_eval.erl new file mode 100644 index 0000000..e74ff07 --- /dev/null +++ b/benchmarks/term_eval.erl @@ -0,0 +1,142 @@ +-module(term_eval). + +-behaviour(gen_server). + +%% +%% Term evaluator within a given context. +%% + +%% API +-export([ + start_link/3, + stop/1, + eval/2, + eval/3, + get_context/1, + update_context/2, + update_context/3 +]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-define(SERVER, ?MODULE). + +-record(term_eval_state, { + % `eval` is a `Func(Term, Context) -> {Evaluated, NewContext}` + eval, + % `context` is a second parameter in `Func(Term, Context)` + context +}). + +%%%=================================================================== +%%% API +%%%=================================================================== + +%% @doc Spawns the server and registers the local name (unique) +-spec(start_link(Id :: atom(), Func :: fun(), Context :: term()) -> + {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). +start_link(Id, Func, Context) -> + gen_server:start_link({local, Id}, ?MODULE, [Func, Context], []). + +%% @doc Stops the server +stop(ServerRef) -> + gen_server:stop(ServerRef). + +%% @doc Evaluates a term +eval(ServerRef, Term) -> + eval(ServerRef, Term, infinity). + +eval(ServerRef, Term, Timeout) -> + gen_server:call(ServerRef, {eval, Term}, Timeout). + +get_context(ServerRef) -> + gen_server:call(ServerRef, get_context). + +update_context(ServerRef, Func) -> + gen_server:call(ServerRef, {update_context, Func}). + +update_context(ServerRef, Func, Context) -> + gen_server:call(ServerRef, {update_context, Func, Context}). + +%%%=================================================================== +%%% gen_server callbacks +%%%=================================================================== + +%% @private +%% @doc Initializes the server +-spec(init(Args :: term()) -> + {ok, State :: #term_eval_state{}} | {ok, State :: #term_eval_state{}, timeout() | hibernate} | + {stop, Reason :: term()} | ignore). +init([Func, Context]) -> + {ok, #term_eval_state{eval = Func, context = Context}}. + +%% @private +%% @doc Handling call messages +-spec(handle_call(Request :: term(), From :: {pid(), Tag :: term()}, + State :: #term_eval_state{}) -> + {reply, Reply :: term(), NewState :: #term_eval_state{}} | + {reply, Reply :: term(), NewState :: #term_eval_state{}, timeout() | hibernate} | + {noreply, NewState :: #term_eval_state{}} | + {noreply, NewState :: #term_eval_state{}, timeout() | hibernate} | + {stop, Reason :: term(), Reply :: term(), NewState :: #term_eval_state{}} | + {stop, Reason :: term(), NewState :: #term_eval_state{}}). +handle_call({eval, Term}, _From, State = #term_eval_state{eval = Eval, context = Context}) -> + {Evaluated, Context1} = Eval(Term, Context), + {reply, Evaluated, State#term_eval_state{context = Context1}}; + +handle_call(get_context, _From, State = #term_eval_state{context = Context}) -> + {reply, Context, State}; + +handle_call({update_context, Func}, _From, State = #term_eval_state{}) -> + {reply, ok, State#term_eval_state{eval = Func}}; + +handle_call({update_context, Func, Context}, _From, State = #term_eval_state{}) -> + {reply, ok, State#term_eval_state{eval = Func, context = Context}}; + +handle_call(_Request, _From, State = #term_eval_state{}) -> + {reply, ok, State}. + +%% @private +%% @doc Handling cast messages +-spec(handle_cast(Request :: term(), State :: #term_eval_state{}) -> + {noreply, NewState :: #term_eval_state{}} | + {noreply, NewState :: #term_eval_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #term_eval_state{}}). +handle_cast(_Request, State = #term_eval_state{}) -> + {noreply, State}. + +%% @private +%% @doc Handling all non call/cast messages +-spec(handle_info(Info :: timeout() | term(), State :: #term_eval_state{}) -> + {noreply, NewState :: #term_eval_state{}} | + {noreply, NewState :: #term_eval_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #term_eval_state{}}). +handle_info(_Info, State = #term_eval_state{}) -> + {noreply, State}. + +%% @private +%% @doc This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any +%% necessary cleaning up. When it returns, the gen_server terminates +%% with Reason. The return value is ignored. +-spec(terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), + State :: #term_eval_state{}) -> term()). +terminate(_Reason, _State = #term_eval_state{context = _Context}) -> + % Do nothing with `Context` because `Context` came from a caller. + % So, it is the caller responsibility + % to handle the `Context` when the evaluation has completed. + ok. + +%% @private +%% @doc Convert process state when code is changed +-spec(code_change(OldVsn :: term() | {down, term()}, State :: #term_eval_state{}, + Extra :: term()) -> + {ok, NewState :: #term_eval_state{}} | {error, Reason :: term()}). +code_change(_OldVsn, State = #term_eval_state{}, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== diff --git a/benchmarks/term_reader.erl b/benchmarks/term_reader.erl new file mode 100644 index 0000000..f8b2041 --- /dev/null +++ b/benchmarks/term_reader.erl @@ -0,0 +1,132 @@ +-module(term_reader). + +-behaviour(gen_server). + +%% +%% Term reader from file. +%% Reads terms from file one by one unlike `file:consult/1`. +%% + +%% API +-export([ + start_link/2, + stop/1, + read/1 +]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-define(SERVER, ?MODULE). + +-record(term_reader_state, { + name, + handle, + count, + eof, + error +}). + +%%%=================================================================== +%%% API +%%%=================================================================== + +%% @doc Spawns the server and registers the local name (unique) +-spec(start_link(Id :: atom(), FileName :: string()) -> + {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). +start_link(Id, FileName) -> + gen_server:start_link({local, Id}, ?MODULE, [FileName], []). + +%% @doc Stops the server +stop(ServerRef) -> + gen_server:stop(ServerRef). + +%% @doc Reads a term +read(ServerRef) -> + gen_server:call(ServerRef, read). + +%%%=================================================================== +%%% gen_server callbacks +%%%=================================================================== + +%% @private +%% @doc Initializes the server +-spec(init(Args :: term()) -> + {ok, State :: #term_reader_state{}} | {ok, State :: #term_reader_state{}, timeout() | hibernate} | + {stop, Reason :: term()} | ignore). +init([FileName]) -> + case file:open(FileName, [read]) of + {error, Reason} -> + {stop, {error, {Reason, FileName}}}; + {ok, FileHandle} -> + {ok, #term_reader_state{ + name = FileName, + handle = FileHandle, + count = 0, + eof = false}} + end. + +%% @private +%% @doc Handling call messages +-spec(handle_call(Request :: term(), From :: {pid(), Tag :: term()}, + State :: #term_reader_state{}) -> + {reply, Reply :: term(), NewState :: #term_reader_state{}} | + {reply, Reply :: term(), NewState :: #term_reader_state{}, timeout() | hibernate} | + {noreply, NewState :: #term_reader_state{}} | + {noreply, NewState :: #term_reader_state{}, timeout() | hibernate} | + {stop, Reason :: term(), Reply :: term(), NewState :: #term_reader_state{}} | + {stop, Reason :: term(), NewState :: #term_reader_state{}}). +handle_call(read, _From, State = #term_reader_state{handle = Handle, count = Count}) -> + case io:read(Handle, '') of + eof -> + {reply, eof, State#term_reader_state{eof = true}}; + {error, Reason} = Err -> + {reply, Err, State#term_reader_state{error = Reason}}; + {ok, _Term} = Reply -> + {reply, Reply, State#term_reader_state{count = Count+1}} + end; + +handle_call(_Request, _From, State = #term_reader_state{}) -> + {reply, ok, State}. + +%% @private +%% @doc Handling cast messages +-spec(handle_cast(Request :: term(), State :: #term_reader_state{}) -> + {noreply, NewState :: #term_reader_state{}} | + {noreply, NewState :: #term_reader_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #term_reader_state{}}). +handle_cast(_Request, State = #term_reader_state{}) -> + {noreply, State}. + +%% @private +%% @doc Handling all non call/cast messages +-spec(handle_info(Info :: timeout() | term(), State :: #term_reader_state{}) -> + {noreply, NewState :: #term_reader_state{}} | + {noreply, NewState :: #term_reader_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #term_reader_state{}}). +handle_info(_Info, State = #term_reader_state{}) -> + {noreply, State}. + +%% @private +%% @doc This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any +%% necessary cleaning up. When it returns, the gen_server terminates +%% with Reason. The return value is ignored. +-spec(terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), + State :: #term_reader_state{}) -> term()). +terminate(_Reason, _State = #term_reader_state{handle = Handle}) -> + file:close(Handle), + ok. + +%% @private +%% @doc Convert process state when code is changed +-spec(code_change(OldVsn :: term() | {down, term()}, State :: #term_reader_state{}, + Extra :: term()) -> + {ok, NewState :: #term_reader_state{}} | {error, Reason :: term()}). +code_change(_OldVsn, State = #term_reader_state{}, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== diff --git a/benchmarks/term_writer.erl b/benchmarks/term_writer.erl new file mode 100644 index 0000000..c1c062d --- /dev/null +++ b/benchmarks/term_writer.erl @@ -0,0 +1,124 @@ +-module(term_writer). + +-behaviour(gen_server). + +%% +%% Term writer to file. +%% Append terms to file one by one. +%% + +%% API +-export([ + start_link/2, + stop/1, + write/2 +]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, + code_change/3]). + +-define(SERVER, ?MODULE). + +-record(term_writer_state, { + name, + count +}). + +%%%=================================================================== +%%% API +%%%=================================================================== + +%% @doc Spawns the server and registers the local name (unique) +-spec(start_link(Id :: atom(), FileName :: string()) -> + {ok, Pid :: pid()} | ignore | {error, Reason :: term()}). +start_link(undefined, FileName) -> + {ok, #term_writer_state{name = FileName, count = 0}}; +start_link(Id, FileName) -> + gen_server:start_link({local, Id}, ?MODULE, [FileName], []). + +%% @doc Stops the server +stop(undefined) -> + ok; +stop(ServerRef) -> + gen_server:stop(ServerRef). + +%% @doc Writes a term +write(undefined, _Term) -> + ok; +write(ServerRef, Term) -> + gen_server:call(ServerRef, {write, Term}). + +%%%=================================================================== +%%% gen_server callbacks +%%%=================================================================== + +%% @private +%% @doc Initializes the server +-spec(init(Args :: term()) -> + {ok, State :: #term_writer_state{}} | {ok, State :: #term_writer_state{}, timeout() | hibernate} | + {stop, Reason :: term()} | ignore). +init([FileName]) -> + {ok, #term_writer_state{ + name = FileName, + count = 0}}. + +%% @private +%% @doc Handling call messages +-spec(handle_call(Request :: term(), From :: {pid(), Tag :: term()}, + State :: #term_writer_state{}) -> + {reply, Reply :: term(), NewState :: #term_writer_state{}} | + {reply, Reply :: term(), NewState :: #term_writer_state{}, timeout() | hibernate} | + {noreply, NewState :: #term_writer_state{}} | + {noreply, NewState :: #term_writer_state{}, timeout() | hibernate} | + {stop, Reason :: term(), Reply :: term(), NewState :: #term_writer_state{}} | + {stop, Reason :: term(), NewState :: #term_writer_state{}}). +handle_call(_Request, _From, State = #term_writer_state{name = undefined, count = Count}) -> + {reply, ok, State#term_writer_state{count = Count + 1}}; +handle_call({write, Term}, _From, State = #term_writer_state{name = FileName, count = Count}) -> + Line = [io_lib:format("~tp.~n", [Term])], + file:write_file(FileName, Line, [append]), + {reply, ok, State#term_writer_state{count = Count + 1}}; + +handle_call(_Request, _From, State = #term_writer_state{}) -> + {reply, ok, State}. + +%% @private +%% @doc Handling cast messages +-spec(handle_cast(Request :: term(), State :: #term_writer_state{}) -> + {noreply, NewState :: #term_writer_state{}} | + {noreply, NewState :: #term_writer_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #term_writer_state{}}). +handle_cast(_Request, State = #term_writer_state{}) -> + {noreply, State}. + +%% @private +%% @doc Handling all non call/cast messages +-spec(handle_info(Info :: timeout() | term(), State :: #term_writer_state{}) -> + {noreply, NewState :: #term_writer_state{}} | + {noreply, NewState :: #term_writer_state{}, timeout() | hibernate} | + {stop, Reason :: term(), NewState :: #term_writer_state{}}). +handle_info(_Info, State = #term_writer_state{}) -> + {noreply, State}. + +%% @private +%% @doc This function is called by a gen_server when it is about to +%% terminate. It should be the opposite of Module:init/1 and do any +%% necessary cleaning up. When it returns, the gen_server terminates +%% with Reason. The return value is ignored. +-spec(terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), + State :: #term_writer_state{}) -> term()). +terminate(_Reason, _State = #term_writer_state{}) -> + ok. + +%% @private +%% @doc Convert process state when code is changed +-spec(code_change(OldVsn :: term() | {down, term()}, State :: #term_writer_state{}, + Extra :: term()) -> + {ok, NewState :: #term_writer_state{}} | {error, Reason :: term()}). +code_change(_OldVsn, State = #term_writer_state{}, _Extra) -> + {ok, State}. + +%%%=================================================================== +%%% Internal functions +%%%=================================================================== diff --git a/c_src/betree.c b/c_src/betree.c index 1fd4d34..0545615 100644 --- a/c_src/betree.c +++ b/c_src/betree.c @@ -5,10 +5,17 @@ #include "erl_nif.h" +// IMPORTANT! `NIF true` has to be defined before the use of any `be-tree` library headers. +// `NIF` controls memory allocation/de-allocation functions in `alloc.h`. +// The usage of different sets of allocation/de-allocation functions +// in the `be-tree` library and in the `erl-be-tree` will lead to memory leaks and/or crashes. +#define NIF true + #include "betree.h" // return values static ERL_NIF_TERM atom_ok; +static ERL_NIF_TERM atom_continue; static ERL_NIF_TERM atom_error; static ERL_NIF_TERM atom_bad_expr; static ERL_NIF_TERM atom_bad_event; @@ -44,11 +51,86 @@ static ERL_NIF_TERM atom_unknown; static ErlNifResourceType* MEM_BETREE; static ErlNifResourceType* MEM_SUB; +static ErlNifResourceType* MEM_SEARCH_ITERATOR; struct sub { const struct betree_sub* sub; }; +#include "alloc.h" +#include "hashmap.h" +#include "tree.h" + +struct search_iterator { + size_t attr_domain_count; + struct betree_event* event; + const struct betree_variable** variables; + size_t undefined_count; + uint64_t* undefined; + size_t memoize_count; + struct memoize memoize; + struct subs_to_eval subs; + int node_count; + struct report_counting* report; + size_t index; +}; + +static void search_iterator_init(struct search_iterator* search_iterator) +{ + search_iterator->attr_domain_count = 0; + search_iterator->event = NULL; + search_iterator->variables = NULL; + search_iterator->undefined_count = 0; + search_iterator->undefined = NULL; + search_iterator->memoize_count = 0; + search_iterator->memoize.pass = NULL; + search_iterator->memoize.fail = NULL; + search_iterator->subs.subs = NULL; + search_iterator->subs.capacity = 0; + search_iterator->subs.count = 0; + search_iterator->report = NULL; + search_iterator->node_count = 0; + search_iterator->index = 0; +} + +static void search_iterator_deinit(struct search_iterator* search_iterator) +{ + if (search_iterator->event != NULL) { + betree_free_event(search_iterator->event); + search_iterator->event = NULL; + } + if (search_iterator->variables != NULL) { + search_iterator->attr_domain_count = 0; + bfree(search_iterator->variables); + search_iterator->variables = NULL; + } + if (search_iterator->undefined != NULL) { + search_iterator->undefined_count = 0; + bfree(search_iterator->undefined); + search_iterator->undefined = NULL; + } + search_iterator->memoize_count = 0; + if (search_iterator->memoize.pass != NULL) { + bfree(search_iterator->memoize.pass); + search_iterator->memoize.pass = NULL; + } + if (search_iterator->memoize.fail != NULL) { + bfree(search_iterator->memoize.fail); + search_iterator->memoize.fail = NULL; + } + if (search_iterator->subs.subs != NULL) { + search_iterator->subs.capacity = 0; + search_iterator->subs.count = 0; + bfree(search_iterator->subs.subs); + search_iterator->subs.subs = NULL; + } + if (search_iterator->report != NULL) { + free_report_counting(search_iterator->report); + search_iterator->report = NULL; + } + search_iterator->node_count = 0; +} + static ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name) { ERL_NIF_TERM ret; @@ -67,6 +149,12 @@ static void cleanup_betree(ErlNifEnv* env, void* obj) betree_deinit(betree); } +static void cleanup_search_iterator(ErlNifEnv* env, void* obj) +{ + (void)env; + struct search_iterator* search_iterator = obj; + search_iterator_deinit(search_iterator); +} static int load(ErlNifEnv* env, void **priv_data, ERL_NIF_TERM load_info) { @@ -74,6 +162,7 @@ static int load(ErlNifEnv* env, void **priv_data, ERL_NIF_TERM load_info) (void)load_info; atom_ok = make_atom(env, "ok"); + atom_continue = make_atom(env, "continue"); atom_error = make_atom(env, "error"); atom_bad_expr = make_atom(env, "bad_expr"); atom_bad_event = make_atom(env, "bad_event"); @@ -113,6 +202,10 @@ static int load(ErlNifEnv* env, void **priv_data, ERL_NIF_TERM load_info) if(MEM_SUB == NULL) { return -1; } + MEM_SEARCH_ITERATOR = enif_open_resource_type(env, NULL, "search_iterator", cleanup_search_iterator, flags, NULL); + if(MEM_SEARCH_ITERATOR == NULL) { + return -1; + } return 0; } @@ -124,6 +217,13 @@ static struct betree* get_betree(ErlNifEnv* env, const ERL_NIF_TERM term) return betree; } +static struct search_iterator* get_search_iterator(ErlNifEnv* env, const ERL_NIF_TERM term) +{ + struct search_iterator* search_iterator = NULL; + enif_get_resource(env, term, MEM_SEARCH_ITERATOR, (void*)&search_iterator); + return search_iterator; +} + static struct sub* get_sub(ErlNifEnv* env, const ERL_NIF_TERM term) { struct sub* sub = NULL; @@ -138,11 +238,8 @@ static char *alloc_string(ErlNifBinary bin) if (!key) { return NULL; } - memcpy(key, bin.data, key_len); - key[key_len] = 0; - return key; } @@ -447,17 +544,12 @@ static ERL_NIF_TERM nif_betree_insert_sub(ErlNifEnv* env, int argc, const ERL_NI static bool get_binary(ErlNifEnv* env, ERL_NIF_TERM term, const char* name, struct betree_variable** variable) { ErlNifBinary bin; - if (!enif_inspect_binary(env, term, &bin)) { return false; } - char* value = alloc_string(bin); - *variable = betree_make_string_variable(name, value); - enif_free(value); - return true; } @@ -945,6 +1037,236 @@ static ERL_NIF_TERM nif_betree_exists(ErlNifEnv* env, int argc, const ERL_NIF_TE return retval; } +static void bump_used_reductions(ErlNifEnv* env, int count) +{ + const int reduction_per_count = 1; + const int DEFAULT_ERLANG_REDUCTION_COUNT = 2000; + int reductions_used = count * reduction_per_count; + int pct_used = 100 * reductions_used / DEFAULT_ERLANG_REDUCTION_COUNT; + if(pct_used > 0) { + if(pct_used > 100) { + pct_used = 100; + } + } else { + pct_used = 1; + } + enif_consume_timeslice(env, pct_used); +} + +static ERL_NIF_TERM nif_betree_search_iterator(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + if(argc != 2) { + return enif_make_badarg(env); + } + + struct betree* betree = get_betree(env, argv[0]); + if(betree == NULL) { + return enif_make_badarg(env); + } + + unsigned int list_len; + if(!enif_get_list_length(env, argv[1], &list_len)) { + return enif_make_badarg(env); + } + + struct search_iterator* search_iterator = enif_alloc_resource(MEM_SEARCH_ITERATOR, sizeof(*search_iterator)); + search_iterator_init(search_iterator); + + ERL_NIF_TERM search_iterator_term = enif_make_resource(env, search_iterator); + enif_release_resource(search_iterator); + + search_iterator->attr_domain_count = betree->config->attr_domain_count; + search_iterator->event = betree_make_event(betree); + + ERL_NIF_TERM head; + ERL_NIF_TERM tail = argv[1]; + + size_t pred_index = 0; + const ERL_NIF_TERM* tuple; + int tuple_len; + + for(unsigned int i = 0; i < list_len; i++) { + if(!enif_get_list_cell(env, tail, &head, &tail)) { + search_iterator_deinit(search_iterator); + return enif_make_badarg(env); + } + + if(!enif_get_tuple(env, head, &tuple_len, &tuple)) { + search_iterator_deinit(search_iterator); + return enif_make_badarg(env); + } + + if(!add_variables(env, betree, search_iterator->event, tuple, tuple_len, pred_index)) { + search_iterator_deinit(search_iterator); + return enif_make_badarg(env); + } + pred_index += (tuple_len - 1); + } + + fill_event(betree->config, search_iterator->event); + sort_event_lists(search_iterator->event); + search_iterator->variables + = make_environment(betree->config->attr_domain_count, search_iterator->event); + if(validate_variables(betree->config, search_iterator->variables) == false) { + fprintf(stderr, "Failed to validate event\n"); + search_iterator_deinit(search_iterator); + return enif_make_badarg(env); + } + + search_iterator->undefined = make_undefined_with_count(betree->config->attr_domain_count, + search_iterator->variables, + &search_iterator->undefined_count); + search_iterator->memoize = make_memoize_with_count(betree->config->pred_map->memoize_count, + &search_iterator->memoize_count); + init_subs_to_eval_ext(&search_iterator->subs, 64); + match_be_tree_node_counting( + (const struct attr_domain**)betree->config->attr_domains, + search_iterator->variables, + betree->cnode, &search_iterator->subs, &search_iterator->node_count); + bump_used_reductions(env, search_iterator->node_count); + + ERL_NIF_TERM subs_count = enif_make_ulong(env, search_iterator->subs.count); + ERL_NIF_TERM node_count = enif_make_long(env, search_iterator->node_count); + ERL_NIF_TERM ret = enif_make_tuple3(env, search_iterator_term, subs_count, node_count); + + return enif_make_tuple2(env, atom_ok, ret); +} + +static ERL_NIF_TERM nif_betree_search_next(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 1) { + return enif_make_badarg(env); + } + + struct search_iterator* search_iterator = get_search_iterator(env, argv[0]); + if(search_iterator == NULL) { + return enif_make_badarg(env); + } + + if (search_iterator->report == NULL) { + search_iterator->report = make_report_counting(); + } + + if (search_iterator->index >= search_iterator->subs.count) { + ERL_NIF_TERM matched; + if (search_iterator->report->matched == 0) { + ERL_NIF_TERM tmp[1]; + matched = enif_make_list_from_array(env, tmp, 0); + } else { + ERL_NIF_TERM* arr = enif_alloc(sizeof(ERL_NIF_TERM) * search_iterator->report->matched); + for (int i = 0; i < search_iterator->report->matched; ++i) { + arr[i] = enif_make_uint64(env, search_iterator->report->subs[i]); + } + matched = enif_make_list_from_array(env, arr, search_iterator->report->matched); + enif_free(arr); + } + bump_used_reductions(env, 1); + return enif_make_tuple2(env, atom_ok, matched); + } + + const struct betree_sub* sub = search_iterator->subs.subs[search_iterator->index]; + search_iterator->report->evaluated++; + int count_before = search_iterator->report->node_count + search_iterator->report->ops_count; + bool id_matched = false; + if(match_sub_counting(search_iterator->attr_domain_count, + search_iterator->variables, + sub, + search_iterator->report, + &search_iterator->memoize, + search_iterator->undefined) == true) { + id_matched = true; + add_sub_counting(sub->id, search_iterator->report); + } + search_iterator->index++; + int count_diff = (search_iterator->report->node_count + search_iterator->report->ops_count) + - count_before; + bump_used_reductions(env, count_diff); + + if (search_iterator->index == search_iterator->subs.count) { + ERL_NIF_TERM matched; + if (search_iterator->report->matched == 0) { + ERL_NIF_TERM tmp[1]; + matched = enif_make_list_from_array(env, tmp, 0); + } else { + ERL_NIF_TERM* arr = enif_alloc(sizeof(ERL_NIF_TERM) * search_iterator->report->matched); + for (int i = 0; i < search_iterator->report->matched; ++i) { + arr[i] = enif_make_uint64(env, search_iterator->report->subs[i]); + } + matched = enif_make_list_from_array(env, arr, search_iterator->report->matched); + enif_free(arr); + } + return enif_make_tuple2(env, atom_ok, matched); + } + + ERL_NIF_TERM id[1]; + ERL_NIF_TERM matched; + if (id_matched) { + id[0] = enif_make_uint64(env, sub->id); + matched = enif_make_list_from_array(env, id, 1); + return enif_make_tuple2(env, atom_continue, matched); + } + matched = enif_make_list_from_array(env, id, 0); + return enif_make_tuple2(env, atom_continue, matched); +} + +static ERL_NIF_TERM nif_betree_search_all(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + if (argc != 1) { + return enif_make_badarg(env); + } + + struct search_iterator* search_iterator = get_search_iterator(env, argv[0]); + if(search_iterator == NULL) { + return enif_make_badarg(env); + } + + search_iterator->report = make_report_counting(); + for(size_t i = 0; i < search_iterator->subs.count; i++) { + const struct betree_sub* sub = search_iterator->subs.subs[i]; + search_iterator->report->evaluated++; + if(match_sub_counting(search_iterator->attr_domain_count, + search_iterator->variables, + sub, + search_iterator->report, + &search_iterator->memoize, + search_iterator->undefined) == true) { + add_sub_counting(sub->id, search_iterator->report); + } + } + + ERL_NIF_TERM matched; + if (search_iterator->report->matched == 0) { + ERL_NIF_TERM tmp[1]; + matched = enif_make_list_from_array(env, tmp, 0); + } else { + ERL_NIF_TERM* arr = enif_alloc(sizeof(ERL_NIF_TERM) * search_iterator->report->matched); + for (int i = 0; i < search_iterator->report->matched; ++i) { + arr[i] = enif_make_uint64(env, search_iterator->report->subs[i]); + } + matched = enif_make_list_from_array(env, arr, search_iterator->report->matched); + enif_free(arr); + } + + bump_used_reductions(env, + search_iterator->report->node_count + search_iterator->report->ops_count); + + return enif_make_tuple2(env, atom_ok, matched); +} + +static ERL_NIF_TERM nif_betree_search_iterator_release(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 1) { + return enif_make_badarg(env); + } + + struct search_iterator* search_iterator = get_search_iterator(env, argv[0]); + if(search_iterator == NULL) { + return enif_make_badarg(env); + } + + search_iterator_deinit(search_iterator); + + return atom_ok; +} + /*static ERL_NIF_TERM nif_betree_delete(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])*/ /*{*/ /*ERL_NIF_TERM retval;*/ @@ -974,9 +1296,12 @@ static ErlNifFunc nif_functions[] = { {"betree_insert_sub", 2, nif_betree_insert_sub, 0}, {"betree_exists", 2, nif_betree_exists, 0}, {"betree_search", 2, nif_betree_search, 0}, - {"betree_search", 3, nif_betree_search_t, 0} + {"betree_search", 3, nif_betree_search_t, 0}, + {"search_iterator", 2, nif_betree_search_iterator, 0}, + {"search_next", 1, nif_betree_search_next, 0}, + {"search_all", 1, nif_betree_search_all, 0}, + {"search_iterator_release", 1, nif_betree_search_iterator_release, 0}, /*{"betree_delete", 2, nif_betree_delete, 0}*/ }; ERL_NIF_INIT(erl_betree_nif, nif_functions, &load, NULL, NULL, NULL); - diff --git a/c_src/build_betree b/c_src/build_betree index 4fe490e..ca89c39 100755 --- a/c_src/build_betree +++ b/c_src/build_betree @@ -3,7 +3,7 @@ set -x if [ ! -d "be-tree" ]; then - git clone -b 'v1.1.0' --single-branch --depth 1 http://github.com/adgear/be-tree + git clone -b 'v1.2.0' --single-branch --depth 1 http://github.com/adgear/be-tree cd be-tree make NIF=true fi diff --git a/rebar3 b/rebar3 index b9e599d..9bb538c 100755 Binary files a/rebar3 and b/rebar3 differ diff --git a/src/erl_betree.erl b/src/erl_betree.erl index 9997696..220d09b 100644 --- a/src/erl_betree.erl +++ b/src/erl_betree.erl @@ -6,7 +6,11 @@ betree_insert_sub/2, betree_exists/2, betree_search/2, - betree_search/3 + betree_search/3, + search_iterator/2, + search_next/1, + search_all/1, + search_iterator_release/1 ]). -inline([check_clock_type/1]). @@ -36,3 +40,12 @@ check_clock_type(?CLOCK_REALTIME) -> ?CLOCK_REALTIME; check_clock_type(?CLOCK_PROCESS_CPUTIME_ID) -> ?CLOCK_PROCESS_CPUTIME_ID; check_clock_type(?CLOCK_THREAD_CPUTIME_ID) -> ?CLOCK_THREAD_CPUTIME_ID; check_clock_type(_) -> ?CLOCK_MONOTONIC. + +search_iterator(Betree, Event) -> + erl_betree_nif:search_iterator(Betree, Event). +search_next(Iterator) -> + erl_betree_nif:search_next(Iterator). +search_all(Iterator) -> + erl_betree_nif:search_all(Iterator). +search_iterator_release(Iterator) -> + erl_betree_nif:search_iterator_release(Iterator). diff --git a/src/erl_betree_nif.erl b/src/erl_betree_nif.erl index 95c8450..43aa14b 100644 --- a/src/erl_betree_nif.erl +++ b/src/erl_betree_nif.erl @@ -9,7 +9,11 @@ betree_insert_sub/2, betree_exists/2, betree_search/2, - betree_search/3 + betree_search/3, + search_iterator/2, + search_next/1, + search_all/1, + search_iterator_release/1 ]). -spec on_load() -> ok. @@ -45,3 +49,12 @@ betree_search(_Betree, _Event) -> ?nif_stub. betree_search(_Betree, _Event, _ClockType) -> ?nif_stub. + +search_iterator(_Betree, _Event) -> + ?nif_stub. +search_next(_Iterator) -> + ?nif_stub. +search_all(_Iterator) -> + ?nif_stub. +search_iterator_release(_Iterator) -> + ?nif_stub. diff --git a/test/Params_10_Events_20.txt b/test/Params_10_Events_20.txt new file mode 100644 index 0000000..8603f87 --- /dev/null +++ b/test/Params_10_Events_20.txt @@ -0,0 +1,20 @@ +[false,true,true,true,true,false,false,true,true,true]. +[false,false,false,true,false,false,false,true,true,false]. +[true,true,true,false,false,true,true,true,true,false]. +[false,true,false,false,false,false,false,false,false,true]. +[false,false,false,false,false,false,false,false,true,false]. +[false,true,false,false,true,false,true,true,false,true]. +[true,false,true,false,false,true,true,true,false,true]. +[false,true,true,true,true,true,true,false,false,true]. +[true,true,true,true,true,true,false,true,true,false]. +[false,true,false,true,false,true,true,true,true,false]. +[true,true,true,true,true,false,true,true,true,false]. +[false,false,false,true,false,false,false,true,false,false]. +[true,true,true,true,false,false,false,false,false,true]. +[true,true,false,false,true,false,false,true,false,true]. +[false,true,false,false,true,false,true,false,true,true]. +[true,true,false,false,false,false,true,true,true,false]. +[false,true,false,false,true,true,false,false,false,false]. +[true,false,false,false,true,false,true,true,true,true]. +[false,false,false,true,false,true,false,true,false,true]. +[false,false,true,false,false,true,true,true,false,false]. diff --git a/test/Params_10_Exprs_100.txt b/test/Params_10_Exprs_100.txt new file mode 100644 index 0000000..0355b6f --- /dev/null +++ b/test/Params_10_Exprs_100.txt @@ -0,0 +1,110 @@ +[{par1,bool,disallow_undefined}, + {par2,bool,disallow_undefined}, + {par3,bool,disallow_undefined}, + {par4,bool,disallow_undefined}, + {par5,bool,disallow_undefined}, + {par6,bool,disallow_undefined}, + {par7,bool,disallow_undefined}, + {par8,bool,disallow_undefined}, + {par9,bool,disallow_undefined}, + {par10,bool,disallow_undefined}]. +<<"((par2 and ((par3 or (par7 or par1)) or (par5 and (par9 and (par8 and par10))))) or par6) or (not par4)">>. +<<"(((par7 or par6) or par1) or (not (par10 or par5))) or (not (((par3 or par4) or par2) or ((not par8) and par9)))">>. +<<"((not par3) or ((not par10) or par5)) or ((par7 or par8) or (((par6 or (par1 and par9)) and par4) or par2))">>. +<<"(par5 and (par6 and par3)) or ((par9 or ((par8 or par4) or par1)) and ((par7 and par2) and par10))">>. +<<"(((not (par5 or par3)) or (par9 or par4)) and ((par7 and par2) and (((not par10) or par8) and par1))) or (not par6)">>. +<<"((par6 or par4) and ((((par3 and (par8 or par5)) and par2) or (par1 or par7)) or (not (not par10)))) and (not par9)">>. +<<"(not (((par10 and par1) or par3) or (not ((par8 or par6) or par7)))) or (par2 and ((par4 or (not par5)) or par9))">>. +<<"(not par1) or (not ((par7 or ((par10 and (not (not (par9 or par6)))) and (not par5))) and ((par4 or par8) or (par3 or (not par2)))))">>. +<<"((not (par7 and (par9 and ((not par8) or par10)))) and ((not par3) or (par2 or (not par6)))) or (par1 or (par5 or par4))">>. +<<"((par3 or par8) or ((par1 and (par6 and (par9 or par5))) and ((not (not (par7 or (not par2)))) and par10))) or par4">>. +<<"((not par9) and ((not (par2 and ((par4 or par6) or par1))) or (par7 and (par8 or par3)))) or (not (par5 or par10))">>. +<<"((par10 or par5) and (not (not (not par3)))) or ((par7 or (((not par1) or (par9 and par4)) and (par6 and par2))) or par8)">>. +<<"((par5 or ((par9 or (par1 or par4)) and (par2 or par10))) or (not (par6 and par7))) or (par3 and (not par8))">>. +<<"((par7 and par10) or ((par6 or (par9 or (par5 and par2))) or par8)) and (not ((par1 and par4) and par3))">>. +<<"(((par10 and (not par8)) or (par4 or par9)) or ((par2 or par7) and par3)) and (par6 and (not (par1 and par5)))">>. +<<"(((par6 or par10) and (par9 and par8)) and ((par4 or par7) or par5)) or ((par3 and par2) or par1)">>. +<<"(par8 and (((par7 or par9) or ((par5 and par3) or par1)) and (par2 and par10))) and (not (par6 or par4))">>. +<<"(par6 or (par10 or (par5 or (not par2)))) and (((par9 or par7) and (not ((par8 or par3) or par4))) or par1)">>. +<<"(not (par1 or par8)) and ((not par7) or ((not ((par10 and par4) and par2)) or (not ((not par6) or (par9 and (par5 and par3))))))">>. +<<"((par4 and (par9 or par8)) or (((par2 and par6) and par1) or (par10 or par5))) and (not (par3 and par7))">>. +<<"((par2 and par10) or ((par3 and par8) and par7)) and ((par6 or par9) or ((not (par4 or par5)) or par1))">>. +<<"(not ((((par5 or (par2 or par4)) or par10) and par6) or (par3 or par1))) or (not ((not (not par7)) or (par8 and (not par9))))">>. +<<"((par6 and par4) and (par3 and par10)) and ((par5 or par1) or ((par2 and (par8 or par9)) and par7))">>. +<<"(not ((par2 or par8) or (not (par4 or (par9 or par10))))) and ((par7 and par1) and (not ((par5 and par6) or par3)))">>. +<<"((((not par4) and (par1 or par3)) and (not par6)) and (par8 and (par5 or par7))) or ((par10 and par2) and par9)">>. +<<"(par3 and (par8 and par9)) or (((par2 or par5) or par10) or ((par6 or par7) or ((not par1) and par4)))">>. +<<"((not ((par7 and (not ((par1 and (not par9)) and par2))) and par5)) or ((not (par6 and par10)) and par3)) and ((not par4) or par8)">>. +<<"(par7 or par1) or (((par4 or (((not par5) or (par2 or par3)) and par10)) and (par6 and par9)) or par8)">>. +<<"(not (not ((par6 and (par7 or par8)) or ((not par3) or (not (not par9)))))) or ((not par4) and (par5 and ((par10 or par2) or par1)))">>. +<<"(par1 and par5) and ((((par10 or par3) and par7) and (not (par2 and par8))) and (par9 or (par4 and par6)))">>. +<<"((par9 or par2) or ((par8 or par10) or par3)) or (not (((par6 or (not par7)) or (not (par1 or par4))) and (not (not par5))))">>. +<<"(not ((par8 or par3) or (par4 or (par6 and par1)))) and ((((not par5) and par2) and (par7 or par10)) or par9)">>. +<<"(not par6) and ((((not (par7 and par2)) or ((not (par8 and (par9 or par10))) and par5)) and (not par3)) and (not (par1 or par4)))">>. +<<"(((par4 and par8) and par1) and par5) or (((par3 and par6) and (par7 and par2)) or (par10 and par9))">>. +<<"(not par8) or ((not ((not par10) or par3)) and ((par9 and ((par4 or par7) or (par2 and par1))) and ((not par5) and par6)))">>. +<<"(not ((par9 and par7) or (not ((par4 and par6) or (par10 or par3))))) or ((par1 and (par8 or par2)) or par5)">>. +<<"((par7 or ((par8 or par6) and (not par5))) or (not par2)) or ((par3 or (par9 and par10)) and ((not par4) or par1))">>. +<<"(((par7 and par9) and ((par8 or par2) or par3)) or (par5 or par1)) or (par6 and (par4 or (not (not par10))))">>. +<<"(par3 and par4) or (par10 and ((par2 or par7) and ((not (par1 and par9)) or (par8 and (par6 and par5)))))">>. +<<"(not ((((par3 and par4) and par8) or (par2 and par6)) or (not (par7 and par9)))) and (par1 and (par10 or par5))">>. +<<"(par2 or (((par10 or par9) or par3) or (not ((par4 and par6) and par7)))) and (not (not (par8 or (par1 and par5))))">>. +<<"((not (not (par2 or par6))) and ((par5 or par9) or (par8 or (par3 or par10)))) or (not ((par1 and par4) and par7))">>. +<<"(par5 and ((par9 and par1) and ((not par2) or par3))) or (((par10 or par7) or par8) or (par6 and par4))">>. +<<"(par4 and ((par10 and (par1 and par6)) and par9)) or ((not (not ((par3 or par8) and (par7 or (not par5))))) or par2)">>. +<<"(par5 or par9) or (((par2 and par4) and (par3 or par6)) and ((not (par7 or par10)) and (par8 and par1)))">>. +<<"(((not ((par6 and par5) and par4)) or (not par9)) or par1) and ((((par8 and par7) or par2) or par3) or par10)">>. +<<"((not par10) and (not par9)) or (not ((par2 and ((par4 and par5) or (not (par7 and (par8 or par3))))) or (not (par1 or par6))))">>. +<<"(par7 and par2) and (((par1 and par9) and par4) or ((not par3) or (par6 or ((par10 and par8) or par5))))">>. +<<"(par8 or (par10 and par4)) or (((par1 and ((par9 or par6) and par3)) or (par2 and par7)) or par5)">>. +<<"(not ((not par4) and par1)) and (not ((not (par5 and par9)) and (par7 or (par10 or (par6 and (par2 or (par3 and par8)))))))">>. +<<"((((par4 or par2) and par9) or (not par7)) and (not par1)) and ((not par5) or ((par3 and par10) and (par6 and par8)))">>. +<<"((par7 and (par4 or par8)) or ((((par10 and par6) and par1) or (par9 or par5)) and par2)) and par3">>. +<<"(par4 and ((par3 or (not par10)) and par2)) and (not ((not par6) or ((not (par8 and par1)) or ((par9 or par5) and par7))))">>. +<<"((((par3 and par9) and par2) or (par8 or par7)) and (par10 or par5)) and (par4 and (par1 or par6))">>. +<<"(((par1 and par8) and (not par2)) and (par9 and (par4 and par7))) or ((par5 or par10) and (not (par3 or par6)))">>. +<<"((par2 and (par9 and par1)) and (par10 and par6)) and (not (((not par4) and par7) and ((not (not par3)) or (par8 or par5))))">>. +<<"par1 or (((par10 and par9) or par8) or ((par7 or par6) or ((par2 or par5) and (par4 or par3))))">>. +<<"((par2 and par4) and (par10 and par5)) and (par7 and ((par6 or (par1 and par8)) and (par3 and (not par9))))">>. +<<"(par6 or (((not par1) and par10) or par4)) or ((not par8) and ((not (par7 and par3)) and (par5 or (par9 or par2))))">>. +<<"(((not (par10 or par5)) and par6) or par4) and (((par2 or par7) or ((par1 and par3) or (not par9))) or (not par8))">>. +<<"((par10 and par1) and (par9 and (par5 or par2))) or (par8 or (not (((not par7) and par6) and (par4 or par3))))">>. +<<"(not (not (par4 or (par10 or par2)))) and (not ((not (not par5)) or (((par1 or par6) and (par9 or (par8 or par7))) or par3)))">>. +<<"(not par5) or ((((((par8 and par2) and par3) or (par1 or par4)) or (par10 or (not par9))) or (not par7)) or (not par6))">>. +<<"(par3 or par5) and ((not (not (not ((par4 and par10) and par7)))) and ((par6 or par8) or (par9 or (par2 or par1))))">>. +<<"(par5 or (not par8)) and (((not par10) and ((((par4 and par9) and par3) or par1) and par2)) or (par6 or par7))">>. +<<"(not par1) and (((par4 or par9) and par10) or (not (par7 and (((par6 and par5) or par2) and (par8 or par3)))))">>. +<<"(not par6) and ((par9 and (par2 or (par8 or par1))) and (par4 and ((par5 and par7) and (par3 or par10))))">>. +<<"((not (par10 or par9)) or par5) or (((not (par6 or (par7 and par3))) and (par4 and par1)) or ((not par2) or par8))">>. +<<"par8 or (((par1 or (par4 or par6)) or (not (not (par10 and par2)))) and (par5 or ((par9 or par3) or (not par7))))">>. +<<"(par9 or (par3 and par2)) and ((((par10 and (not par7)) and (not (not (not par4)))) or (par8 and par1)) and (not (par5 and par6)))">>. +<<"((((not par5) or (((not par6) and par1) or (not par4))) or (not par10)) and (par9 and par7)) and ((not par3) or (par8 or par2))">>. +<<"(par7 or (not (par10 and ((par9 and par6) and par8)))) or (((par2 or (not (par1 or (not par5)))) or par3) or (not par4))">>. +<<"((par4 and par8) or (((par3 or par10) or par2) and ((par5 and (par9 or par7)) and par1))) and par6">>. +<<"(par2 and par3) or (((((par1 and par7) or (not par8)) and (not par10)) and (par6 and par5)) or (par9 or par4))">>. +<<"(((not (par6 or par7)) and ((((par3 and par2) and (par1 or par9)) or par8) or par4)) or par10) and par5">>. +<<"(((par7 or par2) and (not (par8 or par4))) and (not (((not par10) and (par5 or par9)) and (par6 and par3)))) and par1">>. +<<"(par6 or par5) and ((not (par7 and (par1 or par9))) or (((par4 and par8) and par10) or (par2 or par3)))">>. +<<"((not par10) and (not ((not par3) or ((par1 or par9) and (par6 or par5))))) and (not (par4 and ((not (par7 and par8)) or par2)))">>. +<<"((par6 and (par1 and par5)) or par4) or (not ((par2 or par7) or ((not (par10 or (not par3))) and (par9 or par8))))">>. +<<"((not ((par1 or par6) or par2)) or ((par10 or (not par7)) and (not par8))) and (par4 and (par3 or (par9 and par5)))">>. +<<"par4 and (((((not (par6 and par7)) and par2) or par9) and (par3 and par1)) or ((par10 or par8) or par5))">>. +<<"(not (((not par3) and (par1 and par8)) or (not ((par2 and par7) or (par6 and par9))))) or (not (par10 and (par4 and par5)))">>. +<<"(((par6 and par2) or ((par1 and (not (not par9))) or (not (par5 and par8)))) or (par7 or par3)) and (par10 and par4)">>. +<<"par1 or ((((par10 or par8) or par2) and (not (par4 and par6))) or ((par5 or par9) and (par3 and par7)))">>. +<<"(((((not par5) or par10) and par3) and par1) and par2) or (par7 or (not (par6 and (par8 and (par9 and par4)))))">>. +<<"(((((par10 or par3) or par2) or par7) and (par4 and (not par9))) and (not (not par8))) or (par5 or (par1 and par6))">>. +<<"(not (((par10 and (not par3)) and par6) and (not par4))) and ((((par7 or par5) or (not par2)) or (not ((not par9) or par1))) and (not par8))">>. +<<"((not (par3 or (par9 or par2))) or (par4 and par1)) and ((par6 or ((par8 or par10) and par5)) or par7)">>. +<<"(not (((par6 and ((not par7) or par4)) or ((par9 or (par8 or (par2 or par3))) or par10)) and (not par5))) and par1">>. +<<"((par10 and (par2 and par1)) and (not par7)) and (((par4 and (not par3)) and (not par8)) and (par5 or (par9 and par6)))">>. +<<"(par3 and (par6 or par10)) and ((((par2 or par4) or (par5 and par8)) or par1) and ((not par9) and par7))">>. +<<"(((not (not (par3 or par10))) or (par6 or par5)) or (par7 or par1)) or ((par2 and par8) and (par4 or par9))">>. +<<"((par3 and (par8 and par1)) or ((par6 and par7) or (par5 or par9))) and ((not (par10 and par4)) and par2)">>. +<<"(par8 or par5) and ((((par6 and (not (par7 and par4))) and par1) or (par10 and par2)) or (par9 and par3))">>. +<<"(par6 and par8) or ((not (par3 or par2)) or (((par1 and (par10 or par4)) or par5) and (par9 and par7)))">>. +<<"((((par10 or par2) and par7) and par9) or (par6 and (not par8))) and ((not (par3 or par1)) or (par4 or par5))">>. +<<"(((par8 or (par4 or (par6 or (not par10)))) or (par2 and par7)) and par1) and ((par5 or par9) and par3)">>. +<<"((par7 or (not par5)) and (((par1 or par9) or (not par3)) and par10)) and (par8 or (par6 or (par4 or par2)))">>. +<<"((par6 and par9) or (((par5 or par10) and (par2 or par4)) or ((par1 or par8) and par3))) and (not par7)">>. +<<"(not ((par2 and par6) or ((par7 and par5) and ((par4 and par1) and (par8 or (par9 and par3)))))) or par10">>. diff --git a/test/betree_iterator_test.erl b/test/betree_iterator_test.erl new file mode 100644 index 0000000..130260d --- /dev/null +++ b/test/betree_iterator_test.erl @@ -0,0 +1,221 @@ +-module(betree_iterator_test). + +-include_lib("eunit/include/eunit.hrl"). + +-record(event, { a }). + +instantiate_iterator_test() -> + Betree = mk_betree(), + Event = [#event{ a = true }], + Ret_iter = erl_betree:search_iterator(Betree, Event), + ?assertMatch({ok, {_, _, _}}, Ret_iter), + {ok, {Iterator, _, _}} = Ret_iter, + ?assert(is_reference(Iterator)). + +search_all_found_test() -> + Betree = mk_betree(), + Event = [#event{ a = true }], + Ret_iter = erl_betree:search_iterator(Betree, Event), + ?assertMatch({ok, {_, _, _}}, Ret_iter), + {ok, {Iterator, _, _}} = Ret_iter, + ?assert(is_reference(Iterator)), + Ret_search_all = erl_betree:search_all(Iterator), + ?assertEqual({ok, [1]}, Ret_search_all). + +search_all_not_found_test() -> + Betree = mk_betree(), + Event = [#event{ a = false }], + Ret_iter = erl_betree:search_iterator(Betree, Event), + ?assertMatch({ok, {_, _, _}}, Ret_iter), + {ok, {Iterator, _, _}} = Ret_iter, + ?assert(is_reference(Iterator)), + Ret_search_all = erl_betree:search_all(Iterator), + ?assertEqual({ok, []}, Ret_search_all). + +search_iterator_test() -> + Betree = mk_betree(), + Event = [#event{ a = false }], + Before_search_iterator = betree_allocations(), + Ret_iter = erl_betree:search_iterator(Betree, Event), + ?assertMatch({ok, {_, _, _}}, Ret_iter), + {ok, {Iterator, _, _}} = Ret_iter, + ?assert(is_reference(Iterator)), + Ret_search_iterator_release = erl_betree:search_iterator_release(Iterator), + ?assertEqual(ok, Ret_search_iterator_release), + After_search_iterator_release = betree_allocations(), + {_Binary, NifInternal} = _Diff = betree_allocations_diff(Before_search_iterator, After_search_iterator_release), + ?assert(lists:all(fun (N) -> N =< 0 end, NifInternal)). + +search_all_false_test() -> + Betree = mk_betree(), + Event = [#event{ a = false }], + Before_search_iterator = betree_allocations(), + Ret_iter = erl_betree:search_iterator(Betree, Event), + ?assertMatch({ok, {_, _, _}}, Ret_iter), + {ok, {Iterator, _, _}} = Ret_iter, + ?assert(is_reference(Iterator)), + + Ret_search_all = erl_betree:search_all(Iterator), + ?assertEqual({ok, []}, Ret_search_all), + + Ret_search_iterator_release = erl_betree:search_iterator_release(Iterator), + ?assertEqual(ok, Ret_search_iterator_release), + After_search_iterator_release = betree_allocations(), + {_Binary, NifInternal} = _Diff = betree_allocations_diff(Before_search_iterator, After_search_iterator_release), + ?assert(lists:all(fun (N) -> N =< 0 end, NifInternal)). + +search_all_true_test() -> + Betree = mk_betree(), + Event = [#event{ a = true }], + Before_search_iterator = betree_allocations(), + Ret_iter = erl_betree:search_iterator(Betree, Event), + ?assertMatch({ok, {_, _, _}}, Ret_iter), + {ok, {Iterator, _, _}} = Ret_iter, + ?assert(is_reference(Iterator)), + + Ret_search_all = erl_betree:search_all(Iterator), + ?assertEqual({ok, [1]}, Ret_search_all), + + Ret_search_iterator_release = erl_betree:search_iterator_release(Iterator), + ?assertEqual(ok, Ret_search_iterator_release), + After_search_iterator_release = betree_allocations(), + {_Binary, NifInternal} = _Diff = betree_allocations_diff(Before_search_iterator, After_search_iterator_release), + ?assert(lists:all(fun (N) -> N =< 0 end, NifInternal)). + +memory_usage_test() -> + Betree = be_tree_from_file("test/Params_10_Exprs_100.txt"), + Events = bool_events_from_file("test/Params_10_Events_20.txt"), + Memory_start = betree_allocations(), + lists:foreach(fun (Event) -> search_iterator(Betree, Event) end, Events), + Memory_stop = betree_allocations(), + {_Binary, NifInternal} = _Diff = betree_allocations_diff(Memory_start, Memory_stop), + ?assert(lists:all(fun (N) -> N =< 0 end, NifInternal)). + +search_next_1_expr_found_test() -> + Domains = [[{a, bool, disallow_undefined}]], + {ok, Betree} = erl_betree:betree_make(Domains), + Expr = <<"a">>, + Consts = [], + {ok, Sub} = erl_betree:betree_make_sub(Betree, 1, Consts, Expr), + ok = erl_betree:betree_insert_sub(Betree, Sub), + Event = [#event{ a = true }], + Ret_iter = erl_betree:search_iterator(Betree, Event), + ?assertMatch({ok, {_, _, _}}, Ret_iter), + {ok, {Iterator, _, _}} = Ret_iter, + ?assert(is_reference(Iterator)), + + Ret_search_next = erl_betree:search_next(Iterator), + ?assertEqual({ok, [1]}, Ret_search_next). + +search_next_1_expr_not_found_test() -> + Domains = [[{a, bool, disallow_undefined}]], + {ok, Betree} = erl_betree:betree_make(Domains), + Expr = <<"a">>, + Consts = [], + {ok, Sub} = erl_betree:betree_make_sub(Betree, 1, Consts, Expr), + ok = erl_betree:betree_insert_sub(Betree, Sub), + Event = [#event{ a = false }], + Ret_iter = erl_betree:search_iterator(Betree, Event), + ?assertMatch({ok, {_, _, _}}, Ret_iter), + {ok, {Iterator, _, _}} = Ret_iter, + ?assert(is_reference(Iterator)), + + Ret_search_next = erl_betree:search_next(Iterator), + ?assertEqual({ok, []}, Ret_search_next). + +search_next_2_exprs_test() -> + Domains = [[{a, bool, disallow_undefined}]], + {ok, Betree} = erl_betree:betree_make(Domains), + Consts = [], + Expr1 = <<"a">>, + {ok, Sub1} = erl_betree:betree_make_sub(Betree, 1, Consts, Expr1), + ok = erl_betree:betree_insert_sub(Betree, Sub1), + Expr2 = <<"not a">>, + {ok, Sub2} = erl_betree:betree_make_sub(Betree, 2, Consts, Expr2), + ok = erl_betree:betree_insert_sub(Betree, Sub2), + Event = [#event{ a = true }], + Ret_iter = erl_betree:search_iterator(Betree, Event), + ?assertMatch({ok, {_, _, _}}, Ret_iter), + {ok, {Iterator, _, _}} = Ret_iter, + ?assert(is_reference(Iterator)), + + Ret_search_1 = erl_betree:search_next(Iterator), + ?assertMatch({continue, _}, Ret_search_1), + {continue, Ret_L1} = Ret_search_1, + ?assert(is_list(Ret_L1)), + + Ret_search_2 = erl_betree:search_next(Iterator), + ?assertEqual({ok, [1]}, Ret_search_2). + +mk_betree() -> + Domains = [[{a, bool, disallow_undefined}]], + {ok, Betree} = erl_betree:betree_make(Domains), + Expr = <<"a">>, + Consts = [], + {ok, Sub} = erl_betree:betree_make_sub(Betree, 1, Consts, Expr), + ok = erl_betree:betree_insert_sub(Betree, Sub), + Betree. + +be_tree_from_file(File) -> + {Domain, Exprs} = read_be_tree(File), + Domains = [Domain], + {ok, Betree} = erl_betree:betree_make(Domains), + Consts = [], + {_, Subs} = lists:foldl(fun (E, {I, Acc}) -> + {ok, Sub} = erl_betree:betree_make_sub(Betree, I, Consts, E), + {I+1, [Sub | Acc]} + end, {1, []}, Exprs), + lists:foreach(fun (S) -> erl_betree:betree_insert_sub(Betree, S) end, Subs), + Betree. + +bool_events_from_file(File) -> + {ok, Events} = file:consult(File), + [[list_to_tuple( + [bool_event | + [case N of 1 -> true; _ -> false end|| N <- Event] + ])] || Event <- Events]. + +search_iterator(Betree, Event) -> + {ok, {Iterator, _, _}} = erl_betree:search_iterator(Betree, Event), + Ret = erl_betree:search_all(Iterator), + erl_betree:search_iterator_release(Iterator), + Ret. + +betree_allocations() -> + case instrument:allocations() of + {error, _Reason} = Err -> Err; + {ok, {_HistogramStart, _UnscannedSize, Allocations}} -> + case maps:find(erl_betree_nif, Allocations) of + error -> {error, betree_allocations_not_found}; + {ok, BetreeAllocations} -> BetreeAllocations + end + end. + +betree_allocations_diff(Alloc1, Alloc2) + when is_map(Alloc1) andalso is_map(Alloc2) -> + Binary1 = maps:get(binary, Alloc1, error), + Binary2 = maps:get(binary, Alloc2, error), + BinaryDiff = diff_tuple(Binary1, Binary2), + NifInternal1 = maps:get(nif_internal, Alloc1, error), + NifInternal2 = maps:get(nif_internal, Alloc2, error), + NifInternalDiff = diff_tuple(NifInternal1, NifInternal2), + {BinaryDiff, NifInternalDiff}; +betree_allocations_diff(_, _) -> + {[], []}. + +diff_tuple(error, error) -> + []; +diff_tuple(error, T2) -> + diff_tuple({}, T2); +diff_tuple(T1, error) -> + diff_tuple(T1, {}); +diff_tuple(T1, T2) -> + L1 = tuple_to_list(T1), + L2 = tuple_to_list(T2), + L = lists:zip(L1, L2, {pad, {0, 0}}), + Diff = [X2 - X1 || {X1, X2} <- L], + Diff. + +read_be_tree(File) -> + {ok, [Domain | Exprs]} = file:consult(File), + {Domain, Exprs}.