Skip to content

Commit

Permalink
Processing Cloud and Parallel track results.
Browse files Browse the repository at this point in the history
* CSV to feather conversion.

* Adapted website generation for Cloud and Parallel tracks.

* Updated make results-generation.

* Fixed makefile.

* Update smtcomp/results.py

Co-authored-by: François Bobot <[email protected]>

* Incorporated suggestions by Francois.

---------

Co-authored-by: François Bobot <[email protected]>
  • Loading branch information
mbromber and bobot authored Sep 12, 2024
1 parent 279d242 commit a71d958
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 7 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ results-generation:
@poetry run smtcomp export-results-pages data UnsatCore
@echo "🚀 Generating results to web/content/results for Incremental"
@poetry run smtcomp export-results-pages data Incremental
@echo "🚀 Generating results to web/content/results for Cloud"
@poetry run smtcomp export-results-pages data Cloud
@echo "🚀 Generating results to web/content/results for Parallel"
@poetry run smtcomp export-results-pages data Parallel

cache:
@echo "🚀 Generating cache"
Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,17 @@ The step from Single Query can be followed with the model validation results whe
---

Repository initiated with [fpgmaas/cookiecutter-poetry](https://github.com/fpgmaas/cookiecutter-poetry).

## Using the `smtcomp` tool for handling results for Cloud and Parallel tracks provided in `csv` format by AWS

This works the same way for both the Parallel and Cloud tracks. If only one CSV file is provided for both tracks, it needs to be split into one file containing only the results for the relevant track. Furthermore, the file must be called results.csv, stored in its own local directory (here tmp/cloud_results), and contain at least the columns: solver, scramble_id, logic, solver_time, file, track, and solver_result.

We will suppose that results.csv is locally available in directory `tmp/cloud_results`:

In order to allow looking at the results incrementally, the first step is to translate the `.csv` file into a faster `.feather` file.

```
smtcomp convert-aws-results tmp/cloud_results
```

Everything else works the same as for any feather file that was converted by `smtcomp convert-benchexec-results`.
Binary file added data/results-cloud-2024.json.gz
Binary file not shown.
Binary file added data/results-parallel-2024.json.gz
Binary file not shown.
30 changes: 25 additions & 5 deletions smtcomp/generate_website_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,13 @@ def get_winner(l: List[dict[str, str]] | None) -> str:
division = defs.Logic.name_of_int(d["logic"])
logics = dict()

if (track == defs.Track.Cloud) | (track == defs.Track.Parallel):
winner_seq = "-"
steps_seq = []
else:
winner_seq = get_winner(d[smtcomp.scoring.Kind.seq.name])
steps_seq = podium_steps(d[smtcomp.scoring.Kind.seq.name])

return PodiumDivision(
resultdate="2024-07-08",
year=config.current_year,
Expand All @@ -254,12 +261,12 @@ def get_winner(l: List[dict[str, str]] | None) -> str:
time_limit=config.timelimit_s,
mem_limit=config.memlimit_M,
logics=dict(sorted(logics.items())),
winner_seq=get_winner(d[smtcomp.scoring.Kind.seq.name]),
winner_seq=winner_seq,
winner_par=get_winner(d[smtcomp.scoring.Kind.par.name]),
winner_sat=get_winner(d[smtcomp.scoring.Kind.sat.name]),
winner_unsat=get_winner(d[smtcomp.scoring.Kind.unsat.name]),
winner_24s=get_winner(d[smtcomp.scoring.Kind.twentyfour.name]),
sequential=podium_steps(d[smtcomp.scoring.Kind.seq.name]),
sequential=steps_seq,
parallel=podium_steps(d[smtcomp.scoring.Kind.par.name]),
sat=podium_steps(d[smtcomp.scoring.Kind.sat.name]),
unsat=podium_steps(d[smtcomp.scoring.Kind.unsat.name]),
Expand Down Expand Up @@ -424,13 +431,19 @@ def get_winner(l: List[PodiumStepBiggestLead] | None) -> str:
unsat = biggest_lead_ranking_for_kind(data, smtcomp.scoring.Kind.unsat)
twentyfour = biggest_lead_ranking_for_kind(data, smtcomp.scoring.Kind.twentyfour)

if (track == defs.Track.Cloud) | (track == defs.Track.Parallel):
winner_seq = "-"
sequential = []
else:
winner_seq = get_winner(sequential)

return PodiumBiggestLead(
resultdate="2024-07-08",
year=config.current_year,
track=track,
results=f"results_{config.current_year}",
participants=f"participants_{config.current_year}",
winner_seq=get_winner(sequential),
winner_seq=winner_seq,
winner_par=get_winner(parallel),
winner_sat=get_winner(sat),
winner_unsat=get_winner(unsat),
Expand Down Expand Up @@ -506,18 +519,25 @@ def timeScore(vws_step: PodiumStep) -> float:
for k in smtcomp.scoring.Kind
)

if (track == defs.Track.Cloud) | (track == defs.Track.Parallel):
winner_seq = "-"
steps_seq = []
else:
winner_seq = get_winner(ld[smtcomp.scoring.Kind.seq])
steps_seq = ld[smtcomp.scoring.Kind.seq]

return PodiumLargestContribution(
resultdate="2024-07-08",
year=config.current_year,
track=track,
results=f"results_{config.current_year}",
participants=f"participants_{config.current_year}",
winner_seq=get_winner(ld[smtcomp.scoring.Kind.seq]),
winner_seq=winner_seq,
winner_par=get_winner(ld[smtcomp.scoring.Kind.par]),
winner_sat=get_winner(ld[smtcomp.scoring.Kind.sat]),
winner_unsat=get_winner(ld[smtcomp.scoring.Kind.unsat]),
winner_24s=get_winner(ld[smtcomp.scoring.Kind.twentyfour]),
sequential=ld[smtcomp.scoring.Kind.seq],
sequential=steps_seq,
parallel=ld[smtcomp.scoring.Kind.par],
sat=ld[smtcomp.scoring.Kind.sat],
unsat=ld[smtcomp.scoring.Kind.unsat],
Expand Down
12 changes: 12 additions & 0 deletions smtcomp/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,18 @@ def convert_benchexec_results(
lf.collect().write_ipc(results / "parsed.feather")


@app.command(rich_help_panel=benchexec_panel)
def convert_aws_results(
    results: Path,
) -> None:
    """
    Load AWS results in CSV format and aggregate them into feather format.

    Reads ``results / "results.csv"`` (via smtcomp.results.parse_aws_csv)
    and writes the parsed table to ``results / "parsed.feather"``.
    """

    lf = smtcomp.results.parse_aws_csv(results)
    lf.collect().write_ipc(results / "parsed.feather")


@app.command(rich_help_panel=benchexec_panel)
def store_results(
data: Path,
Expand Down
51 changes: 51 additions & 0 deletions smtcomp/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,13 @@ def helper_get_results(
else:
lf = pl.concat(pl.read_ipc(p / "parsed.feather").lazy() for p in results)
lf = lf.filter(track=int(track)).drop("scramble_id")

selection = smtcomp.selection.helper(config, track).filter(selected=True).with_columns(track=int(track))

    selection = (
        selection.unique()
    )  # Needed because smtcomp.selection.helper(...) returns the selected benchmarks for both the Cloud and Parallel tracks at once if track equals either of them. This can lead to duplicates! Should be improved later.

selection = (
add_columns(selection, smtcomp.selection.tracks(), on=["track", "logic"], defaults={"division": -1})
.collect() # Improve later works
Expand All @@ -491,3 +496,49 @@ def helper_get_results(
)

return selected, selection


def parse_aws_csv(dir: Path) -> pl.LazyFrame:
"""
output columns: solver, participation, track, cputime_s, memory_B, status, walltime_s, scramble_id, file, answer
Assumes that there is a file results.csv in the directory dir. The file
must contain columns: solver, scramble_id, logic, solver_time, file, track, solver_result
"""

def aws_logic(logic: str) -> int:
return int(defs.Logic(logic))

def aws_track(track: str) -> int:
return int(defs.Track(track))

def aws_result(res: str) -> int:
match res:
case "unsat":
return int(defs.Answer.Unsat)
case "sat":
return int(defs.Answer.Sat)
case _:
return int(defs.Answer.Unknown)

csv = dir / "results.csv"
if not csv.exists():
raise (ValueError(f"results.csv missing in the directory"))
lf = pl.scan_csv(csv).select(
pl.col("solver"),
pl.col("scramble_id"),
pl.col("logic").apply(aws_logic, return_dtype=pl.Int64).alias("logic"),
pl.col("solver_time").alias("walltime_s"),
pl.col("file"),
pl.col("track").apply(aws_track, return_dtype=pl.Int32).alias("track"),
pl.col("solver_result").map_elements(aws_result, return_dtype=pl.Int64).alias("answer"),
)

results = lf.with_columns(
participation=pl.lit(0, dtype=pl.Int64),
memory_B=pl.lit(0, dtype=pl.Int64),
nb_answers=pl.lit(0, dtype=pl.Int64),
cputime_s=pl.lit(0, dtype=pl.Int64),
)

return results
6 changes: 4 additions & 2 deletions smtcomp/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,9 @@ def helper(config: defs.Config, track: defs.Track) -> pl.LazyFrame:
selected = helper_compute_non_incremental(config, track)
case defs.Track.UnsatCore:
selected = helper_compute_non_incremental(config, track)
case defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel:
case defs.Track.Cloud | defs.Track.Parallel:
selected = helper_aws_selection(config).drop("division")
case defs.Track.ProofExhibition:
selected = helper_compute_non_incremental(config, defs.Track.SingleQuery)
rich.print(
f"[red]The selection and scramble_benchmarks command does not yet work for the competition track: {track}[/red]"
Expand Down Expand Up @@ -322,7 +324,7 @@ def round_robbin(files: list[list[int]]) -> list[int]:
d = b.collect().to_dict(as_series=False)
d["file"] = list(map(round_robbin, d["file"]))

b = pl.LazyFrame(d)
b = pl.LazyFrame(d).cast({"track": pl.Int32})

b = b.explode("file").with_columns(selected=True)

Expand Down

0 comments on commit a71d958

Please sign in to comment.