Find disagreement using status
bobot committed Jun 28, 2024
1 parent 5888998 commit 9f3159e
Showing 3 changed files with 73 additions and 55 deletions.
73 changes: 18 additions & 55 deletions smtcomp/main.py
@@ -20,6 +20,7 @@
import smtcomp.benchexec
import smtcomp.defs as defs
import smtcomp.results
import smtcomp.scoring
import smtcomp.submission as submission
import smtcomp.execution as execution
import smtcomp.model_validation as model_validation
@@ -48,6 +49,7 @@
data_panel = "Data"
benchmarks_panel = "Benchmarks"
selection_panel = "Selection process"
scoring_panel = "Scoring process"


@app.command(rich_help_panel=submissions_panel)
@@ -202,38 +204,8 @@ def stats_of_benchexec_results(
Load benchexec results and print summary statistics about them
"""
config = defs.Config(data)
lf = pl.read_ipc(results / "parsed.feather").lazy().filter(track=int(defs.Track.SingleQuery))
selected = (smtcomp.selection
.helper(config, defs.Track.SingleQuery)
.filter(selected=True)
.with_columns(track=int(defs.Track.SingleQuery)))



selected = intersect(selected, smtcomp.selection.solver_competing_logics(config), on=["logic", "track"])

selected = add_columns(
selected,
lf.drop("logic"),
on=["file", "solver", "track", "participation"],
defaults={
"answer": -1,
"scramble_id": 1,
"cputime_s": -1,
"memory_B": -1,
"walltime_s": -1,
},
)

selected = add_columns(
selected,
smtcomp.selection.tracks(),
on=["track","logic"],
defaults={
"division": -1
}

)
selected = smtcomp.results.helper_get_results(config, results)

sum_answer = (pl.col("answer") == -1).sum()
waiting = (pl.col("answer") == -1).all()
Expand Down Expand Up @@ -296,41 +268,26 @@ def print_missing(d: Dict[str, Any]) -> str:
def find_disagreement_results(
data: Path,
results: Path,
use_previous_year_results: bool = defs.Config.use_previous_results_for_status
) -> None:
"""
Load benchexec results and print the benchmarks on which solvers disagree
"""
config = defs.Config(data)
lf = pl.read_ipc(results / "parsed.feather").lazy().filter(track=int(defs.Track.SingleQuery))
selected = (smtcomp.selection
.helper(config, defs.Track.SingleQuery)
.filter(selected=True)
.with_columns(track=int(defs.Track.SingleQuery)))



selected = intersect(selected, smtcomp.selection.solver_competing_logics(config), on=["logic", "track"])

selected = add_columns(
selected,
lf.drop("logic"),
on=["file", "solver", "track", "participation"],
defaults={
"answer": -1,
"scramble_id": 1,
"cputime_s": -1,
"memory_B": -1,
"walltime_s": -1,
},
)
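# Propagate the CLI choice: optionally reuse previous years' results to determine each benchmark's expected status.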
config.use_previous_results_for_status = use_previous_year_results
selected = smtcomp.results.helper_get_results(config, results)

slash = pl.lit("/")

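# A benchmark counts as a disagreement when the sat side and the unsat side are each
# supported, either by some solver's answer or by the benchmark's expected status.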
df = (
selected
.filter(pl.col("answer").is_in([int(defs.Answer.Sat),int(defs.Answer.Unsat)]))
.filter(((pl.col("answer")==int(defs.Answer.Sat)).any().over("file")) &
((pl.col("answer")==int(defs.Answer.Unsat)).any().over("file"))
.filter(
((pl.col("answer") == int(defs.Answer.Sat)).any().over("file")
| (pl.col("status") == int(defs.Status.Sat)))
& ((pl.col("answer") == int(defs.Answer.Unsat)).any().over("file")
| (pl.col("status") == int(defs.Status.Unsat)))
)
.group_by("logic","file")
.agg(
@@ -369,6 +326,12 @@ def print_answers(d: List[Dict[str, Any]]) -> str:
Col("answers", "Disagreement",custom=print_answers,footer=""),
)

@app.command(rich_help_panel=scoring_panel)
def scoring_stats(data: Path, src: Path) -> None:
"""
Load benchexec results and run the scoring sanity checks on them
"""
config = defs.Config(data)
results = smtcomp.results.helper_get_results(config, src)

smtcomp.scoring.sanity_check(config, results)


@app.command(rich_help_panel=benchexec_panel)
35 changes: 35 additions & 0 deletions smtcomp/results.py
@@ -247,3 +247,38 @@ def get_output(self: "LogFile", r: RunId, basename: str) -> str:
raise ValueError(f"Log Header not found {r!r} {basename!r}")
index += len(benchexec_log_separator())
return s[index:]


def helper_get_results(config: defs.Config, results: Path) -> pl.LazyFrame:
"""
Return, for all the selected benchmarks and every solver that should run them, the columns
"track", "file", "scramble_id", "logic", "division", "status", "solver", "answer", "cputime_s", "memory_B", "walltime_s".
-1 is used when no answer is available.
"""
lf = pl.read_ipc(results / "parsed.feather").lazy().filter(track=int(defs.Track.SingleQuery))
selected = (
smtcomp.selection.helper(config, defs.Track.SingleQuery)
.filter(selected=True)
.with_columns(track=int(defs.Track.SingleQuery))
)

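# Keep only the (logic, track) pairs in which each solver actually competes.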
selected = intersect(selected, smtcomp.selection.solver_competing_logics(config), on=["logic", "track"])

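# Join in the parsed benchexec results; runs without a parsed result get the defaults below.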
selected = add_columns(
selected,
lf.drop("logic"),
on=["file", "solver", "track", "participation"],
defaults={
"answer": -1,
"scramble_id": 1,
"cputime_s": -1,
"memory_B": -1,
"walltime_s": -1,
},
)

selected = add_columns(selected, smtcomp.selection.tracks(), on=["track", "logic"], defaults={"division": -1})

return selected
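
As an illustration only (not part of the commit; the "data" and "results" paths are placeholders), the LazyFrame returned by helper_get_results can be queried like any other polars frame:

from pathlib import Path
import polars as pl
import smtcomp.defs as defs
import smtcomp.results

config = defs.Config(Path("data"))
lf = smtcomp.results.helper_get_results(config, Path("results"))
# Count the runs that are still missing an answer (answer == -1).
print(lf.filter(pl.col("answer") == -1).select(pl.len()).collect())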
20 changes: 20 additions & 0 deletions smtcomp/scoring.py
@@ -0,0 +1,20 @@
import functools, itertools
from typing import Set, Dict, Optional, cast, List, DefaultDict
from pathlib import Path, PurePath
from smtcomp import defs
from rich import progress
from rich import print
from pydantic import BaseModel
import polars as pl
from smtcomp.utils import *

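# Reusable polars expressions classifying the answer recorded for each run.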
c_answer = pl.col("answer")
is_sat = c_answer == int(defs.Answer.Sat)
is_unsat = c_answer == int(defs.Answer.Unsat)
is_known = is_sat | is_unsat
is_not_known = is_known.not_()


def sanity_check(config: defs.Config, result: pl.LazyFrame) -> None:
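# No run that reported sat or unsat may have exceeded the wall-time limit.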
result = result.filter(is_known & (config.timelimit_s < pl.col("walltime_s")))
assert result.select(n=pl.len()).collect()["n"][0] == 0

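A toy check of these expressions (illustration only, not part of the commit):

import polars as pl
from smtcomp import defs
from smtcomp.scoring import is_known, is_not_known

df = pl.DataFrame({"answer": [int(defs.Answer.Sat), int(defs.Answer.Unsat), -1]})
# known -> [true, true, false]; unknown -> [false, false, true]
print(df.select(known=is_known, unknown=is_not_known))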