diff --git a/.github/workflows/deploy-website.yaml b/.github/workflows/deploy-website.yaml
index 488c022a..ee13bf77 100644
--- a/.github/workflows/deploy-website.yaml
+++ b/.github/workflows/deploy-website.yaml
@@ -51,12 +51,21 @@ jobs:
           key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}
       - name: Set up the environment
         uses: ./.github/actions/setup-poetry-env
+      #For smtcomp data cache
+      - uses: actions/cache@v3
+        id: data-cache
+        with:
+          path: data/*.feather
+          key: data-cache-${{ hashFiles('data/*.json.gz') }}
+      - name: Cache generation if needed
+        if: steps.data-cache.outputs.cache-hit != 'true'
+        run: |
+          poetry run make cache
       # Generate files using smtcomp tool
       - name: Files generation
         run: |
           pip install json-schema-for-humans
-          poetry run make submission-doc
-          poetry run make participant-data
+          poetry run make submission-doc generation
       ###
       - name: Setup Pages
         id: pages
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 72b3ee85..87c3edb8 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -24,8 +24,19 @@ jobs:
       - name: Run checks
         run: make check

-      - name: Run tests
-        run: make tests
+      #For smtcomp data cache (TODO unduplicate from deploy-website)
+      - uses: actions/cache@v3
+        id: data-cache
+        with:
+          path: data/*.feather
+          key: data-cache-${{ hashFiles('data/*.json.gz') }}
+      - name: Cache generation if needed
+        if: steps.data-cache.outputs.cache-hit != 'true'
+        run: |
+          poetry run make cache
+
+      - name: Run test
+        run: make test

   tox:
     runs-on: ubuntu-latest
diff --git a/.gitignore b/.gitignore
index 6a24df05..2c981e74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -172,3 +172,4 @@ schema_doc.min.js
 *.feather
 tmp/
 /web/public/
+/web/content/results/*.md
diff --git a/Makefile b/Makefile
index d5e0ef5e..9b62ef0f 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,7 @@ test: generation ## Test the code with pytest
 	@echo "🚀 Testing code: Running pytest"
 	@poetry run pytest

-generation: submission-generation participant-data ## Files generation for the website
+generation: submission-generation participant-data results-generation ## Files generation for the website

 .PHONY: build
 build: clean-build ## Build wheel file using poetry
@@ -45,7 +45,7 @@ GENERATED_SCHEMA_FILE=web/content/solver_submission/schema.json
 GENERATED_SCHEMA_HTML=web/content/solver_submission/schema.html
 PARTICIPANT_DATA_FILE=web/data/participants.json

-.PHONY: submission-doc submission-generation participant-data
+.PHONY: submission-doc submission-generation participant-data results-generation cache
 submission-generation:
 	@echo "🚀 Generating schema to $(GENERATED_SCHEMA_FILE)"
 	@poetry run smtcomp dump-json-schema $(GENERATED_SCHEMA_FILE)
@@ -59,5 +59,13 @@ participant-data:
 	@echo "🚀 Generating participant data to $(PARTICIPANT_DATA_FILE)"
 	@poetry run smtcomp show-json submissions/*.json $(PARTICIPANT_DATA_FILE)

+results-generation:
+	@echo "🚀 Generating results to web/content/results"
+	@poetry run smtcomp export-results-pages data
+
+cache:
+	@echo "🚀 Generating cache"
+	@poetry run smtcomp create-cache data
+
 hugo-server:
 	(cd web; hugo server)
diff --git a/smtcomp/defs.py b/smtcomp/defs.py
index 8d67f2c7..af0b0bde 100644
--- a/smtcomp/defs.py
+++ b/smtcomp/defs.py
@@ -1384,6 +1384,7 @@ class Results(BaseModel):

 ## Parameters that can change each year
 class Config:
+    __next_id__: ClassVar[int] = 0
     current_year = 2024
     oldest_previous_results = 2018
     timelimit_s = 60 * 20
@@ -1427,10 +1428,20 @@ class Config:
     ]

     def __init__(self, data: Path | None) -> None:
+        self.id = self.__class__.__next_id__
+        self.__class__.__next_id__ += 1
         if data is not None and data.name != "data":
             raise ValueError("Consistency check, data directory must be named 'data'")
         self._data = data

+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, Config):
+            return self.id == other.id
+        return False
+
+    def __hash__(self: Config) -> int:
+        return self.id
+
     @functools.cached_property
     def data(self) -> Path:
         if self._data is None:
@@ -1478,6 +1489,10 @@ def submissions(self) -> list[Submission]:
             Submission.model_validate_json(Path(file).read_text())
             for file in self.data.glob("../submissions/*.json")
         ]

+    @functools.cached_property
+    def web_results(self) -> Path:
+        return self.data / ".." / "web" / "content" / "results"
+
     @functools.cached_property
     def seed(self) -> int:
         unknown_seed = 0
diff --git a/smtcomp/generate_website_page.py b/smtcomp/generate_website_page.py
new file mode 100644
index 00000000..fc689c71
--- /dev/null
+++ b/smtcomp/generate_website_page.py
@@ -0,0 +1,219 @@
+import functools, itertools
+from typing import Set, Dict, Optional, cast, List, DefaultDict
+from pathlib import Path, PurePath
+from smtcomp import defs
+from rich import progress
+from rich import print
+from pydantic import BaseModel
+import polars as pl
+import smtcomp.scoring
+from smtcomp.utils import *
+import smtcomp.results
+
+# Warning: Hugo lowercase all dict keys
+
+
+class PodiumStep(BaseModel):
+    name: str
+    competing: str  # yes or no
+    errorScore: int
+    correctScore: int
+    CPUScore: float
+    WallScore: float
+    solved: int
+    solved_sat: int
+    solved_unsat: int
+    unsolved: int
+    abstained: int
+    timeout: int
+    memout: int
+
+
+class PodiumDivision(BaseModel):
+    resultdate: str
+    year: int
+    divisions: str  # divisions_2023
+    participants: str  # participants_2023
+    disagreements: str  # disagreements_2023
+    division: str  # Arith
+    track: str  # track_single_query
+    n_benchmarks: int
+    time_limit: int
+    mem_limit: int
+    logics: dict[str, int]
+    winner_seq: str
+    winner_par: str
+    winner_sat: str
+    winner_unsat: str
+    winner_24s: str
+
+    sequential: list[PodiumStep]
+    parallel: list[PodiumStep]
+    sat: list[PodiumStep]
+    unsat: list[PodiumStep]
+    twentyfour: list[PodiumStep]
+
+    layout: str = "result"
+
+
+def podium_steps(podium: List[dict[str, Any]] | None) -> List[PodiumStep]:
+    if podium is None:
+        return []
+    else:
+        return [
+            PodiumStep(
+                name=s["solver"],
+                competing="yes",  # TODO
+                errorScore=-s["error_score"],
+                correctScore=s["correctly_solved_score"],
+                CPUScore=s["cpu_time_score"],
+                WallScore=s["wallclock_time_score"],
+                solved=s["solved"],
+                solved_sat=s["solved_sat"],
+                solved_unsat=s["solved_unsat"],
+                unsolved=s["unsolved"],
+                abstained=s["abstained"],
+                timeout=s["timeout"],
+                memout=s["memout"],
+            )
+            for s in podium
+        ]
+
+
+def make_podium(config: defs.Config, d: dict[str, Any], for_division: bool) -> PodiumDivision:
+    def get_winner(l: List[dict[str, str]] | None) -> str:
+        # TODO select only participating
+        if l is None:
+            return "-"
+        else:
+            return l[0]["solver"]
+
+    if for_division:
+        division = defs.Division.name_of_int(d["division"])
+        logics = dict((defs.Logic.name_of_int(d2["logic"]), d2["n"]) for d2 in d["logics"])
+    else:
+        division = defs.Logic.name_of_int(d["logic"])
+        logics = dict()
+
+    return PodiumDivision(
+        resultdate="2024-07-08",
+        year=config.current_year,
+        divisions=f"divisions_{config.current_year}",
+        participants=f"participants_{config.current_year}",
+        disagreements=f"disagreements_{config.current_year}",
+        division=division,
+        track="track_single_query",
+        n_benchmarks=d["total"],
+        time_limit=config.timelimit_s,
+        mem_limit=config.memlimit_M,
+        logics=logics,
+        winner_seq=get_winner(d[smtcomp.scoring.Kind.seq.name]),
+        winner_par=get_winner(d[smtcomp.scoring.Kind.par.name]),
+        winner_sat=get_winner(d[smtcomp.scoring.Kind.sat.name]),
+        winner_unsat=get_winner(d[smtcomp.scoring.Kind.unsat.name]),
+        winner_24s=get_winner(d[smtcomp.scoring.Kind.twentyfour.name]),
+        sequential=podium_steps(d[smtcomp.scoring.Kind.seq.name]),
+        parallel=podium_steps(d[smtcomp.scoring.Kind.par.name]),
+        sat=podium_steps(d[smtcomp.scoring.Kind.sat.name]),
+        unsat=podium_steps(d[smtcomp.scoring.Kind.unsat.name]),
+        twentyfour=podium_steps(d[smtcomp.scoring.Kind.twentyfour.name]),
+    )
+
+
+def sq_generate_datas(
+    config: defs.Config, selection: pl.LazyFrame, results: pl.LazyFrame, for_division: bool
+) -> dict[str, PodiumDivision]:
+    """
+    Input with disagreements
+    Generate datas for divisions or for logics
+    """
+    assert "disagreements" in results.columns
+
+    if for_division:
+        group_by = "division"
+        name_of_int = defs.Division.name_of_int
+    else:
+        group_by = "logic"
+        name_of_int = defs.Logic.name_of_int
+
+    results = results.filter(track=int(defs.Track.SingleQuery)).drop("track")
+
+    selection = selection.filter(selected=True)
+
+    len_by_division = selection.group_by(group_by).agg(total=pl.len())
+
+    def info_for_podium_step(kind: smtcomp.scoring.Kind, config: defs.Config, results: pl.LazyFrame) -> pl.LazyFrame:
+        results = smtcomp.scoring.filter_for(kind, config, results)
+        return (
+            intersect(results, len_by_division, on=[group_by])
+            .group_by(group_by, "solver")
+            .agg(
+                pl.sum("error_score"),
+                pl.sum("correctly_solved_score"),
+                pl.sum("cpu_time_score"),
+                pl.sum("wallclock_time_score"),
+                solved=(smtcomp.scoring.known_answer).sum(),
+                solved_sat=(smtcomp.scoring.sat_answer).sum(),
+                solved_unsat=(smtcomp.scoring.unsat_answer).sum(),
+                unsolved=(smtcomp.scoring.unknown_answer).sum(),
+                timeout=(smtcomp.scoring.timeout_answer).sum(),
+                memout=(smtcomp.scoring.memout_answer).sum(),
+                abstained=pl.col("total").first() - pl.len(),
+            )
+            .sort([group_by] + smtcomp.scoring.scores, descending=True)
+            .group_by(group_by, maintain_order=True)
+            .agg(
+                pl.struct(
+                    "solver",
+                    "error_score",
+                    "correctly_solved_score",
+                    "cpu_time_score",
+                    "wallclock_time_score",
+                    "solved",
+                    "solved_sat",
+                    "solved_unsat",
+                    "unsolved",
+                    "timeout",
+                    "memout",
+                    "abstained",
+                ).alias(kind.name)
+            )
+        )
+
+    if for_division:
+        lf_logics = [
+            selection.group_by("division", "logic")
+            .agg(n=pl.len())
+            .group_by("division")
+            .agg(logics=pl.struct("logic", "n"))
+        ]
+    else:
+        lf_logics = []
+
+    lf_info2 = results.group_by(group_by).agg(disagreements=(pl.col("disagreements") == True).sum())
+
+    results = results.filter(disagreements=False).drop("disagreements")
+
+    l = (
+        [len_by_division, lf_info2]
+        + lf_logics
+        + [info_for_podium_step(kind, config, results) for kind in smtcomp.scoring.Kind]
+    )
+
+    r = functools.reduce(lambda x, y: x.join(y, validate="1:1", on=[group_by], how="left"), l)
+
+    df = r.collect()
+
+    return dict((name_of_int(d[group_by]), make_podium(config, d, for_division)) for d in df.to_dicts())
+
+
+def export_results(config: defs.Config, selection: pl.LazyFrame, results: pl.LazyFrame) -> None:
+
+    dst = config.web_results
+    dst.mkdir(parents=True, exist_ok=True)
+
+    results = results.collect().lazy()
+
+    for for_division in [True, False]:
+        for name, data in sq_generate_datas(config, selection, results, for_division).items():
+            (dst / f"{name.lower()}-single-query.md").write_text(data.model_dump_json(indent=1))
diff --git a/smtcomp/main.py b/smtcomp/main.py
index 1b6e7c28..9aafee99 100644
--- a/smtcomp/main.py
+++ b/smtcomp/main.py
@@ -30,6 +30,7 @@
 import smtcomp.generate_benchmarks
 import smtcomp.list_benchmarks
 import smtcomp.selection
+import smtcomp.generate_website_page
 from smtcomp.unpack import write_cin, read_cin
 import smtcomp.scramble_benchmarks
 from rich.console import Console
@@ -244,14 +245,17 @@ def store_results(

 @app.command(rich_help_panel=benchexec_panel)
 def stats_of_benchexec_results(
-    data: Path, results: Path, only_started: bool = False, track: defs.Track = defs.Track.SingleQuery
+    data: Path,
+    results: List[Path] = typer.Argument(None),
+    only_started: bool = False,
+    track: defs.Track = typer.Argument(defs.Track.SingleQuery.name),
 ) -> None:
     """
     Load benchexec results and print some results about them
     """
     config = defs.Config(data)

-    selected = smtcomp.results.helper_get_results(config, results, track)
+    selected, _ = smtcomp.results.helper_get_results(config, results, track)

     sum_answer = (pl.col("answer") == -1).sum()
     waiting = (pl.col("answer") == -1).all()
@@ -323,14 +327,16 @@ def print_missing(d: Dict[str, Any]) -> str:

 @app.command(rich_help_panel=benchexec_panel)
 def find_disagreement_results(
-    data: Path, results: Path, use_previous_year_results: bool = defs.Config.use_previous_results_for_status
+    data: Path,
+    results: List[Path] = typer.Argument(None),
+    use_previous_year_results: bool = defs.Config.use_previous_results_for_status,
 ) -> None:
     """
     Load benchexec results and print some results about them
     """
     config = defs.Config(data)
     config.use_previous_results_for_status = use_previous_year_results
-    selected = smtcomp.results.helper_get_results(config, results)
+    selected, _ = smtcomp.results.helper_get_results(config, results)

     df = (
         selected.filter(pl.col("answer").is_in([int(defs.Answer.Sat), int(defs.Answer.Unsat)]))
@@ -380,11 +386,13 @@ def print_answers(d: List[Dict[str, Any]]) -> str:

 @app.command(rich_help_panel=scoring_panel)
 def scoring_removed_benchmarks(
-    data: Path, src: Path, use_previous_year_results: bool = defs.Config.use_previous_results_for_status
+    data: Path,
+    src: List[Path] = typer.Argument(None),
+    use_previous_year_results: bool = defs.Config.use_previous_results_for_status,
 ) -> None:
     config = defs.Config(data)
     config.use_previous_results_for_status = use_previous_year_results
-    results = smtcomp.results.helper_get_results(config, src)
+    results, _ = smtcomp.results.helper_get_results(config, src)

     results = smtcomp.scoring.add_disagreements_info(results)
@@ -406,9 +414,16 @@ def scoring_removed_benchmarks(

 @app.command(rich_help_panel=scoring_panel)
-def show_scores(data: Path, src: Path, kind: smtcomp.scoring.Kind = typer.Argument(default="par")) -> None:
+def show_scores(
+    data: Path,
+    src: List[Path] = typer.Argument(None),
+    kind: smtcomp.scoring.Kind = typer.Argument(default="par"),
+) -> None:
+    """
+    If src is empty use results in data
+    """
     config = defs.Config(data)
-    results = smtcomp.results.helper_get_results(config, src)
+    results, _ = smtcomp.results.helper_get_results(config, src)

     smtcomp.scoring.sanity_check(config, results)
@@ -978,3 +993,15 @@ def check_model_locally(
         (dst / basename).unlink(missing_ok=True)
         (dst / basename).symlink_to(smt2_file)
         (dst / basename_model).write_text(result.model)
+
+
+@app.command()
+def export_results_pages(data: Path, results: list[Path] = typer.Argument(None)) -> None:
+    """
+    Generate page for results pages in web directory
+    """
+    config = defs.Config(data)
+    lf, selection = smtcomp.results.helper_get_results(config, results)
+    lf = smtcomp.scoring.add_disagreements_info(lf)
+    lf = smtcomp.scoring.benchmark_scoring(lf)
+    smtcomp.generate_website_page.export_results(config, selection, lf)
diff --git a/smtcomp/results.py b/smtcomp/results.py
index 3d582724..fd8e1ce8 100644
--- a/smtcomp/results.py
+++ b/smtcomp/results.py
@@ -254,18 +254,45 @@ def get_output(self: "LogFile", r: RunId, basename: str) -> str:
         return s[index:]


-def helper_get_results(config: defs.Config, results: Path, track: defs.Track = defs.Track.SingleQuery) -> pl.LazyFrame:
+def helper_get_results(
+    config: defs.Config, results: List[Path], track: defs.Track = defs.Track.SingleQuery
+) -> Tuple[pl.LazyFrame, pl.LazyFrame]:
     """
+    If results is empty use the one in data
+
     Return on all the selected benchmarks for each solver that should run it

-    "track", "file", "scrambled_id", "logic", "division", "status", "solver", "answer", "cputime_s", "memory_B", "walltime_s".
+    "track", "file", "logic", "division", "status", "solver", "answer", "cputime_s", "memory_B", "walltime_s".

     -1 is used when no answer is available.
     """
-    lf = pl.read_ipc(results / "parsed.feather").lazy().filter(track=int(track))
-    selected = smtcomp.selection.helper(config, track).filter(selected=True).with_columns(track=int(track))
+    if len(results) == 0:
+        lf = (
+            pl.read_ipc(config.cached_current_results[track])
+            .lazy()
+            .with_columns(track=int(track))
+            .rename(
+                {
+                    "result": "answer",
+                    "memory_usage": "memory_B",
+                    "cpu_time": "cputime_s",
+                    "wallclock_time": "walltime_s",
+                }
+            )
+            .drop("year")
+        )
+    else:
+        lf = pl.concat(pl.read_ipc(p / "parsed.feather").lazy() for p in results)
+        lf = lf.filter(track=int(track)).drop("scrambled_id")
+    selection = smtcomp.selection.helper(config, track).filter(selected=True).with_columns(track=int(track))
+
+    selection = (
+        add_columns(selection, smtcomp.selection.tracks(), on=["track", "logic"], defaults={"division": -1})
+        .collect()  # Improve later works
+        .lazy()
+    )

-    selected = intersect(selected, smtcomp.selection.solver_competing_logics(config), on=["logic", "track"])
+    selected = intersect(selection, smtcomp.selection.solver_competing_logics(config), on=["logic", "track"])

     selected = add_columns(
         selected,
@@ -273,13 +300,10 @@ def helper_get_results(config: defs.Config, results: Path, track: defs.Track = d
         on=["file", "solver", "track"],
         defaults={
             "answer": -1,
-            "scramble_id": 1,
             "cputime_s": -1,
             "memory_B": -1,
             "walltime_s": -1,
         },
     )

-    selected = add_columns(selected, smtcomp.selection.tracks(), on=["track", "logic"], defaults={"division": -1})
-
-    return selected
+    return selected, selection
diff --git a/smtcomp/scoring.py b/smtcomp/scoring.py
index 240e33fd..9c665663 100644
--- a/smtcomp/scoring.py
+++ b/smtcomp/scoring.py
@@ -14,6 +14,11 @@
 sat_status = c_status == int(defs.Status.Sat)
 unsat_status = c_status == int(defs.Status.Unsat)

+c_sound_status = pl.col("sound_status")
+sat_sound_status = c_sound_status == int(defs.Status.Sat)
+unsat_sound_status = c_sound_status == int(defs.Status.Unsat)
+
+
 c_walltime_s = pl.col("walltime_s")
 c_cputime_s = pl.col("cputime_s")
 twentyfour = c_walltime_s <= 24
@@ -49,15 +54,24 @@ def add_disagreements_info(results: pl.LazyFrame) -> pl.LazyFrame:
     disagreements = (pl.col("sound_solvers") & sat_answer).any().over("track", "file") & (
         pl.col("sound_solvers") & unsat_answer
     ).any().over("track", "file")
-    return results.with_columns(disagreements=disagreements).drop("sound_solvers")
+    sound_status = (
+        pl.when((pl.col("sound_solvers") & sat_answer).any().over("track", "file"))
+        .then(int(defs.Answer.Sat))
+        .when((pl.col("sound_solvers") & unsat_answer).any().over("track", "file"))
+        .then(int(defs.Answer.Unsat))
+        .otherwise(c_status)
+    )
+
+    return results.with_columns(disagreements=disagreements, sound_status=sound_status).drop("sound_solvers")


 def benchmark_scoring(results: pl.LazyFrame) -> pl.LazyFrame:
     """
+    Requires disagreements
     Add "error_score", "correctly_solved_score", "wallclock_time_score","cpu_time_score"
     """
-    error_score = pl.when((sat_status & unsat_answer) | (unsat_status & sat_answer)).then(-1).otherwise(0)
+    error_score = pl.when((sat_sound_status & unsat_answer) | (unsat_sound_status & sat_answer)).then(-1).otherwise(0)
     """
     Use -1 instead of 1 for error so that we can use lexicographic comparison
     """
@@ -111,8 +125,8 @@ def filter_for(kind: Kind, config: defs.Config, results: pl.LazyFrame) -> pl.Laz
         case Kind.seq:
             return virtual_sequential_score(config, results)
         case Kind.sat:
-            return results.filter(sat_answer)
+            return results.filter(sat_sound_status)
         case Kind.unsat:
-            return results.filter(unsat_answer)
+            return results.filter(unsat_sound_status)
         case Kind.twentyfour:
             return results.filter(twentyfour)
diff --git a/web/themes/smtcomp/layouts/_default/result.html b/web/themes/smtcomp/layouts/_default/result.html
index 165fcb09..67e2c0a2 100644
--- a/web/themes/smtcomp/layouts/_default/result.html
+++ b/web/themes/smtcomp/layouts/_default/result.html
@@ -4,7 +4,7 @@
 {{ .Title }}
 {{ $dateMachine := .Date | time.Format "2006-01-02T15:04:05-07:00" }} {{ $dateHuman := .Date | time.Format ":date_long" }}
-{{ $trueDivision := and (isset .Params "logics") (index .Params.logics 0 | len | ne 0) }}
+{{ $trueDivision := and (isset .Params "logics") (.Params.logics | len | ne 0) }}
 {{ .Params.division }} ({{ .Params.track }})
@@ -23,9 +23,9 @@
 {{ .Params.division }} ({{ .Params.track }})
 {{ if $trueDivision }}
-Logics: