Skip to content

Commit

Permalink
Merge pull request #329 from openml/fix/task_dashboard
Browse files Browse the repository at this point in the history
Fix Task Dashboard Top Score and Entry columns
  • Loading branch information
joaquinvanschoren authored Apr 2, 2024
2 parents b9d7462 + a780614 commit d615032
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 54 deletions.
29 changes: 0 additions & 29 deletions server/src/dashboard/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,35 +163,6 @@ def get_data_metadata(data_id):
return df, meta_features, numerical_features, nominal_features


def get_highest_rank(df, leaderboard):
    """Add a "Top Score" column with each uploader's best score.

    The previous implementation walked the runs one by one and skipped a run
    whenever its ``setup_id`` or its score had already been seen for *any*
    uploader, so a returning uploader's personal best could be lost. It also
    wrote the scores into ``leaderboard`` positionally (in order of first
    upload), which does not match a leaderboard whose rows are ordered by
    rank. Both problems are avoided by taking the per-uploader maximum and
    aligning it on the uploader name.

    Args:
        df: DataFrame of runs; reads the ``uploader_name``, ``value`` and
            ``upload_time`` columns. It is sorted by ``upload_time`` in place,
            as before.
        leaderboard: DataFrame to extend. When it has an ``uploader_name``
            column the scores are matched by name; otherwise it must have one
            row per uploader in order of first upload, and the scores are
            assigned positionally (the original behaviour).

    Returns:
        The same ``leaderboard`` object, with a ``"Top Score"`` column added.
    """
    # Kept from the original: callers get ``df`` back sorted by upload time,
    # and the positional fallback below relies on that order.
    df.sort_values(by=["upload_time"], inplace=True)

    # sort=False keeps uploaders in order of first appearance (first upload).
    best_by_uploader = df.groupby("uploader_name", sort=False)["value"].max()

    if "uploader_name" in leaderboard.columns:
        # Align by name so the row order of the leaderboard does not matter.
        leaderboard["Top Score"] = leaderboard["uploader_name"].map(
            best_by_uploader
        )
    else:
        leaderboard["Top Score"] = best_by_uploader.tolist()
    return leaderboard


def splitDataFrameList(df, target_column):
"""df = dataframe to split,
target_column = the column containing the values to split
Expand Down
42 changes: 17 additions & 25 deletions server/src/dashboard/task_callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from .caching import CACHE_DIR_DASHBOARD
from .dash_config import DASH_CACHING
from .helpers import get_highest_rank
from ...setup import SERVER_BASE_URL

font = [
Expand Down Expand Up @@ -127,6 +126,7 @@ def update_task_plots(pathname, metric, n_clicks):
),
)
]

layout = go.Layout(
autosize=False,
margin={"l": 400},
Expand Down Expand Up @@ -158,7 +158,6 @@ def update_task_plots(pathname, metric, n_clicks):
tick_text.append(link)

df["upload_time"] = pd.to_datetime(df["upload_time"])
df["upload_time"] = df["upload_time"].dt.date

data = [
go.Scatter(
Expand All @@ -177,7 +176,7 @@ def update_task_plots(pathname, metric, n_clicks):
)
]
layout = go.Layout(
title="Contributions over time,<br>every point is a run, "
title="Contributions over time, every point is a run."
"click for details",
autosize=True,
margin={"l": 100},
Expand All @@ -194,29 +193,22 @@ def update_task_plots(pathname, metric, n_clicks):
fig1 = go.Figure(data, layout)

# Leaderboard table
max_score_by_uploader = df[["uploader_name", "value"]].groupby(["uploader_name"]).max()
max_score_by_uploader = max_score_by_uploader.to_dict()["value"]
submissions_by_uploader = df["uploader_name"].value_counts().to_dict()
rank_by_uploader = {
uploader: rank
for rank, (uploader, score) in enumerate(sorted(max_score_by_uploader.items(), key=lambda t: -t[1]), start=1)
}

leaderboard = df.copy()[["uploader_name"]].drop_duplicates()
leaderboard["Entries"] = leaderboard["uploader_name"].apply(submissions_by_uploader.get)
leaderboard["Top Score"] = leaderboard["uploader_name"].apply(max_score_by_uploader.get)
leaderboard["Rank"] = leaderboard["uploader_name"].apply(rank_by_uploader.get)
leaderboard = leaderboard.rename(columns={"uploader_name": "Uploader"})
# Render order of columns matches dataframe column order.
leaderboard = leaderboard[["Rank", "Uploader", "Top Score", "Entries"]]

top_uploader = df.sort_values("value", ascending=False).groupby(
["uploader_name"], sort=False
)
name = top_uploader["uploader_name"].unique()
rank = list(range(1, len(name) + 1))
entries = top_uploader["uploader_name"].value_counts().values
leaderboard = pd.DataFrame(
{"Rank": rank, "Name": name, "Entries": entries}
).reset_index()
leaderboard.drop("Name", axis=1, inplace=True)
ranks = []
df = top_uploader.head(df.shape[1])
for uploader in df["uploader_name"]:
ranks.append(
leaderboard[leaderboard["uploader_name"] == uploader].Rank.values[0]
)
df["Rank"] = ranks

# Sort by time
df.sort_values(by=["upload_time"], inplace=True)
# Get highest score
leaderboard = get_highest_rank(df, leaderboard)

# Create table
table = html.Div(
Expand Down

0 comments on commit d615032

Please sign in to comment.