Commit · 83fabc4
1 Parent(s): e885ab4

add ranking

- src/display/about.py +2 -2
- src/populate.py +28 -2
src/display/about.py
CHANGED

@@ -10,7 +10,7 @@ class Task:
 
 # Init: to update with your specific keys
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("asr_eval1", "EN_LibriSpeech", "EN_LibriSpeech CER")
     task1 = Task("asr_eval2", "ML_SUPERB", "ML_SUPERB CER")
     task2 = Task("asr_eval3", "Bitrate", "Bitrate")

@@ -28,7 +28,7 @@ The leaderboard for discrete speech challenge (ASR Track) at Interspeech 2024. C
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 
-The evaluation (static version) are conducted by the organizers only.
+The evaluation (static version) are conducted by the organizers only.
 
 We will accept submissions from the google form (see rules in the challenge website).
 
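
For context, each Task(...) entry in the hunk above bundles three strings: the task key in the results json, the metric key, and the column name shown on the leaderboard. A minimal sketch of how such a container is commonly laid out; the dataclass and its field names below are assumptions for illustration, the real Task class is defined just above this hunk in src/display/about.py:

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task_key in the results json file
    metric: str     # metric_key in the results json file
    col_name: str   # name to display in the leaderboard

class Tasks(Enum):
    task0 = Task("asr_eval1", "EN_LibriSpeech", "EN_LibriSpeech CER")
    task1 = Task("asr_eval2", "ML_SUPERB", "ML_SUPERB CER")
    task2 = Task("asr_eval3", "Bitrate", "Bitrate")

# Display columns the leaderboard builds from the enum:
print([t.value.col_name for t in Tasks])
# -> ['EN_LibriSpeech CER', 'ML_SUPERB CER', 'Bitrate']
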
src/populate.py
CHANGED

@@ -13,8 +13,34 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
-    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
-    df = df.sort_values(by=[AutoEvalColumn.task3.name], ascending=True)
+    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False))
+    # df = df.sort_values(by=[AutoEvalColumn.task3.name], ascending=True)
+
+
+    df[AutoEvalColumn.task0.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task0.name].values)
+        ).squeeze()
+    )
+    df[AutoEvalColumn.task1.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task1.name].values)
+        ).squeeze()
+    )
+    df[AutoEvalColumn.task2.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task2.name].values)
+        ).squeeze()
+    )
+
+    en_cer_rank = df[AutoEvalColumn.task0.name].rank(method="min", numeric_only=True, ascending=True)
+    ml_cer_rank = df[AutoEvalColumn.task1.name].rank(method="min", numeric_only=True, ascending=True)
+    bitrate_rank = df[AutoEvalColumn.task2.name].rank(method="min", numeric_only=True, ascending=True)
+    df["Ranking"] = pd.Series((en_cer_rank + ml_cer_rank + bitrate_rank)/3)
+    df = df.sort_values(by=["Ranking", AutoEvalColumn.task1.name], ascending=True)
+    df["Rank"] = df.groupby("Precision").cumcount() + 1
+    df.pop("Ranking")
+
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
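
The ranking added here first normalizes each task column (the np.stack(...).squeeze() calls appear to unwrap scores stored as one-element arrays into plain floats), then averages the three per-metric ranks (ascending, since lower CER and lower bitrate are better), sorts by that average with ML_SUPERB CER as tie-breaker, and finally assigns a 1-based Rank within each Precision group. A minimal sketch of the same scheme on a toy table, using made-up model names and scores and omitting the AutoEvalColumn / Precision machinery:

import pandas as pd

# Toy leaderboard: three metrics where lower is better (made-up scores).
df = pd.DataFrame({
    "model": ["A", "B", "C"],
    "EN_LibriSpeech CER": [0.05, 0.03, 0.04],
    "ML_SUPERB CER": [0.20, 0.25, 0.15],
    "Bitrate": [300.0, 450.0, 350.0],
})

metrics = ["EN_LibriSpeech CER", "ML_SUPERB CER", "Bitrate"]

# Per-metric rank; method="min" lets ties share the better rank, as in the commit.
ranks = df[metrics].rank(method="min", ascending=True)

# Average the three ranks, sort by the average (ties broken on ML_SUPERB CER),
# then assign the displayed 1-based Rank and drop the helper column.
df["Ranking"] = ranks.mean(axis=1)
df = df.sort_values(by=["Ranking", "ML_SUPERB CER"], ascending=True)
df["Rank"] = list(range(1, len(df) + 1))  # the real code numbers rows per Precision group
df = df.drop(columns=["Ranking"])
print(df)

With these toy scores, model C comes out first (average rank (2 + 1 + 2) / 3 ≈ 1.67), A second (2.0), and B third (≈ 2.33).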