Spaces:
Running
Running
| { | |
| "perception_temporal_action_loc": { | |
| "MLAB (claude-3-5-sonnet-v2)": 2.2, | |
| "Top Human in Competition": 284.6, | |
| "MLAB (gemini-exp-1206)": -1.3, | |
| "MLAB (o3-mini)": 0.9, | |
| "MLAB (gpt-4o)": 0.9, | |
| "MLAB (llama3-1-405b-instruct)": 1.5, | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 1.0, | |
| "Human Idea + MLAB (gpt-4o)": 1.5 | |
| }, | |
| "llm-merging": { | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": -0.7, | |
| "Top Human in Competition": 68.2, | |
| "MLAB (claude-3-5-sonnet-v2)": 3.4, | |
| "MLAB (gemini-exp-1206)": 3.4, | |
| "MLAB (o3-mini)": -0.7, | |
| "MLAB (gpt-4o)": 1.4, | |
| "MLAB (llama3-1-405b-instruct)": -0.7, | |
| "Human Idea + MLAB (gpt-4o)": -0.7 | |
| }, | |
| "product-recommendation": { | |
| "MLAB (claude-3-5-sonnet-v2)": 12.3, | |
| "Top Human in Competition": 412.6, | |
| "MLAB (gemini-exp-1206)": 0.6, | |
| "MLAB (o3-mini)": 0.6, | |
| "MLAB (gpt-4o)": 2.6, | |
| "MLAB (llama3-1-405b-instruct)": -0.0, | |
| "Human Idea + MLAB (gpt-4o)": 8.9, | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 0.6 | |
| }, | |
| "weather_forcast": { | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 83.6, | |
| "Top Human in Competition": 212.0, | |
| "Human Idea + MLAB (gpt-4o)": 26.1, | |
| "MLAB (claude-3-5-sonnet-v2)": 31.0, | |
| "MLAB (gemini-exp-1206)": 91.4, | |
| "MLAB (o3-mini)": 53.3, | |
| "MLAB (gpt-4o)": 100.8, | |
| "MLAB (llama3-1-405b-instruct)": 66.7 | |
| }, | |
| "meta-learning": { | |
| "MLAB (claude-3-5-sonnet-v2)": -14.9, | |
| "Top Human in Competition": 304.5, | |
| "MLAB (gemini-exp-1206)": -3.2, | |
| "MLAB (o3-mini)": -14.9, | |
| "MLAB (gpt-4o)": -14.9, | |
| "MLAB (llama3-1-405b-instruct)": -14.9, | |
| "Human Idea + MLAB (gpt-4o)": -14.9, | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": -14.9 | |
| }, | |
| "machine_unlearning": { | |
| "Human Idea + MLAB (gpt-4o)": 4.2, | |
| "Top Human in Competition": 61.9, | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 7.3, | |
| "MLAB (claude-3-5-sonnet-v2)": -58.6, | |
| "MLAB (gemini-exp-1206)": 3.5, | |
| "MLAB (o3-mini)": 2.2, | |
| "MLAB (gpt-4o)": -11.1, | |
| "MLAB (llama3-1-405b-instruct)": 3.8 | |
| }, | |
| "backdoor-trigger-recovery": { | |
| "CoI-Agent (o1) + MLAB (gpt-4o)": 24.9, | |
| "Top Human in Competition": 621.3, | |
| "MLAB (claude-3-5-sonnet-v2)": 247.9, | |
| "MLAB (gemini-exp-1206)": 80.4, | |
| "MLAB (o3-mini)": 38.8, | |
| "MLAB (gpt-4o)": 64.5, | |
| "MLAB (llama3-1-405b-instruct)": 71.7, | |
| "Human Idea + MLAB (gpt-4o)": 54.5 | |
| } | |
| } |