Create tasks.md
Browse files
tasks.md
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- 0 babi_nli/counting
|
| 2 |
+
- 1 babi_nli/indefinite-knowledge
|
| 3 |
+
- 2 babi_nli/simple-negation
|
| 4 |
+
- 3 babi_nli/three-arg-relations
|
| 5 |
+
- 4 babi_nli/basic-induction
|
| 6 |
+
- 5 babi_nli/time-reasoning
|
| 7 |
+
- 6 babi_nli/compound-coreference
|
| 8 |
+
- 7 babi_nli/path-finding
|
| 9 |
+
- 8 babi_nli/positional-reasoning
|
| 10 |
+
- 9 babi_nli/conjunction
|
| 11 |
+
- 10 babi_nli/size-reasoning
|
| 12 |
+
- 11 babi_nli/yes-no-questions
|
| 13 |
+
- 12 babi_nli/basic-coreference
|
| 14 |
+
- 13 babi_nli/two-supporting-facts
|
| 15 |
+
- 14 babi_nli/lists-sets
|
| 16 |
+
- 15 babi_nli/two-arg-relations
|
| 17 |
+
- 16 babi_nli/three-supporting-facts
|
| 18 |
+
- 17 babi_nli/basic-deduction
|
| 19 |
+
- 18 babi_nli/single-supporting-fact
|
| 20 |
+
- 19 anli/a1
|
| 21 |
+
- 20 anli/a2
|
| 22 |
+
- 21 anli/a3
|
| 23 |
+
- 22 sick/label
|
| 24 |
+
- 23 sick/relatedness
|
| 25 |
+
- 24 sick/entailment_AB
|
| 26 |
+
- 25 sick/entailment_BA
|
| 27 |
+
- 26 snli
|
| 28 |
+
- 27 scitail/snli_format
|
| 29 |
+
- 28 hans
|
| 30 |
+
- 29 WANLI
|
| 31 |
+
- 30 recast/recast_kg_relations
|
| 32 |
+
- 31 recast/recast_puns
|
| 33 |
+
- 32 recast/recast_factuality
|
| 34 |
+
- 33 recast/recast_megaveridicality
|
| 35 |
+
- 34 recast/recast_verbcorner
|
| 36 |
+
- 35 recast/recast_verbnet
|
| 37 |
+
- 36 recast/recast_ner
|
| 38 |
+
- 37 recast/recast_sentiment
|
| 39 |
+
- 38 probability_words_nli/usnli
|
| 40 |
+
- 39 probability_words_nli/reasoning_1hop
|
| 41 |
+
- 40 probability_words_nli/reasoning_2hop
|
| 42 |
+
- 41 nan-nli/joey234--nan-nli
|
| 43 |
+
- 42 nli_fever
|
| 44 |
+
- 43 breaking_nli
|
| 45 |
+
- 44 conj_nli
|
| 46 |
+
- 45 fracas
|
| 47 |
+
- 46 dialogue_nli
|
| 48 |
+
- 47 mpe
|
| 49 |
+
- 48 dnc
|
| 50 |
+
- 49 gpt3_nli
|
| 51 |
+
- 50 recast_white/fnplus
|
| 52 |
+
- 51 recast_white/sprl
|
| 53 |
+
- 52 recast_white/dpr
|
| 54 |
+
- 53 joci
|
| 55 |
+
- 54 contrast_nli
|
| 56 |
+
- 55 robust_nli/IS_CS
|
| 57 |
+
- 56 robust_nli/LI_LI
|
| 58 |
+
- 57 robust_nli/ST_WO
|
| 59 |
+
- 58 robust_nli/PI_SP
|
| 60 |
+
- 59 robust_nli/PI_CD
|
| 61 |
+
- 60 robust_nli/ST_SE
|
| 62 |
+
- 61 robust_nli/ST_NE
|
| 63 |
+
- 62 robust_nli/ST_LM
|
| 64 |
+
- 63 robust_nli_is_sd
|
| 65 |
+
- 64 robust_nli_li_ts
|
| 66 |
+
- 65 gen_debiased_nli/snli_seq_z
|
| 67 |
+
- 66 gen_debiased_nli/snli_z_aug
|
| 68 |
+
- 67 gen_debiased_nli/snli_par_z
|
| 69 |
+
- 68 gen_debiased_nli/mnli_par_z
|
| 70 |
+
- 69 gen_debiased_nli/mnli_z_aug
|
| 71 |
+
- 70 gen_debiased_nli/mnli_seq_z
|
| 72 |
+
- 71 add_one_rte
|
| 73 |
+
- 72 imppres/presupposition_cleft_uniqueness/presupposition
|
| 74 |
+
- 73 imppres/presupposition_possessed_definites_uniqueness/presupposition
|
| 75 |
+
- 74 imppres/presupposition_possessed_definites_existence/presupposition
|
| 76 |
+
- 75 imppres/presupposition_only_presupposition/presupposition
|
| 77 |
+
- 76 imppres/presupposition_all_n_presupposition/presupposition
|
| 78 |
+
- 77 imppres/presupposition_both_presupposition/presupposition
|
| 79 |
+
- 78 imppres/presupposition_change_of_state/presupposition
|
| 80 |
+
- 79 imppres/presupposition_cleft_existence/presupposition
|
| 81 |
+
- 80 imppres/presupposition_question_presupposition/presupposition
|
| 82 |
+
- 81 imppres/implicature_modals/prag
|
| 83 |
+
- 82 imppres/implicature_numerals_10_100/prag
|
| 84 |
+
- 83 imppres/implicature_numerals_2_3/prag
|
| 85 |
+
- 84 imppres/implicature_gradable_adjective/prag
|
| 86 |
+
- 85 imppres/implicature_quantifiers/prag
|
| 87 |
+
- 86 imppres/implicature_gradable_verb/prag
|
| 88 |
+
- 87 imppres/implicature_connectives/prag
|
| 89 |
+
- 88 imppres/implicature_gradable_adjective/log
|
| 90 |
+
- 89 imppres/implicature_gradable_verb/log
|
| 91 |
+
- 90 imppres/implicature_numerals_2_3/log
|
| 92 |
+
- 91 imppres/implicature_numerals_10_100/log
|
| 93 |
+
- 92 imppres/implicature_modals/log
|
| 94 |
+
- 93 imppres/implicature_quantifiers/log
|
| 95 |
+
- 94 imppres/implicature_connectives/log
|
| 96 |
+
- 95 glue_diagnostics/diagnostics
|
| 97 |
+
- 96 hlgd
|
| 98 |
+
- 97 paws/labeled_final
|
| 99 |
+
- 98 paws/labeled_swap
|
| 100 |
+
- 99 quora
|
| 101 |
+
- 100 medical_questions_pairs
|
| 102 |
+
- 101 conll2003/pos_tags
|
| 103 |
+
- 102 conll2003/chunk_tags
|
| 104 |
+
- 103 conll2003/ner_tags
|
| 105 |
+
- 104 hh-rlhf
|
| 106 |
+
- 105 model-written-evals
|
| 107 |
+
- 106 truthful_qa/multiple_choice
|
| 108 |
+
- 107 fig-qa
|
| 109 |
+
- 108 bigbench/fantasy_reasoning
|
| 110 |
+
- 109 bigbench/nonsense_words_grammar
|
| 111 |
+
- 110 bigbench/analytic_entailment
|
| 112 |
+
- 111 bigbench/logic_grid_puzzle
|
| 113 |
+
- 112 bigbench/geometric_shapes
|
| 114 |
+
- 113 bigbench/key_value_maps
|
| 115 |
+
- 114 bigbench/analogical_similarity
|
| 116 |
+
- 115 bigbench/metaphor_understanding
|
| 117 |
+
- 116 bigbench/metaphor_boolean
|
| 118 |
+
- 117 bigbench/ruin_names
|
| 119 |
+
- 118 bigbench/cs_algorithms
|
| 120 |
+
- 119 bigbench/physical_intuition
|
| 121 |
+
- 120 bigbench/mnist_ascii
|
| 122 |
+
- 121 bigbench/moral_permissibility
|
| 123 |
+
- 122 bigbench/emoji_movie
|
| 124 |
+
- 123 bigbench/snarks
|
| 125 |
+
- 124 bigbench/timedial
|
| 126 |
+
- 125 bigbench/dark_humor_detection
|
| 127 |
+
- 126 bigbench/gre_reading_comprehension
|
| 128 |
+
- 127 bigbench/empirical_judgments
|
| 129 |
+
- 128 bigbench/causal_judgment
|
| 130 |
+
- 129 bigbench/fact_checker
|
| 131 |
+
- 130 bigbench/logical_fallacy_detection
|
| 132 |
+
- 131 bigbench/identify_math_theorems
|
| 133 |
+
- 132 bigbench/dyck_languages
|
| 134 |
+
- 133 bigbench/winowhy
|
| 135 |
+
- 134 bigbench/logical_sequence
|
| 136 |
+
- 135 bigbench/strategyqa
|
| 137 |
+
- 136 bigbench/unit_interpretation
|
| 138 |
+
- 137 bigbench/authorship_verification
|
| 139 |
+
- 138 bigbench/undo_permutation
|
| 140 |
+
- 139 bigbench/epistemic_reasoning
|
| 141 |
+
- 140 bigbench/human_organs_senses
|
| 142 |
+
- 141 bigbench/misconceptions
|
| 143 |
+
- 142 bigbench/international_phonetic_alphabet_nli
|
| 144 |
+
- 143 bigbench/identify_odd_metaphor
|
| 145 |
+
- 144 bigbench/mathematical_induction
|
| 146 |
+
- 145 bigbench/odd_one_out
|
| 147 |
+
- 146 bigbench/reasoning_about_colored_objects
|
| 148 |
+
- 147 bigbench/strange_stories
|
| 149 |
+
- 148 bigbench/evaluating_information_essentiality
|
| 150 |
+
- 149 bigbench/figure_of_speech_detection
|
| 151 |
+
- 150 bigbench/english_proverbs
|
| 152 |
+
- 151 bigbench/general_knowledge
|
| 153 |
+
- 152 bigbench/tracking_shuffled_objects
|
| 154 |
+
- 153 bigbench/physics
|
| 155 |
+
- 154 bigbench/anachronisms
|
| 156 |
+
- 155 bigbench/simple_ethical_questions
|
| 157 |
+
- 156 bigbench/logical_args
|
| 158 |
+
- 157 bigbench/suicide_risk
|
| 159 |
+
- 158 bigbench/sentence_ambiguity
|
| 160 |
+
- 159 bigbench/temporal_sequences
|
| 161 |
+
- 160 bigbench/penguins_in_a_table
|
| 162 |
+
- 161 bigbench/sports_understanding
|
| 163 |
+
- 162 bigbench/hyperbaton
|
| 164 |
+
- 163 bigbench/code_line_description
|
| 165 |
+
- 164 bigbench/question_selection
|
| 166 |
+
- 165 bigbench/disambiguation_qa
|
| 167 |
+
- 166 bigbench/date_understanding
|
| 168 |
+
- 167 bigbench/play_dialog_same_or_different
|
| 169 |
+
- 168 bigbench/salient_translation_error_detection
|
| 170 |
+
- 169 bigbench/irony_identification
|
| 171 |
+
- 170 bigbench/emojis_emotion_prediction
|
| 172 |
+
- 171 bigbench/hindu_knowledge
|
| 173 |
+
- 172 bigbench/conceptual_combinations
|
| 174 |
+
- 173 bigbench/implicatures
|
| 175 |
+
- 174 bigbench/movie_dialog_same_or_different
|
| 176 |
+
- 175 bigbench/social_support
|
| 177 |
+
- 176 bigbench/presuppositions_as_nli
|
| 178 |
+
- 177 bigbench/vitaminc_fact_verification
|
| 179 |
+
- 178 bigbench/hhh_alignment
|
| 180 |
+
- 179 bigbench/implicit_relations
|
| 181 |
+
- 180 bigbench/bbq_lite_json
|
| 182 |
+
- 181 bigbench/phrase_relatedness
|
| 183 |
+
- 182 bigbench/logical_deduction
|
| 184 |
+
- 183 bigbench/discourse_marker_prediction
|
| 185 |
+
- 184 bigbench/movie_recommendation
|
| 186 |
+
- 185 bigbench/real_or_fake_text
|
| 187 |
+
- 186 bigbench/formal_fallacies_syllogisms_negation
|
| 188 |
+
- 187 bigbench/crass_ai
|
| 189 |
+
- 188 blimp/inchoative
|
| 190 |
+
- 189 blimp/principle_A_c_command
|
| 191 |
+
- 190 blimp/matrix_question_npi_licensor_present
|
| 192 |
+
- 191 blimp/wh_questions_subject_gap_long_distance
|
| 193 |
+
- 192 blimp/sentential_subject_island
|
| 194 |
+
- 193 blimp/existential_there_quantifiers_2
|
| 195 |
+
- 194 blimp/sentential_negation_npi_scope
|
| 196 |
+
- 195 blimp/complex_NP_island
|
| 197 |
+
- 196 blimp/principle_A_reconstruction
|
| 198 |
+
- 197 blimp/animate_subject_passive
|
| 199 |
+
- 198 blimp/tough_vs_raising_1
|
| 200 |
+
- 199 blimp/wh_vs_that_with_gap
|
| 201 |
+
- 200 blimp/principle_A_domain_2
|
| 202 |
+
- 201 blimp/npi_present_1
|
| 203 |
+
- 202 blimp/wh_vs_that_with_gap_long_distance
|
| 204 |
+
- 203 blimp/superlative_quantifiers_1
|
| 205 |
+
- 204 blimp/npi_present_2
|
| 206 |
+
- 205 blimp/wh_questions_object_gap
|
| 207 |
+
- 206 blimp/coordinate_structure_constraint_complex_left_branch
|
| 208 |
+
- 207 blimp/coordinate_structure_constraint_object_extraction
|
| 209 |
+
- 208 blimp/left_branch_island_echo_question
|
| 210 |
+
- 209 blimp/drop_argument
|
| 211 |
+
- 210 cos_e/v1.0
|
| 212 |
+
- 211 cosmos_qa
|
| 213 |
+
- 212 dream
|
| 214 |
+
- 213 openbookqa
|
| 215 |
+
- 214 qasc
|
| 216 |
+
- 215 quartz
|
| 217 |
+
- 216 quail
|
| 218 |
+
- 217 head_qa/en
|
| 219 |
+
- 218 sciq
|
| 220 |
+
- 219 social_i_qa
|
| 221 |
+
- 220 wiki_hop
|
| 222 |
+
- 221 wiqa
|
| 223 |
+
- 222 piqa
|
| 224 |
+
- 223 hellaswag
|
| 225 |
+
- 224 super_glue/copa
|
| 226 |
+
- 225 art
|
| 227 |
+
- 226 hendrycks_test/moral_disputes
|
| 228 |
+
- 227 hendrycks_test/moral_scenarios
|
| 229 |
+
- 228 hendrycks_test/nutrition
|
| 230 |
+
- 229 hendrycks_test/philosophy
|
| 231 |
+
- 230 hendrycks_test/prehistory
|
| 232 |
+
- 231 hendrycks_test/professional_accounting
|
| 233 |
+
- 232 hendrycks_test/professional_law
|
| 234 |
+
- 233 hendrycks_test/world_religions
|
| 235 |
+
- 234 hendrycks_test/professional_psychology
|
| 236 |
+
- 235 hendrycks_test/public_relations
|
| 237 |
+
- 236 hendrycks_test/security_studies
|
| 238 |
+
- 237 hendrycks_test/sociology
|
| 239 |
+
- 238 hendrycks_test/us_foreign_policy
|
| 240 |
+
- 239 hendrycks_test/virology
|
| 241 |
+
- 240 hendrycks_test/miscellaneous
|
| 242 |
+
- 241 hendrycks_test/professional_medicine
|
| 243 |
+
- 242 hendrycks_test/medical_genetics
|
| 244 |
+
- 243 hendrycks_test/college_mathematics
|
| 245 |
+
- 244 hendrycks_test/management
|
| 246 |
+
- 245 hendrycks_test/high_school_computer_science
|
| 247 |
+
- 246 hendrycks_test/astronomy
|
| 248 |
+
- 247 hendrycks_test/high_school_chemistry
|
| 249 |
+
- 248 hendrycks_test/high_school_biology
|
| 250 |
+
- 249 hendrycks_test/global_facts
|
| 251 |
+
- 250 hendrycks_test/formal_logic
|
| 252 |
+
- 251 hendrycks_test/elementary_mathematics
|
| 253 |
+
- 252 hendrycks_test/high_school_european_history
|
| 254 |
+
- 253 hendrycks_test/electrical_engineering
|
| 255 |
+
- 254 hendrycks_test/conceptual_physics
|
| 256 |
+
- 255 hendrycks_test/computer_security
|
| 257 |
+
- 256 hendrycks_test/college_physics
|
| 258 |
+
- 257 hendrycks_test/college_medicine
|
| 259 |
+
- 258 hendrycks_test/college_computer_science
|
| 260 |
+
- 259 hendrycks_test/college_chemistry
|
| 261 |
+
- 260 hendrycks_test/college_biology
|
| 262 |
+
- 261 hendrycks_test/econometrics
|
| 263 |
+
- 262 hendrycks_test/clinical_knowledge
|
| 264 |
+
- 263 hendrycks_test/anatomy
|
| 265 |
+
- 264 hendrycks_test/marketing
|
| 266 |
+
- 265 hendrycks_test/machine_learning
|
| 267 |
+
- 266 hendrycks_test/logical_fallacies
|
| 268 |
+
- 267 hendrycks_test/jurisprudence
|
| 269 |
+
- 268 hendrycks_test/international_law
|
| 270 |
+
- 269 hendrycks_test/human_sexuality
|
| 271 |
+
- 270 hendrycks_test/human_aging
|
| 272 |
+
- 271 hendrycks_test/high_school_world_history
|
| 273 |
+
- 272 hendrycks_test/abstract_algebra
|
| 274 |
+
- 273 hendrycks_test/high_school_us_history
|
| 275 |
+
- 274 hendrycks_test/high_school_psychology
|
| 276 |
+
- 275 hendrycks_test/high_school_physics
|
| 277 |
+
- 276 hendrycks_test/high_school_microeconomics
|
| 278 |
+
- 277 hendrycks_test/high_school_mathematics
|
| 279 |
+
- 278 hendrycks_test/high_school_macroeconomics
|
| 280 |
+
- 279 hendrycks_test/high_school_government_and_politics
|
| 281 |
+
- 280 hendrycks_test/high_school_geography
|
| 282 |
+
- 281 hendrycks_test/high_school_statistics
|
| 283 |
+
- 282 hendrycks_test/business_ethics
|
| 284 |
+
- 283 winogrande/winogrande_xl
|
| 285 |
+
- 284 codah/codah
|
| 286 |
+
- 285 ai2_arc/ARC-Challenge/challenge
|
| 287 |
+
- 286 ai2_arc/ARC-Easy/challenge
|
| 288 |
+
- 287 definite_pronoun_resolution
|
| 289 |
+
- 288 swag
|
| 290 |
+
- 289 math_qa
|
| 291 |
+
- 290 utilitarianism
|
| 292 |
+
- 291 TuringBench
|
| 293 |
+
- 292 trec
|
| 294 |
+
- 293 vitaminc/tals--vitaminc
|
| 295 |
+
- 294 hope_edi/english
|
| 296 |
+
- 295 rumoureval_2019/RumourEval2019
|
| 297 |
+
- 296 ethos/binary
|
| 298 |
+
- 297 ethos/multilabel
|
| 299 |
+
- 298 glue/cola
|
| 300 |
+
- 299 glue/sst2
|
| 301 |
+
- 300 glue/mrpc
|
| 302 |
+
- 301 glue/qqp
|
| 303 |
+
- 302 glue/stsb
|
| 304 |
+
- 303 glue/mnli
|
| 305 |
+
- 304 glue/qnli
|
| 306 |
+
- 305 glue/rte
|
| 307 |
+
- 306 glue/wnli
|
| 308 |
+
- 307 super_glue/boolq
|
| 309 |
+
- 308 super_glue/cb
|
| 310 |
+
- 309 super_glue/multirc
|
| 311 |
+
- 310 super_glue/wic
|
| 312 |
+
- 311 super_glue/axg
|
| 313 |
+
- 312 tweet_eval/stance_feminist
|
| 314 |
+
- 313 tweet_eval/stance_atheism
|
| 315 |
+
- 314 tweet_eval/stance_hillary
|
| 316 |
+
- 315 tweet_eval/stance_abortion
|
| 317 |
+
- 316 tweet_eval/sentiment
|
| 318 |
+
- 317 tweet_eval/offensive
|
| 319 |
+
- 318 tweet_eval/stance_climate
|
| 320 |
+
- 319 tweet_eval/irony
|
| 321 |
+
- 320 tweet_eval/emotion
|
| 322 |
+
- 321 tweet_eval/emoji
|
| 323 |
+
- 322 tweet_eval/hate
|
| 324 |
+
- 323 discovery/discovery
|
| 325 |
+
- 324 pragmeval/switchboard
|
| 326 |
+
- 325 pragmeval/squinky-informativeness
|
| 327 |
+
- 326 pragmeval/emobank-arousal
|
| 328 |
+
- 327 pragmeval/emobank-dominance
|
| 329 |
+
- 328 pragmeval/emobank-valence
|
| 330 |
+
- 329 pragmeval/mrda
|
| 331 |
+
- 330 pragmeval/verifiability
|
| 332 |
+
- 331 pragmeval/squinky-implicature
|
| 333 |
+
- 332 pragmeval/squinky-formality
|
| 334 |
+
- 333 pragmeval/gum
|
| 335 |
+
- 334 pragmeval/emergent
|
| 336 |
+
- 335 pragmeval/persuasiveness-premisetype
|
| 337 |
+
- 336 pragmeval/pdtb
|
| 338 |
+
- 337 pragmeval/persuasiveness-eloquence
|
| 339 |
+
- 338 pragmeval/persuasiveness-specificity
|
| 340 |
+
- 339 pragmeval/persuasiveness-strength
|
| 341 |
+
- 340 pragmeval/sarcasm
|
| 342 |
+
- 341 pragmeval/stac
|
| 343 |
+
- 342 pragmeval/persuasiveness-claimtype
|
| 344 |
+
- 343 pragmeval/persuasiveness-relevance
|
| 345 |
+
- 344 lex_glue/eurlex
|
| 346 |
+
- 345 lex_glue/scotus
|
| 347 |
+
- 346 lex_glue/ledgar
|
| 348 |
+
- 347 lex_glue/unfair_tos
|
| 349 |
+
- 348 lex_glue/case_hold
|
| 350 |
+
- 349 imdb
|
| 351 |
+
- 350 rotten_tomatoes
|
| 352 |
+
- 351 ag_news
|
| 353 |
+
- 352 yelp_review_full/yelp_review_full
|
| 354 |
+
- 353 financial_phrasebank/sentences_allagree
|
| 355 |
+
- 354 poem_sentiment
|
| 356 |
+
- 355 dbpedia_14/dbpedia_14
|
| 357 |
+
- 356 amazon_polarity/amazon_polarity
|
| 358 |
+
- 357 app_reviews
|
| 359 |
+
- 358 hate_speech18
|
| 360 |
+
- 359 sms_spam
|
| 361 |
+
- 360 humicroedit/subtask-1
|
| 362 |
+
- 361 humicroedit/subtask-2
|
| 363 |
+
- 362 snips_built_in_intents
|
| 364 |
+
- 363 banking77
|
| 365 |
+
- 364 hate_speech_offensive
|
| 366 |
+
- 365 hyperpartisan_news_detection/byarticle
|
| 367 |
+
- 366 hyperpartisan_news_detection/bypublisher
|
| 368 |
+
- 367 go_emotions/simplified
|
| 369 |
+
- 368 scicite
|
| 370 |
+
- 369 liar
|
| 371 |
+
- 370 lexical_relation_classification/ROOT09
|
| 372 |
+
- 371 lexical_relation_classification/EVALution
|
| 373 |
+
- 372 lexical_relation_classification/CogALexV
|
| 374 |
+
- 373 lexical_relation_classification/BLESS
|
| 375 |
+
- 374 lexical_relation_classification/K&H+N
|
| 376 |
+
- 375 linguisticprobing/coordination_inversion
|
| 377 |
+
- 376 linguisticprobing/odd_man_out
|
| 378 |
+
- 377 linguisticprobing/word_content
|
| 379 |
+
- 378 linguisticprobing/obj_number
|
| 380 |
+
- 379 linguisticprobing/past_present
|
| 381 |
+
- 380 linguisticprobing/tree_depth
|
| 382 |
+
- 381 linguisticprobing/sentence_length
|
| 383 |
+
- 382 linguisticprobing/top_constituents
|
| 384 |
+
- 383 linguisticprobing/bigram_shift
|
| 385 |
+
- 384 linguisticprobing/subj_number
|
| 386 |
+
- 385 crowdflower/sentiment_nuclear_power
|
| 387 |
+
- 386 crowdflower/tweet_global_warming
|
| 388 |
+
- 387 crowdflower/airline-sentiment
|
| 389 |
+
- 388 crowdflower/economic-news
|
| 390 |
+
- 389 crowdflower/political-media-audience
|
| 391 |
+
- 390 crowdflower/political-media-bias
|
| 392 |
+
- 391 crowdflower/political-media-message
|
| 393 |
+
- 392 crowdflower/text_emotion
|
| 394 |
+
- 393 crowdflower/corporate-messaging
|
| 395 |
+
- 394 ethics/commonsense
|
| 396 |
+
- 395 ethics/deontology
|
| 397 |
+
- 396 ethics/justice
|
| 398 |
+
- 397 ethics/virtue
|
| 399 |
+
- 398 emo/emo2019
|
| 400 |
+
- 399 google_wellformed_query
|
| 401 |
+
- 400 tweets_hate_speech_detection
|
| 402 |
+
- 401 adv_glue/adv_sst2
|
| 403 |
+
- 402 adv_glue/adv_qqp
|
| 404 |
+
- 403 adv_glue/adv_mnli
|
| 405 |
+
- 404 adv_glue/adv_mnli_mismatched
|
| 406 |
+
- 405 adv_glue/adv_qnli
|
| 407 |
+
- 406 adv_glue/adv_rte
|
| 408 |
+
- 407 has_part
|
| 409 |
+
- 408 wnut_17/wnut_17
|
| 410 |
+
- 409 ncbi_disease/ncbi_disease
|
| 411 |
+
- 410 acronym_identification
|
| 412 |
+
- 411 jnlpba/jnlpba
|
| 413 |
+
- 412 species_800/species_800
|
| 414 |
+
- 413 ontonotes_english/SpeedOfMagic--ontonotes_english
|
| 415 |
+
- 414 blog_authorship_corpus/gender
|
| 416 |
+
- 415 blog_authorship_corpus/age
|
| 417 |
+
- 416 blog_authorship_corpus/horoscope
|
| 418 |
+
- 417 blog_authorship_corpus/job
|
| 419 |
+
- 418 open_question_type
|
| 420 |
+
- 419 health_fact
|
| 421 |
+
- 420 commonsense_qa
|
| 422 |
+
- 421 mc_taco
|
| 423 |
+
- 422 ade_corpus_v2/Ade_corpus_v2_classification
|
| 424 |
+
- 423 discosense
|
| 425 |
+
- 424 circa
|
| 426 |
+
- 425 code_x_glue_cc_defect_detection
|
| 427 |
+
- 426 code_x_glue_cc_clone_detection_big_clone_bench
|
| 428 |
+
- 427 code_x_glue_cc_code_refinement/medium
|
| 429 |
+
- 428 EffectiveFeedbackStudentWriting
|
| 430 |
+
- 429 promptSentiment
|
| 431 |
+
- 430 promptNLI
|
| 432 |
+
- 431 promptSpoke
|
| 433 |
+
- 432 promptProficiency
|
| 434 |
+
- 433 promptGrammar
|
| 435 |
+
- 434 promptCoherence
|
| 436 |
+
- 435 phrase_similarity
|
| 437 |
+
- 436 scientific-exaggeration-detection
|
| 438 |
+
- 437 quarel
|
| 439 |
+
- 438 fever-evidence-related/mwong--fever-related
|
| 440 |
+
- 439 numer_sense
|
| 441 |
+
- 440 dynasent/dynabench.dynasent.r1.all/r1
|
| 442 |
+
- 441 dynasent/dynabench.dynasent.r2.all/r2
|
| 443 |
+
- 442 Sarcasm_News_Headline
|
| 444 |
+
- 443 sem_eval_2010_task_8
|