File size: 7,622 Bytes
500fbd7 7987659 500fbd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
{
"tquad": {
"name": "TQUAD",
"task": "extractive_question_answering",
"description": "TQuAD is a Turkish question answering dataset on Turkish & Islamic Science History, created within the scope of the Teknofest 2018 Artificial Intelligence competition.",
"url": "https://github.com/TQuad/turkish-nlp-qa-dataset",
"hf_name": "mcemilg/tquad",
"generative": false
},
"xquad_tr": {
"name": "XQUAD",
"task": "extractive_question_answering",
"description": "XQuAD (Cross-lingual Question Answering Dataset) is a benchmark dataset for evaluating cross-lingual question answering performance. The dataset consists of a subset of 240 paragraphs and 1190 question-answer pairs from the development set of SQuAD v1.1 together with their professional translations into ten languages: Spanish, German, Greek, Russian, Turkish, Arabic, Vietnamese, Thai, Chinese, and Hindi.",
"url": "https://github.com/google-deepmind/xquad",
"hf_name": "google/xquad",
"generative": false
},
"mkqa_tr": {
"name": "MKQA",
"task": "extractive_question_answering",
"description": "MKQA: Multilingual Knowledge Questions & Answers. MKQA includes 10k open-domain question-answer pairs in 26 languages, resulting in 260k examples in total.",
"url": "https://github.com/apple/ml-mkqa",
"hf_name": "mcemilg/mkqa_tr",
"generative": false
},
"xlsum_tr": {
"name": "XLSum",
"task": "summarization",
"description": "Abstractive summarization dataset for 44 languages.",
"url": "https://github.com/csebuetnlp/xl-sum",
"hf_name": "csebuetnlp/xlsum",
"generative": true
},
"mlsum_tr": {
"name": "MLSum",
"task": "summarization",
"description": "A multilingual summarization dataset collected from the newspapers' websites. MLSum contains 1.5M examples in 5 languages including Turkish.",
"url": "https://huggingface.co/datasets/reciTAL/mlsum",
"hf_name": "reciTAL/mlsum",
"generative": true
},
"wiki_lingua_tr": {
"name": "WikiLingua",
"task": "summarization",
"description": "A multilingual abstractive summarization dataset covering 17 languages.",
"url": "https://github.com/esdurmus/Wikilingua",
"hf_name": "GEM/wiki_lingua",
"generative": true
},
"tr-wikihow-summ": {
"name": "WikiHow",
"task": "summarization",
"description": "A summarization dataset obtained from WikiHow website.",
"url": "https://huggingface.co/datasets/ardauzunoglu/tr-wikihow-summ",
"hf_name": "ardauzunoglu/tr-wikihow-summ",
"generative": true
},
"mnli_tr": {
"name": "MNLI",
"task": "natural_language_inference",
"description": "Multi-Genre NLI (MNLI) dataset.",
"url": "https://cims.nyu.edu/~sbowman/multinli/",
"hf_name": "boun-tabi/nli_tr",
"generative": false
},
"snli_tr": {
"name": "SNLI",
"task": "natural_language_inference",
"description": "The Stanford NLI (SNLI) dataset.",
"url": "https://nlp.stanford.edu/projects/snli/",
"hf_name": "boun-tabi/nli_tr",
"generative": false
},
"xnli_tr": {
"name": "XNLI",
"task": "natural_language_inference",
"description": "The Cross-Lingual NLI (XNLI) dataset.",
"url": "https://github.com/facebookresearch/XNLI",
"hf_name": "boun-tabi/nli_tr",
"generative": false
},
"xcopa_tr": {
"name": "XCOPA",
"task": "multiple_choice",
"description": "A multilingual dataset for evaluating causal commonsense reasoning capabilities of language models.",
"url": "https://github.com/cambridgeltl/xcopa",
"hf_name": "cambridgeltl/xcopa",
"generative": false
},
"exams_tr": {
"name": "Exams",
"task": "multiple_choice",
"description": "A question answering dataset covering high school exams.",
"url": "https://huggingface.co/datasets/exams",
"hf_name": "exams",
"generative": false
},
"belebele_tr": {
"name": "Belebele",
"task": "multiple_choice",
"description": "A multiple choice question answering dataset to evaluate machine comprehension.",
"url": "https://github.com/facebookresearch/belebele",
"generative": false
},
"turkish_plu_goal_inference": {
"name": "PLU-GI",
"task": "multiple_choice",
"description": "TurkishPLU - Goal Inference task.",
"url": "https://github.com/GGLAB-KU/turkish-plu",
"hf_name": "mcemilg/turkish-plu-goal-inference",
"generative": false
},
"turkish_plu_next_event_prediction": {
"name": "PLU-NE",
"task": "multiple_choice",
"description": "TurkishPLU - Next Event Prediction task.",
"url": "https://github.com/GGLAB-KU/turkish-plu",
"hf_name": "mcemilg/turkish-plu-next-event-prediction",
"generative": false
},
"turkish_plu_step_inference": {
"name": "PLU-SI",
"task": "multiple_choice",
"description": "TurkishPLU - Step Inference task.",
"url": "https://github.com/GGLAB-KU/turkish-plu",
"hf_name": "mcemilg/turkish-plu-step-inference",
"generative": false
},
"turkish_plu_step_ordering": {
"name": "PLU-SO",
"task": "multiple_choice",
"description": "TurkishPLU - Step Ordering task.",
"url": "https://github.com/GGLAB-KU/turkish-plu",
"hf_name": "mcemilg/turkish-plu-step-ordering",
"generative": false
},
"sts_tr": {
"name": "STS",
"task": "text_classification",
"description": "The machine-translated Semantic Textual Similarity dataset in Turkish.",
"url": "https://github.com/emrecncelik/sts-benchmark-tr",
"hf_name": "emrecan/stsb-mt-turkish",
"generative": false
},
"offenseval_tr": {
"name": "OffensEval",
"task": "text_classification",
"description": "A dataset for offensive language identification in Turkish.",
"url": "https://sites.google.com/site/offensevalsharedtask/offenseval-2020",
"hf_name": "coltekin/offenseval2020_tr",
"generative": false
},
"news_cat": {
"name": "NewsCat",
"task": "text_classification",
"description": "News classification dataset collected from Turkish newspaper websites.",
"url": "http://www.kemik.yildiz.edu.tr/veri_kumelerimiz.html",
"hf_name": "mcemilg/news-cat",
"generative": false
},
"ironytr": {
"name": "IronyTR",
"task": "text_classification",
"description": "Irony detection dataset in Turkish.",
"url": "https://github.com/teghub/IronyTR",
"hf_name": "mcemilg/IronyTR",
"generative": false
},
"wmt-tr-en-prompt": {
"name": "WMT",
"task": "machine_translation",
"description": "English-to-Turkish machine translation dataset.",
"url": "http://www.aclweb.org/anthology/W/W16/W16-2301",
"hf_name": "wmt/wmt16",
"generative": true
},
"gecturk_generation": {
"name": "GECTurk",
"task": "grammatical_error_correction",
"description": "A dataset for grammatical error correction.",
"url": "https://github.com/GGLAB-KU/gecturk",
"hf_name": "mcemilg/GECTurk-generation",
"generative": true
}
} |