|
{ |
|
"tquad": { |
|
"name": "TQUAD", |
|
"task": "extractive_question_answering", |
|
"description": "This dataset is the Turkish Question & Answer dataset on Turkish & Islamic Science History within the scope of Teknofest 2018 Artificial Intelligence competition.", |
|
"url": "https://github.com/TQuad/turkish-nlp-qa-dataset", |
|
"hf_name": "mcemilg/tquad", |
|
"generative": false |
|
}, |
|
"xquad_tr": { |
|
"name": "XQUAD", |
|
"task": "extractive_question_answering", |
|
"description": "XQuAD (Cross-lingual Question Answering Dataset) is a benchmark dataset for evaluating cross-lingual question answering performance. The dataset consists of a subset of 240 paragraphs and 1190 question-answer pairs from the development set of SQuAD v1.1 together with their professional translations into ten languages: Spanish, German, Greek, Russian, Turkish, Arabic, Vietnamese, Thai, Chinese, and Hindi..", |
|
"url": "https://github.com/google-deepmind/xquad", |
|
"hf_name": "google/xquad", |
|
"generative": false |
|
}, |
|
"mkqa_tr": { |
|
"name": "MKQA", |
|
"task": "extractive_question_answering", |
|
"description": "MKQA: Multilingual Knowledge Questions & Answers. MKQA includes 10k open-domain question-answer pairs in 26 languages, resulting 260k examples in total.", |
|
"url": "https://github.com/apple/ml-mkqa", |
|
"hf_name": "mcemilg/mkqa_tr", |
|
"generative": false |
|
}, |
|
"xlsum_tr": { |
|
"name": "XLSum", |
|
"task": "summarization", |
|
"description": "Abstractive summarization dataset for 44 languages.", |
|
"url": "https://github.com/csebuetnlp/xl-sum", |
|
"hf_name": "csebuetnlp/xlsum", |
|
"generative": true |
|
}, |
|
"mlsum_tr": { |
|
"name": "MLSum", |
|
"task": "summarization", |
|
"description": "A multilingual summarization dataset collected from the newspapers' websites. MLSum contains 1.5M examples in 5 languages including Turkish.", |
|
"url": "https://huggingface.co/datasets/reciTAL/mlsum", |
|
"hf_name": "reciTAL/mlsum", |
|
"generative": true |
|
}, |
|
"wiki_lingua_tr": { |
|
"name": "WikiLingua", |
|
"task": "summarization", |
|
"description": "A multilingual abstractive summarization dataset covering 17 languages.", |
|
"url": "https://github.com/esdurmus/Wikilingua", |
|
"hf_name": "GEM/wiki_lingua", |
|
"generative": true |
|
}, |
|
"tr-wikihow-summ": { |
|
"name": "WikiHow", |
|
"task": "summarization", |
|
"description": "A summarization dataset obtained from WikiHow website.", |
|
"url": "https://huggingface.co/datasets/ardauzunoglu/tr-wikihow-summ", |
|
"hf_name": "ardauzunoglu/tr-wikihow-summ", |
|
"generative": true |
|
}, |
|
"mnli_tr": { |
|
"name": "MNLI", |
|
"task": "natural_language_inference", |
|
"description": "Multi-Genre NLI (MNLI) dataset.", |
|
"url": "https://cims.nyu.edu/~sbowman/multinli/", |
|
"hf_name": "boun-tabi/nli_tr", |
|
"generative": false |
|
}, |
|
"snli_tr": { |
|
"name": "SNLI", |
|
"task": "natural_language_inference", |
|
"description": "The Stanford NLI (SNLI) dataset.", |
|
"url": "https://nlp.stanford.edu/projects/snli/", |
|
"hf_name": "boun-tabi/nli_tr", |
|
"generative": false |
|
}, |
|
"xnli_tr": { |
|
"name": "XNLI", |
|
"task": "natural_language_inference", |
|
"description": "The Cross-Lingual NLI (XNLI) dataset.", |
|
"url": "https://github.com/facebookresearch/XNLI", |
|
"hf_name": "boun-tabi/nli_tr", |
|
"generative": false |
|
}, |
|
"xcopa_tr": { |
|
"name": "XCOPA", |
|
"task": "multiple_choice", |
|
"description": "A multilingual dataset for evaluating causal commonsense reasoning capabilities of language models.", |
|
"url": "https://github.com/cambridgeltl/xcopa", |
|
"hf_name": "cambridgeltl/xcopa", |
|
"generative": false |
|
}, |
|
"exams_tr": { |
|
"name": "Exams", |
|
"task": "multiple_choice", |
|
"description": "A question answering dataset covering high school exams.", |
|
"url": "https://huggingface.co/datasets/exams", |
|
"hf_name": "exams", |
|
"generative": false |
|
}, |
|
"belebele_tr": { |
|
"name": "Belebele", |
|
"task": "multiple_choice", |
|
"description": "A multiple choice question answering dataset to evaluate machine comprehension.", |
|
"url": "https://github.com/facebookresearch/belebele", |
|
"generative": false |
|
}, |
|
"turkish_plu_goal_inference": { |
|
"name": "PLU-GI", |
|
"task": "multiple_choice", |
|
"description": "TurkishPLU - Goal Inference task.", |
|
"url": "https://github.com/GGLAB-KU/turkish-plu", |
|
"hf_name": "mcemilg/turkish-plu-goal-inference", |
|
"generative": false |
|
}, |
|
"turkish_plu_next_event_prediction": { |
|
"name": "PLU-NE", |
|
"task": "multiple_choice", |
|
"description": "TurkishPLU - Next Event Prediction task.", |
|
"url": "https://github.com/GGLAB-KU/turkish-plu", |
|
"hf_name": "mcemilg/turkish-plu-next-event-prediction", |
|
"generative": false |
|
}, |
|
"turkish_plu_step_inference": { |
|
"name": "PLU-SI", |
|
"task": "multiple_choice", |
|
"description": "TurkishPLU - Step Inference task.", |
|
"url": "https://github.com/GGLAB-KU/turkish-plu", |
|
"hf_name": "mcemilg/turkish-plu-step-inference", |
|
"generative": false |
|
}, |
|
"turkish_plu_step_ordering": { |
|
"name": "PLU-SO", |
|
"task": "multiple_choice", |
|
"description": "TurkishPLU - Step Ordering task.", |
|
"url": "https://github.com/GGLAB-KU/turkish-plu", |
|
"hf_name": "mcemilg/turkish-plu-step-ordering", |
|
"generative": false |
|
}, |
|
"sts_tr": { |
|
"name": "STS", |
|
"task": "text_classification", |
|
"description": "The machine-translated Semantic Textual Similarity dataset in Turkish.", |
|
"url": "https://github.com/emrecncelik/sts-benchmark-tr", |
|
"hf_name": "emrecan/stsb-mt-turkish", |
|
"generative": false |
|
}, |
|
"offenseval_tr": { |
|
"name": "OffensEval", |
|
"task": "text_classification", |
|
"description": "A dataset for offensive speech recognition in Turkish.", |
|
"url": "https://sites.google.com/site/offensevalsharedtask/offenseval-2020", |
|
"hf_name": "coltekin/offenseval2020_tr", |
|
"generative": false |
|
}, |
|
"news_cat": { |
|
"name": "NewsCat", |
|
"task": "text_classification", |
|
"description": "News classification dataset collected from Turkish newspapers websites.", |
|
"url": "http://www.kemik.yildiz.edu.tr/veri_kumelerimiz.html", |
|
"hf_name": "mcemilg/news-cat", |
|
"generative": false |
|
}, |
|
"ironytr": { |
|
"name": "IronyTR", |
|
"task": "text_classification", |
|
"description": "Irony detection dataset in Turkish.", |
|
"url": "https://github.com/teghub/IronyTR", |
|
"hf_name": "mcemilg/IronyTR", |
|
"generative": false |
|
}, |
|
"wmt-tr-en-prompt": { |
|
"name": "WMT", |
|
"task": "machine_translation", |
|
"description": "English-to-Turkish machine translation dataset.", |
|
"url": "http://www.aclweb.org/anthology/W/W16/W16-2301", |
|
"hf_name": "wmt/wmt16", |
|
"generative": true |
|
}, |
|
"gecturk_generation": { |
|
"name": "GECTurk", |
|
"task": "grammatical_error_correction", |
|
"description": "A dataset for grammatical error correction.", |
|
"url": "https://github.com/GGLAB-KU/gecturk", |
|
"hf_name": "mcemilg/GECTurk-generation", |
|
"generative": true |
|
} |
|
} |