{ "tquad": { "name": "TQUAD", "task": "extractive_question_answering", "description": "This dataset is the Turkish Question & Answer dataset on Turkish & Islamic Science History within the scope of the Teknofest 2018 Artificial Intelligence competition.", "url": "https://github.com/TQuad/turkish-nlp-qa-dataset", "hf_name": "mcemilg/tquad", "generative": false }, "xquad_tr": { "name": "XQUAD", "task": "extractive_question_answering", "description": "XQuAD (Cross-lingual Question Answering Dataset) is a benchmark dataset for evaluating cross-lingual question answering performance. The dataset consists of a subset of 240 paragraphs and 1190 question-answer pairs from the development set of SQuAD v1.1 together with their professional translations into ten languages: Spanish, German, Greek, Russian, Turkish, Arabic, Vietnamese, Thai, Chinese, and Hindi.", "url": "https://github.com/google-deepmind/xquad", "hf_name": "google/xquad", "generative": false }, "mkqa_tr": { "name": "MKQA", "task": "extractive_question_answering", "description": "MKQA: Multilingual Knowledge Questions & Answers. MKQA includes 10k open-domain question-answer pairs in 26 languages, resulting in 260k examples in total.", "url": "https://github.com/apple/ml-mkqa", "hf_name": "mcemilg/mkqa_tr", "generative": false }, "xlsum_tr": { "name": "XLSum", "task": "summarization", "description": "Abstractive summarization dataset for 44 languages.", "url": "https://github.com/csebuetnlp/xl-sum", "hf_name": "csebuetnlp/xlsum", "generative": true },
"mlsum_tr": { "name": "MLSum", "task": "summarization", "description": "A multilingual summarization dataset collected from the newspapers' websites. MLSum contains 1.5M examples in 5 languages including Turkish.", "url": "https://huggingface.co/datasets/reciTAL/mlsum", "hf_name": "reciTAL/mlsum", "generative": true }, "wiki_lingua_tr": { "name": "WikiLingua", "task": "summarization", "description": "A multilingual abstractive summarization dataset covering 17 languages.", "url": "https://github.com/esdurmus/Wikilingua", "hf_name": "GEM/wiki_lingua", "generative": true }, "tr-wikihow-summ": { "name": "WikiHow", "task": "summarization", "description": "A summarization dataset obtained from the WikiHow website.", "url": "https://huggingface.co/datasets/ardauzunoglu/tr-wikihow-summ", "hf_name": "ardauzunoglu/tr-wikihow-summ", "generative": true }, "mnli_tr": { "name": "MNLI", "task": "natural_language_inference", "description": "Multi-Genre NLI (MNLI) dataset.", "url": "https://cims.nyu.edu/~sbowman/multinli/", "hf_name": "boun-tabi/nli_tr", "generative": false }, "snli_tr": { "name": "SNLI", "task": "natural_language_inference", "description": "The Stanford NLI (SNLI) dataset.", "url": "https://nlp.stanford.edu/projects/snli/", "hf_name": "boun-tabi/nli_tr", "generative": false }, "xnli_tr": { "name": "XNLI", "task": "natural_language_inference", "description": "The Cross-Lingual NLI (XNLI) dataset.", "url": "https://github.com/facebookresearch/XNLI", "hf_name": "boun-tabi/nli_tr", "generative": false }, "xcopa_tr": { "name": "XCOPA", "task": "multiple_choice", "description": "A multilingual dataset for evaluating causal commonsense reasoning capabilities of language models.", "url": "https://github.com/cambridgeltl/xcopa", "hf_name": "cambridgeltl/xcopa", "generative": false }, "exams_tr": { "name": "Exams", "task": "multiple_choice", "description": "A question answering dataset covering high school exams.", "url": "https://huggingface.co/datasets/exams", "hf_name": "exams", "generative": false },
"belebele_tr": { "name": "Belebele", "task": "multiple_choice", "description": "A multiple choice question answering dataset to evaluate machine comprehension.", "url": "https://github.com/facebookresearch/belebele", "generative": false }, "turkish_plu_goal_inference": { "name": "PLU-GI", "task": "multiple_choice", "description": "TurkishPLU - Goal Inference task.", "url": "https://github.com/GGLAB-KU/turkish-plu", "hf_name": "mcemilg/turkish-plu-goal-inference", "generative": false }, "turkish_plu_next_event_prediction": { "name": "PLU-NE", "task": "multiple_choice", "description": "TurkishPLU - Next Event Prediction task.", "url": "https://github.com/GGLAB-KU/turkish-plu", "hf_name": "mcemilg/turkish-plu-next-event-prediction", "generative": false }, "turkish_plu_step_inference": { "name": "PLU-SI", "task": "multiple_choice", "description": "TurkishPLU - Step Inference task.", "url": "https://github.com/GGLAB-KU/turkish-plu", "hf_name": "mcemilg/turkish-plu-step-inference", "generative": false }, "turkish_plu_step_ordering": { "name": "PLU-SO", "task": "multiple_choice", "description": "TurkishPLU - Step Ordering task.", "url": "https://github.com/GGLAB-KU/turkish-plu", "hf_name": "mcemilg/turkish-plu-step-ordering", "generative": false }, "sts_tr": { "name": "STS", "task": "text_classification", "description": "The machine-translated Semantic Textual Similarity dataset in Turkish.", "url": "https://github.com/emrecncelik/sts-benchmark-tr", "hf_name": "emrecan/stsb-mt-turkish", "generative": false }, "offenseval_tr": { "name": "OffensEval", "task": "text_classification", "description": "A dataset for offensive language identification in Turkish.", "url": "https://sites.google.com/site/offensevalsharedtask/offenseval-2020", "hf_name": "coltekin/offenseval2020_tr", "generative": false }, "news_cat": { "name": "NewsCat", "task": "text_classification", "description": "News classification dataset collected from Turkish newspaper websites.", "url": "http://www.kemik.yildiz.edu.tr/veri_kumelerimiz.html", "hf_name": "mcemilg/news-cat", "generative": false },
"ironytr": { "name": "IronyTR", "task": "text_classification", "description": "Irony detection dataset in Turkish.", "url": "https://github.com/teghub/IronyTR", "hf_name": "mcemilg/IronyTR", "generative": false }, "wmt-tr-en-prompt": { "name": "WMT", "task": "machine_translation", "description": "English-to-Turkish machine translation dataset.", "url": "http://www.aclweb.org/anthology/W/W16/W16-2301", "hf_name": "wmt/wmt16", "generative": true }, "gecturk_generation": { "name": "GECTurk", "task": "grammatical_error_correction", "description": "A dataset for grammatical error correction.", "url": "https://github.com/GGLAB-KU/gecturk", "hf_name": "mcemilg/GECTurk-generation", "generative": true } }