Cetvel / data /datasets.json
Ilker Kesen
update results
7987659
{
"tquad": {
"name": "TQUAD",
"task": "extractive_question_answering",
"description": "This dataset is the Turkish Question & Answer dataset on Turkish & Islamic Science History within the scope of the Teknofest 2018 Artificial Intelligence competition.",
"url": "https://github.com/TQuad/turkish-nlp-qa-dataset",
"hf_name": "mcemilg/tquad",
"generative": false
},
"xquad_tr": {
"name": "XQUAD",
"task": "extractive_question_answering",
"description": "XQuAD (Cross-lingual Question Answering Dataset) is a benchmark dataset for evaluating cross-lingual question answering performance. The dataset consists of a subset of 240 paragraphs and 1190 question-answer pairs from the development set of SQuAD v1.1 together with their professional translations into ten languages: Spanish, German, Greek, Russian, Turkish, Arabic, Vietnamese, Thai, Chinese, and Hindi.",
"url": "https://github.com/google-deepmind/xquad",
"hf_name": "google/xquad",
"generative": false
},
"mkqa_tr": {
"name": "MKQA",
"task": "extractive_question_answering",
"description": "MKQA: Multilingual Knowledge Questions & Answers. MKQA includes 10k open-domain question-answer pairs in 26 languages, resulting in 260k examples in total.",
"url": "https://github.com/apple/ml-mkqa",
"hf_name": "mcemilg/mkqa_tr",
"generative": false
},
"xlsum_tr": {
"name": "XLSum",
"task": "summarization",
"description": "Abstractive summarization dataset for 44 languages.",
"url": "https://github.com/csebuetnlp/xl-sum",
"hf_name": "csebuetnlp/xlsum",
"generative": true
},
"mlsum_tr": {
"name": "MLSum",
"task": "summarization",
"description": "A multilingual summarization dataset collected from the newspapers' websites. MLSum contains 1.5M examples in 5 languages including Turkish.",
"url": "https://huggingface.co/datasets/reciTAL/mlsum",
"hf_name": "reciTAL/mlsum",
"generative": true
},
"wiki_lingua_tr": {
"name": "WikiLingua",
"task": "summarization",
"description": "A multilingual abstractive summarization dataset covering 17 languages.",
"url": "https://github.com/esdurmus/Wikilingua",
"hf_name": "GEM/wiki_lingua",
"generative": true
},
"tr-wikihow-summ": {
"name": "WikiHow",
"task": "summarization",
"description": "A summarization dataset obtained from the WikiHow website.",
"url": "https://huggingface.co/datasets/ardauzunoglu/tr-wikihow-summ",
"hf_name": "ardauzunoglu/tr-wikihow-summ",
"generative": true
},
"mnli_tr": {
"name": "MNLI",
"task": "natural_language_inference",
"description": "Multi-Genre NLI (MNLI) dataset.",
"url": "https://cims.nyu.edu/~sbowman/multinli/",
"hf_name": "boun-tabi/nli_tr",
"generative": false
},
"snli_tr": {
"name": "SNLI",
"task": "natural_language_inference",
"description": "The Stanford NLI (SNLI) dataset.",
"url": "https://nlp.stanford.edu/projects/snli/",
"hf_name": "boun-tabi/nli_tr",
"generative": false
},
"xnli_tr": {
"name": "XNLI",
"task": "natural_language_inference",
"description": "The Cross-Lingual NLI (XNLI) dataset.",
"url": "https://github.com/facebookresearch/XNLI",
"hf_name": "boun-tabi/nli_tr",
"generative": false
},
"xcopa_tr": {
"name": "XCOPA",
"task": "multiple_choice",
"description": "A multilingual dataset for evaluating causal commonsense reasoning capabilities of language models.",
"url": "https://github.com/cambridgeltl/xcopa",
"hf_name": "cambridgeltl/xcopa",
"generative": false
},
"exams_tr": {
"name": "Exams",
"task": "multiple_choice",
"description": "A question answering dataset covering high school exams.",
"url": "https://huggingface.co/datasets/exams",
"hf_name": "exams",
"generative": false
},
"belebele_tr": {
"name": "Belebele",
"task": "multiple_choice",
"description": "A multiple choice question answering dataset to evaluate machine comprehension.",
"url": "https://github.com/facebookresearch/belebele",
"generative": false
},
"turkish_plu_goal_inference": {
"name": "PLU-GI",
"task": "multiple_choice",
"description": "TurkishPLU - Goal Inference task.",
"url": "https://github.com/GGLAB-KU/turkish-plu",
"hf_name": "mcemilg/turkish-plu-goal-inference",
"generative": false
},
"turkish_plu_next_event_prediction": {
"name": "PLU-NE",
"task": "multiple_choice",
"description": "TurkishPLU - Next Event Prediction task.",
"url": "https://github.com/GGLAB-KU/turkish-plu",
"hf_name": "mcemilg/turkish-plu-next-event-prediction",
"generative": false
},
"turkish_plu_step_inference": {
"name": "PLU-SI",
"task": "multiple_choice",
"description": "TurkishPLU - Step Inference task.",
"url": "https://github.com/GGLAB-KU/turkish-plu",
"hf_name": "mcemilg/turkish-plu-step-inference",
"generative": false
},
"turkish_plu_step_ordering": {
"name": "PLU-SO",
"task": "multiple_choice",
"description": "TurkishPLU - Step Ordering task.",
"url": "https://github.com/GGLAB-KU/turkish-plu",
"hf_name": "mcemilg/turkish-plu-step-ordering",
"generative": false
},
"sts_tr": {
"name": "STS",
"task": "text_classification",
"description": "The machine-translated Semantic Textual Similarity dataset in Turkish.",
"url": "https://github.com/emrecncelik/sts-benchmark-tr",
"hf_name": "emrecan/stsb-mt-turkish",
"generative": false
},
"offenseval_tr": {
"name": "OffensEval",
"task": "text_classification",
"description": "A dataset for offensive language identification in Turkish.",
"url": "https://sites.google.com/site/offensevalsharedtask/offenseval-2020",
"hf_name": "coltekin/offenseval2020_tr",
"generative": false
},
"news_cat": {
"name": "NewsCat",
"task": "text_classification",
"description": "News classification dataset collected from Turkish newspapers' websites.",
"url": "http://www.kemik.yildiz.edu.tr/veri_kumelerimiz.html",
"hf_name": "mcemilg/news-cat",
"generative": false
},
"ironytr": {
"name": "IronyTR",
"task": "text_classification",
"description": "Irony detection dataset in Turkish.",
"url": "https://github.com/teghub/IronyTR",
"hf_name": "mcemilg/IronyTR",
"generative": false
},
"wmt-tr-en-prompt": {
"name": "WMT",
"task": "machine_translation",
"description": "English-to-Turkish machine translation dataset.",
"url": "http://www.aclweb.org/anthology/W/W16/W16-2301",
"hf_name": "wmt/wmt16",
"generative": true
},
"gecturk_generation": {
"name": "GECTurk",
"task": "grammatical_error_correction",
"description": "A dataset for grammatical error correction.",
"url": "https://github.com/GGLAB-KU/gecturk",
"hf_name": "mcemilg/GECTurk-generation",
"generative": true
}
}