Spaces:

KUIS-AI
/

Cetvel

Running

File size: 7,622 Bytes

{
    "tquad": {
        "name": "TQUAD",
        "task": "extractive_question_answering",
        "description": "This dataset is the Turkish Question & Answer dataset on Turkish & Islamic Science History within the scope of Teknofest 2018 Artificial Intelligence competition.",
        "url": "https://github.com/TQuad/turkish-nlp-qa-dataset",
        "hf_name": "mcemilg/tquad",
        "generative": false
    },
    "xquad_tr": {
        "name": "XQUAD",
        "task": "extractive_question_answering",
        "description": "XQuAD (Cross-lingual Question Answering Dataset) is a benchmark dataset for evaluating cross-lingual question answering performance. The dataset consists of a subset of 240 paragraphs and 1190 question-answer pairs from the development set of SQuAD v1.1 together with their professional translations into ten languages: Spanish, German, Greek, Russian, Turkish, Arabic, Vietnamese, Thai, Chinese, and Hindi..",
        "url": "https://github.com/google-deepmind/xquad",
        "hf_name": "google/xquad",
        "generative": false
    },
    "mkqa_tr": {
        "name": "MKQA",
        "task": "extractive_question_answering",
        "description": "MKQA: Multilingual Knowledge Questions & Answers. MKQA includes 10k open-domain question-answer pairs in 26 languages, resulting 260k examples in total.",
        "url": "https://github.com/apple/ml-mkqa",
        "hf_name": "mcemilg/mkqa_tr",
        "generative": false
    },
    "xlsum_tr": {
        "name": "XLSum",
        "task": "summarization",
        "description": "Abstractive summarization dataset for 44 languages.",
        "url": "https://github.com/csebuetnlp/xl-sum",
        "hf_name": "csebuetnlp/xlsum",
        "generative": true
    },
    "mlsum_tr": {
        "name": "MLSum",
        "task": "summarization",
        "description": "A multilingual summarization dataset collected from the newspapers' websites. MLSum contains 1.5M examples in 5 languages including Turkish.",
        "url": "https://huggingface.co/datasets/reciTAL/mlsum",
        "hf_name": "reciTAL/mlsum",
        "generative": true
    },
    "wiki_lingua_tr": {
        "name": "WikiLingua",
        "task": "summarization",
        "description": "A multilingual abstractive summarization dataset covering 17 languages.",
        "url": "https://github.com/esdurmus/Wikilingua",
        "hf_name": "GEM/wiki_lingua",
        "generative": true
    },
    "tr-wikihow-summ": {
        "name": "WikiHow",
        "task": "summarization",
        "description": "A summarization dataset obtained from WikiHow website.",
        "url": "https://huggingface.co/datasets/ardauzunoglu/tr-wikihow-summ",
        "hf_name": "ardauzunoglu/tr-wikihow-summ",
        "generative": true
    },
    "mnli_tr": {
        "name": "MNLI",
        "task": "natural_language_inference",
        "description": "Multi-Genre NLI (MNLI) dataset.",
        "url": "https://cims.nyu.edu/~sbowman/multinli/",
        "hf_name": "boun-tabi/nli_tr",
        "generative": false
    },
    "snli_tr": {
        "name": "SNLI",
        "task": "natural_language_inference",
        "description": "The Stanford NLI (SNLI) dataset.",
        "url": "https://nlp.stanford.edu/projects/snli/",
        "hf_name": "boun-tabi/nli_tr",
        "generative": false
    },
    "xnli_tr": {
        "name": "XNLI",
        "task": "natural_language_inference",
        "description": "The Cross-Lingual NLI (XNLI) dataset.",
        "url": "https://github.com/facebookresearch/XNLI",
        "hf_name": "boun-tabi/nli_tr",
        "generative": false
    },
    "xcopa_tr": {
        "name": "XCOPA",
        "task": "multiple_choice",
        "description": "A multilingual dataset for evaluating causal commonsense reasoning capabilities of language models.",
        "url": "https://github.com/cambridgeltl/xcopa",
        "hf_name": "cambridgeltl/xcopa",
        "generative": false
    },
    "exams_tr": {
        "name": "Exams",
        "task": "multiple_choice",
        "description": "A question answering dataset covering high school exams.",
        "url": "https://huggingface.co/datasets/exams",
        "hf_name": "exams",
        "generative": false
    },
    "belebele_tr": {
        "name": "Belebele",
        "task": "multiple_choice",
        "description": "A multiple choice question answering dataset to evaluate machine comprehension.",
        "url": "https://github.com/facebookresearch/belebele",
        "generative": false
    },
    "turkish_plu_goal_inference": {
        "name": "PLU-GI",
        "task": "multiple_choice",
        "description": "TurkishPLU - Goal Inference task.",
        "url": "https://github.com/GGLAB-KU/turkish-plu",
        "hf_name": "mcemilg/turkish-plu-goal-inference",
        "generative": false
    },
    "turkish_plu_next_event_prediction": {
        "name": "PLU-NE",
        "task": "multiple_choice",
        "description": "TurkishPLU - Next Event Prediction task.",
        "url": "https://github.com/GGLAB-KU/turkish-plu",
        "hf_name": "mcemilg/turkish-plu-next-event-prediction",
        "generative": false
    },
    "turkish_plu_step_inference": {
        "name": "PLU-SI",
        "task": "multiple_choice",
        "description": "TurkishPLU - Step Inference task.",
        "url": "https://github.com/GGLAB-KU/turkish-plu",
        "hf_name": "mcemilg/turkish-plu-step-inference",
        "generative": false
    },
    "turkish_plu_step_ordering": {
        "name": "PLU-SO",
        "task": "multiple_choice",
        "description": "TurkishPLU - Step Ordering task.",
        "url": "https://github.com/GGLAB-KU/turkish-plu",
        "hf_name": "mcemilg/turkish-plu-step-ordering",
        "generative": false
    },
    "sts_tr": {
        "name": "STS",
        "task": "text_classification",
        "description": "The machine-translated Semantic Textual Similarity dataset in Turkish.",
        "url": "https://github.com/emrecncelik/sts-benchmark-tr",
        "hf_name": "emrecan/stsb-mt-turkish",
        "generative": false
    },
    "offenseval_tr": {
        "name": "OffensEval",
        "task": "text_classification",
        "description": "A dataset for offensive speech recognition in Turkish.",
        "url": "https://sites.google.com/site/offensevalsharedtask/offenseval-2020",
        "hf_name": "coltekin/offenseval2020_tr",
        "generative": false
    },
    "news_cat": {
        "name": "NewsCat",
        "task": "text_classification",
        "description": "News classification dataset collected from Turkish newspapers websites.",
        "url": "http://www.kemik.yildiz.edu.tr/veri_kumelerimiz.html",
        "hf_name": "mcemilg/news-cat",
        "generative": false
    },
    "ironytr": {
        "name": "IronyTR",
        "task": "text_classification",
        "description": "Irony detection dataset in Turkish.",
        "url": "https://github.com/teghub/IronyTR",
        "hf_name": "mcemilg/IronyTR",
        "generative": false
    },
    "wmt-tr-en-prompt": {
        "name": "WMT",
        "task": "machine_translation",
        "description": "English-to-Turkish machine translation dataset.",
        "url": "http://www.aclweb.org/anthology/W/W16/W16-2301",
        "hf_name": "wmt/wmt16",
        "generative": true
    },
    "gecturk_generation": {
        "name": "GECTurk",
        "task": "grammatical_error_correction",
        "description": "A dataset for grammatical error correction.",
        "url": "https://github.com/GGLAB-KU/gecturk",
        "hf_name": "mcemilg/GECTurk-generation",
        "generative": true
    }
}