Embedding-Quantization / conala /dataset_info.json
SwastikM's picture
Upload 19 files
ef56763 verified
raw
history blame contribute delete
No virus
1.79 kB
{
"builder_name": "parquet",
"citation": "@inproceedings{yin2018learning,\n title={Learning to mine aligned code and natural language pairs from stack overflow},\n author={Yin, Pengcheng and Deng, Bowen and Chen, Edgar and Vasilescu, Bogdan and Neubig, Graham},\n booktitle={2018 IEEE/ACM 15th international conference on mining software repositories (MSR)},\n pages={476--486},\n year={2018},\n organization={IEEE}\n}\n",
"config_name": "mined",
"dataset_name": "conala",
"dataset_size": 104561297,
"description": "CoNaLa is a dataset of code and natural language pairs crawled from Stack Overflow, for more details please refer to this paper: https://arxiv.org/pdf/1805.08949.pdf or the dataset page https://conala-corpus.github.io/.\n",
"download_checksums": {
"hf://datasets/neulab/conala@798cef31a9b480d9c31aed21e745c9e485ed2647/mined/train/0000.parquet": {
"num_bytes": 74356953,
"checksum": null
}
},
"download_size": 74356953,
"features": {
"question_id": {
"dtype": "int64",
"_type": "Value"
},
"parent_answer_post_id": {
"dtype": "int64",
"_type": "Value"
},
"prob": {
"dtype": "float64",
"_type": "Value"
},
"snippet": {
"dtype": "string",
"_type": "Value"
},
"intent": {
"dtype": "string",
"_type": "Value"
},
"id": {
"dtype": "string",
"_type": "Value"
}
},
"homepage": "https://conala-corpus.github.io/",
"license": "",
"size_in_bytes": 178918250,
"splits": {
"train": {
"name": "train",
"num_bytes": 104561297,
"num_examples": 593891,
"dataset_name": "conala"
}
},
"version": {
"version_str": "1.1.0",
"major": 1,
"minor": 1,
"patch": 0
}
}