{ "builder_name": "parquet", "citation": "@inproceedings{yin2018learning,\n title={Learning to mine aligned code and natural language pairs from stack overflow},\n author={Yin, Pengcheng and Deng, Bowen and Chen, Edgar and Vasilescu, Bogdan and Neubig, Graham},\n booktitle={2018 IEEE/ACM 15th international conference on mining software repositories (MSR)},\n pages={476--486},\n year={2018},\n organization={IEEE}\n}\n", "config_name": "mined", "dataset_name": "conala", "dataset_size": 104561297, "description": "CoNaLa is a dataset of code and natural language pairs crawled from Stack Overflow. For more details, please refer to this paper: https://arxiv.org/pdf/1805.08949.pdf or the dataset page: https://conala-corpus.github.io/.\n", "download_checksums": { "hf://datasets/neulab/conala@798cef31a9b480d9c31aed21e745c9e485ed2647/mined/train/0000.parquet": { "num_bytes": 74356953, "checksum": null } }, "download_size": 74356953, "features": { "question_id": { "dtype": "int64", "_type": "Value" }, "parent_answer_post_id": { "dtype": "int64", "_type": "Value" }, "prob": { "dtype": "float64", "_type": "Value" }, "snippet": { "dtype": "string", "_type": "Value" }, "intent": { "dtype": "string", "_type": "Value" }, "id": { "dtype": "string", "_type": "Value" }, "embedding": { "feature": { "dtype": "float32", "_type": "Value" }, "_type": "Sequence" } }, "homepage": "https://conala-corpus.github.io/", "license": "", "size_in_bytes": 178918250, "splits": { "train": { "name": "train", "num_bytes": 104561297, "num_examples": 593891, "dataset_name": "conala" } }, "version": { "version_str": "1.1.0", "major": 1, "minor": 1, "patch": 0 } }