{
  "builder_name": "parquet",
  "citation": "@inproceedings{yin2018learning,\n  title={Learning to mine aligned code and natural language pairs from stack overflow},\n  author={Yin, Pengcheng and Deng, Bowen and Chen, Edgar and Vasilescu, Bogdan and Neubig, Graham},\n  booktitle={2018 IEEE/ACM 15th international conference on mining software repositories (MSR)},\n  pages={476--486},\n  year={2018},\n  organization={IEEE}\n}\n",
  "config_name": "mined",
  "dataset_name": "conala",
  "dataset_size": 104561297,
  "description": "CoNaLa is a dataset of code and natural language pairs crawled from Stack Overflow. For more details, please refer to the paper: https://arxiv.org/pdf/1805.08949.pdf or the dataset page https://conala-corpus.github.io/.\n",
  "download_checksums": {
    "hf://datasets/neulab/conala@798cef31a9b480d9c31aed21e745c9e485ed2647/mined/train/0000.parquet": {
      "num_bytes": 74356953,
      "checksum": null
    }
  },
  "download_size": 74356953,
  "features": {
    "question_id": {
      "dtype": "int64",
      "_type": "Value"
    },
    "parent_answer_post_id": {
      "dtype": "int64",
      "_type": "Value"
    },
    "prob": {
      "dtype": "float64",
      "_type": "Value"
    },
    "snippet": {
      "dtype": "string",
      "_type": "Value"
    },
    "intent": {
      "dtype": "string",
      "_type": "Value"
    },
    "id": {
      "dtype": "string",
      "_type": "Value"
    },
    "embedding": {
      "feature": {
        "dtype": "float32",
        "_type": "Value"
      },
      "_type": "Sequence"
    }
  },
  "homepage": "https://conala-corpus.github.io/",
  "license": "",
  "size_in_bytes": 178918250,
  "splits": {
    "train": {
      "name": "train",
      "num_bytes": 104561297,
      "num_examples": 593891,
      "dataset_name": "conala"
    }
  },
  "version": {
    "version_str": "1.1.0",
    "major": 1,
    "minor": 1,
    "patch": 0
  }
}