Spaces:

SwastikM
/

Embedding-Quantization

Sleeping

Embedding-Quantization / conala /dataset_info.json

Upload 19 files

ef56763 verified 4 months ago

No virus

1.79 kB

	{
	"builder_name": "parquet",
	"citation": "@inproceedings{yin2018learning,\n title={Learning to mine aligned code and natural language pairs from stack overflow},\n author={Yin, Pengcheng and Deng, Bowen and Chen, Edgar and Vasilescu, Bogdan and Neubig, Graham},\n booktitle={2018 IEEE/ACM 15th international conference on mining software repositories (MSR)},\n pages={476--486},\n year={2018},\n organization={IEEE}\n}\n",
	"config_name": "mined",
	"dataset_name": "conala",
	"dataset_size": 104561297,
	"description": "CoNaLa is a dataset of code and natural language pairs crawled from Stack Overflow, for more details please refer to this paper: https://arxiv.org/pdf/1805.08949.pdf or the dataset page https://conala-corpus.github.io/.\n",
	"download_checksums": {
	"hf://datasets/neulab/conala@798cef31a9b480d9c31aed21e745c9e485ed2647/mined/train/0000.parquet": {
	"num_bytes": 74356953,
	"checksum": null
	}
	},
	"download_size": 74356953,
	"features": {
	"question_id": {
	"dtype": "int64",
	"_type": "Value"
	},
	"parent_answer_post_id": {
	"dtype": "int64",
	"_type": "Value"
	},
	"prob": {
	"dtype": "float64",
	"_type": "Value"
	},
	"snippet": {
	"dtype": "string",
	"_type": "Value"
	},
	"intent": {
	"dtype": "string",
	"_type": "Value"
	},
	"id": {
	"dtype": "string",
	"_type": "Value"
	}
	},
	"homepage": "https://conala-corpus.github.io/",
	"license": "",
	"size_in_bytes": 178918250,
	"splits": {
	"train": {
	"name": "train",
	"num_bytes": 104561297,
	"num_examples": 593891,
	"dataset_name": "conala"
	}
	},
	"version": {
	"version_str": "1.1.0",
	"major": 1,
	"minor": 1,
	"patch": 0
	}
	}