chrisdono committed on
Commit
e81bd43
1 Parent(s): e6bf7a9

added root files

Browse files
file_copies_from_project_root/dataset_train_augmented.csv ADDED
The diff for this file is too large to render. See raw diff
 
file_copies_from_project_root/dataset_val_augmented.csv ADDED
The diff for this file is too large to render. See raw diff
 
file_copies_from_project_root/finetune.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from typing import List
4
+
5
+ import fire
6
+ import torch
7
+ import transformers
8
+ from datasets import load_dataset
9
+ import json
10
+
11
+ """
12
+ Unused imports:
13
+ import torch.nn as nn
14
+ import bitsandbytes as bnb
15
+ """
16
+
17
+ from peft import (
18
+ LoraConfig,
19
+ get_peft_model,
20
+ get_peft_model_state_dict,
21
+ prepare_model_for_int8_training,
22
+ set_peft_model_state_dict,
23
+ )
24
+ from transformers import LlamaForCausalLM, LlamaTokenizer
25
+
26
+ from utils.prompter import Prompter
27
+
28
def train(
    # model/data params
    base_model: str = "",  # the only required argument
    data_path: str = "yahma/alpaca-cleaned",
    output_dir: str = "./lora-alpaca",
    # training hyperparams
    batch_size: int = 128,
    micro_batch_size: int = 4,
    num_epochs: int = 3,
    learning_rate: float = 3e-4,
    cutoff_len: int = 256,
    val_set_size: int = 2000,
    # lora hyperparams
    lora_r: int = 8,
    lora_alpha: int = 16,
    lora_dropout: float = 0.05,
    lora_target_modules: List[str] = [
        "q_proj",
        "v_proj",
    ],
    # llm hyperparams
    train_on_inputs: bool = True,  # if False, masks out inputs in loss
    add_eos_token: bool = False,
    group_by_length: bool = False,  # faster, but produces an odd training loss curve
    # wandb params
    wandb_project: str = "",
    wandb_run_name: str = "",
    wandb_watch: str = "",  # options: false | gradients | all
    wandb_log_model: str = "",  # options: false | true
    resume_from_checkpoint: str = None,  # either training checkpoint or final adapter
    prompt_template_name: str = "alpaca",  # The prompt template to use, will default to alpaca.
):
    """Fine-tune an 8-bit LLaMA base model with LoRA adapters on the Spider CSVs.

    The training data is always read from ``dataset_train_augmented.csv`` and
    ``dataset_val_augmented.csv`` in the current working directory; each row
    must provide the ``question``, ``db_context`` and ``query`` columns.

    NOTE: ``data_path`` is accepted for CLI compatibility and echoed in the
    parameter summary, but it is currently NOT used to select the dataset.

    Args:
        base_model: Model name or path passed to ``from_pretrained``. Required.
        data_path: Unused (see note above).
        output_dir: Directory for checkpoints and the final adapter.
        batch_size: Target global batch size; combined with
            ``micro_batch_size`` to derive the gradient accumulation steps.
        micro_batch_size: Per-device batch size.
        num_epochs: Number of training epochs.
        learning_rate: Optimizer learning rate.
        cutoff_len: Maximum tokenized sequence length (longer prompts are
            truncated).
        val_set_size: Number of rows split off the train split for
            evaluation; ``0`` disables evaluation.
        lora_r, lora_alpha, lora_dropout, lora_target_modules: Forwarded to
            ``LoraConfig``. The default list is never mutated here.
        train_on_inputs: When False, the prompt part of each example is
            masked with ``-100`` so only the response contributes to the loss.
        add_eos_token: Whether the user-prompt tokenization used for masking
            appends an EOS token.
        group_by_length: Forwarded to ``TrainingArguments``.
        wandb_project, wandb_run_name, wandb_watch, wandb_log_model: Weights &
            Biases settings; non-empty values are exported as environment
            variables.
        resume_from_checkpoint: Directory containing either a full training
            checkpoint or a saved LoRA adapter.
        prompt_template_name: Template name handed to ``Prompter``.

    Raises:
        AssertionError: If ``base_model`` is empty.
    """
    if int(os.environ.get("LOCAL_RANK", 0)) == 0:
        print(
            f"Training Alpaca-LoRA model with params:\n"
            f"base_model: {base_model}\n"
            f"data_path: {data_path}\n"
            f"output_dir: {output_dir}\n"
            f"batch_size: {batch_size}\n"
            f"micro_batch_size: {micro_batch_size}\n"
            f"num_epochs: {num_epochs}\n"
            f"learning_rate: {learning_rate}\n"
            f"cutoff_len: {cutoff_len}\n"
            f"val_set_size: {val_set_size}\n"
            f"lora_r: {lora_r}\n"
            f"lora_alpha: {lora_alpha}\n"
            f"lora_dropout: {lora_dropout}\n"
            f"lora_target_modules: {lora_target_modules}\n"
            f"train_on_inputs: {train_on_inputs}\n"
            f"add_eos_token: {add_eos_token}\n"
            f"group_by_length: {group_by_length}\n"
            f"wandb_project: {wandb_project}\n"
            f"wandb_run_name: {wandb_run_name}\n"
            f"wandb_watch: {wandb_watch}\n"
            f"wandb_log_model: {wandb_log_model}\n"
            f"resume_from_checkpoint: {resume_from_checkpoint or False}\n"
            f"prompt template: {prompt_template_name}\n"
        )
    assert (
        base_model
    ), "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"
    # Never let integer division drive the accumulation count down to 0.
    gradient_accumulation_steps = max(1, batch_size // micro_batch_size)

    prompter = Prompter(prompt_template_name)

    device_map = "auto"
    world_size = int(os.environ.get("WORLD_SIZE", 1))
    ddp = world_size != 1
    if ddp:
        # Under DDP each process owns exactly one device.
        device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
        gradient_accumulation_steps = max(
            1, gradient_accumulation_steps // world_size
        )

    # Check if parameter passed or if set within environ
    use_wandb = len(wandb_project) > 0 or (
        "WANDB_PROJECT" in os.environ and len(os.environ["WANDB_PROJECT"]) > 0
    )
    # Only overwrite environ if wandb param passed
    if len(wandb_project) > 0:
        os.environ["WANDB_PROJECT"] = wandb_project
    if len(wandb_watch) > 0:
        os.environ["WANDB_WATCH"] = wandb_watch
    if len(wandb_log_model) > 0:
        os.environ["WANDB_LOG_MODEL"] = wandb_log_model

    model = LlamaForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=True,
        torch_dtype=torch.float16,
        device_map=device_map,
    )

    tokenizer = LlamaTokenizer.from_pretrained(base_model)

    tokenizer.pad_token_id = (
        0  # unk. we want this to be different from the eos token
    )
    tokenizer.padding_side = "left"  # Allow batched inference

    def tokenize(prompt, add_eos_token=True):
        """Tokenize ``prompt`` (truncated to ``cutoff_len``) and attach labels.

        An EOS token is appended when requested, when the sequence does not
        already end with one, and when there is still room below
        ``cutoff_len``. ``labels`` starts out as a copy of ``input_ids``.
        """
        result = tokenizer(
            prompt,
            truncation=True,
            max_length=cutoff_len,
            padding=False,
            return_tensors=None,
        )
        if (
            result["input_ids"][-1] != tokenizer.eos_token_id
            and len(result["input_ids"]) < cutoff_len
            and add_eos_token
        ):
            result["input_ids"].append(tokenizer.eos_token_id)
            result["attention_mask"].append(1)

        result["labels"] = result["input_ids"].copy()

        return result

    def generate_and_tokenize_prompt(data_point):
        """Build the Spider prompt for one CSV row and tokenize it."""
        # Spider columns: question -> instruction, db_context -> input,
        # query -> label.
        full_prompt = prompter.generate_prompt(
            instruction=data_point["question"],
            input=data_point["db_context"],
            label=data_point["query"],
        )

        tokenized_full_prompt = tokenize(full_prompt)
        if not train_on_inputs:
            # Tokenize the prompt without the answer to learn how many
            # leading tokens have to be excluded from the loss.
            user_prompt = prompter.generate_prompt(
                data_point["question"], data_point["db_context"]
            )
            tokenized_user_prompt = tokenize(
                user_prompt, add_eos_token=add_eos_token
            )
            user_prompt_len = len(tokenized_user_prompt["input_ids"])

            if add_eos_token:
                # The trailing EOS belongs to the prompt-only tokenization,
                # not to the prefix shared with the full prompt.
                user_prompt_len -= 1

            # -100 is the label value these training labels use to mask
            # positions out of the loss.
            tokenized_full_prompt["labels"] = [
                -100
            ] * user_prompt_len + tokenized_full_prompt["labels"][
                user_prompt_len:
            ]
        return tokenized_full_prompt

    model = prepare_model_for_int8_training(model)

    config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        target_modules=lora_target_modules,
        lora_dropout=lora_dropout,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, config)

    # For SPIDER only: the dataset is always the two local CSV files.
    # Previously the dataset at `data_path` was loaded first and then thrown
    # away, which cost a download and could fail for no benefit.
    data = load_dataset(
        "csv",
        data_files={
            "train": "dataset_train_augmented.csv",
            "validation": "dataset_val_augmented.csv",
        },
    )

    if resume_from_checkpoint:
        # Check the available weights and load them
        checkpoint_name = os.path.join(
            resume_from_checkpoint, "pytorch_model.bin"
        )  # Full checkpoint
        if not os.path.exists(checkpoint_name):
            checkpoint_name = os.path.join(
                resume_from_checkpoint, "adapter_model.bin"
            )  # only LoRA model - LoRA config above has to fit
            resume_from_checkpoint = (
                False  # So the trainer won't try loading its state
            )
        # The two files above have a different name depending on how they
        # were saved, but are actually the same.
        if os.path.exists(checkpoint_name):
            print(f"Restarting from {checkpoint_name}")
            adapters_weights = torch.load(checkpoint_name)
            set_peft_model_state_dict(model, adapters_weights)
        else:
            print(f"Checkpoint {checkpoint_name} not found")

    model.print_trainable_parameters()  # Be more transparent about the % of trainable params.

    if val_set_size > 0:
        train_val = data["train"].train_test_split(
            test_size=val_set_size, shuffle=True, seed=42
        )
        train_data = (
            train_val["train"].shuffle().map(generate_and_tokenize_prompt)
        )
        print("TRAIN DATA")
        print(train_data[0])
        val_data = (
            train_val["test"].shuffle().map(generate_and_tokenize_prompt)
        )
    else:
        train_data = data["train"].shuffle().map(generate_and_tokenize_prompt)
        val_data = None

    if not ddp and torch.cuda.device_count() > 1:
        # keeps Trainer from trying its own DataParallelism when more than 1 gpu is available
        model.is_parallelizable = True
        model.model_parallel = True

    trainer = transformers.Trainer(
        model=model,
        train_dataset=train_data,
        eval_dataset=val_data,
        args=transformers.TrainingArguments(
            per_device_train_batch_size=micro_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            warmup_steps=100,
            num_train_epochs=num_epochs,
            learning_rate=learning_rate,
            fp16=True,
            logging_steps=10,
            optim="adamw_torch",
            evaluation_strategy="steps" if val_set_size > 0 else "no",
            save_strategy="steps",
            eval_steps=200 if val_set_size > 0 else None,
            save_steps=200,
            output_dir=output_dir,
            save_total_limit=3,
            load_best_model_at_end=True if val_set_size > 0 else False,
            ddp_find_unused_parameters=False if ddp else None,
            group_by_length=group_by_length,
            report_to="wandb" if use_wandb else None,
            run_name=wandb_run_name if use_wandb else None,
        ),
        data_collator=transformers.DataCollatorForSeq2Seq(
            tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
        ),
    )
    model.config.use_cache = False

    # Route state_dict() through get_peft_model_state_dict so that callers of
    # model.state_dict() receive the PEFT state dict.
    old_state_dict = model.state_dict
    model.state_dict = (
        lambda self, *_, **__: get_peft_model_state_dict(
            self, old_state_dict()
        )
    ).__get__(model, type(model))

    # Compare the major version numerically; a plain string comparison
    # (`torch.__version__ >= "2"`) misorders versions such as "10.0".
    torch_major = int(torch.__version__.split(".")[0])
    if torch_major >= 2 and sys.platform != "win32":
        model = torch.compile(model)

    trainer.train(resume_from_checkpoint=resume_from_checkpoint)

    model.save_pretrained(output_dir)

    print(
        "\n If there's a warning about missing keys above, please disregard :)"
    )
305
+
306
+
307
+ if __name__ == "__main__":
308
+ fire.Fire(train)
file_copies_from_project_root/generate.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ import fire
5
+ import gradio as gr
6
+ import torch
7
+ import transformers
8
+ from peft import PeftModel
9
+ from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
10
+
11
+ from utils.callbacks import Iteratorize, Stream
12
+ from utils.prompter import Prompter
13
+
14
# Select the inference device: CUDA when available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Prefer Apple's MPS backend when this torch build exposes it and reports it
# as available. Older torch builds have no `torch.backends.mps`, which
# surfaces as AttributeError; catching only that keeps real errors (and
# KeyboardInterrupt) visible instead of swallowing everything.
try:
    if torch.backends.mps.is_available():
        device = "mps"
except AttributeError:
    pass
24
+
25
+
26
def main(
    load_8bit: bool = False,
    base_model: str = "",
    lora_weights: str = "tloen/alpaca-lora-7b",
    prompt_template: str = "",  # The prompt template to use, will default to alpaca.
    server_name: str = "0.0.0.0",  # Allows to listen on all interfaces by providing '0.0.0.0'
    share_gradio: bool = False,
):
    """Load a LLaMA base model plus LoRA weights and serve it through Gradio.

    Args:
        load_8bit: Load the base model in 8-bit (CUDA path only).
        base_model: Model name or path; falls back to the ``BASE_MODEL``
            environment variable when empty.
        lora_weights: Name or path of the LoRA adapter to apply.
        prompt_template: Template name handed to ``Prompter``.
        server_name: Host/interface the Gradio server binds to.
        share_gradio: Forwarded to ``launch(share=...)``.

    Raises:
        AssertionError: If no base model is given by argument or environment.
    """
    base_model = base_model or os.environ.get("BASE_MODEL", "")
    assert (
        base_model
    ), "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"

    prompter = Prompter(prompt_template)
    tokenizer = LlamaTokenizer.from_pretrained(base_model)
    if device == "cuda":
        # Everything is pinned to GPU 0 instead of using device_map="auto".
        model = LlamaForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map={"": 0},
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            torch_dtype=torch.float16,
            device_map={"": 0},
        )
    elif device == "mps":
        model = LlamaForCausalLM.from_pretrained(
            base_model,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
    else:
        model = LlamaForCausalLM.from_pretrained(
            base_model, device_map={"": device}, low_cpu_mem_usage=True
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
        )

    # unwind broken decapoda-research config
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_8bit:
        model.half()  # seems to fix bugs for some users.

    model.eval()
    # Compare the major version numerically; a plain string comparison
    # (`torch.__version__ >= "2"`) misorders versions such as "10.0".
    torch_major = int(torch.__version__.split(".")[0])
    if torch_major >= 2 and sys.platform != "win32":
        model = torch.compile(model)

    def evaluate(
        instruction,
        input=None,
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        num_beams=4,
        max_new_tokens=128,
        stream_output=False,
        **kwargs,
    ):
        """Generate a response for one instruction/input pair.

        This is a generator: with ``stream_output`` it yields the growing
        response after each new token, otherwise it yields the final response
        once. Extra ``kwargs`` are forwarded to ``GenerationConfig``.
        """
        prompt = prompter.generate_prompt(instruction, input)
        inputs = tokenizer(prompt, return_tensors="pt")
        input_ids = inputs["input_ids"].to(device)
        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
            **kwargs,
        )

        generate_params = {
            "input_ids": input_ids,
            "generation_config": generation_config,
            "return_dict_in_generate": True,
            "output_scores": True,
            "max_new_tokens": max_new_tokens,
        }

        if stream_output:
            # Stream the reply 1 token at a time.
            # This is based on the trick of using 'stopping_criteria' to create an iterator,
            # from https://github.com/oobabooga/text-generation-webui/blob/ad37f396fc8bcbab90e11ecf17c56c97bfbd4a9c/modules/text_generation.py#L216-L243.

            def generate_with_callback(callback=None, **kwargs):
                kwargs.setdefault(
                    "stopping_criteria", transformers.StoppingCriteriaList()
                )
                kwargs["stopping_criteria"].append(
                    Stream(callback_func=callback)
                )
                with torch.no_grad():
                    model.generate(**kwargs)

            def generate_with_streaming(**kwargs):
                return Iteratorize(
                    generate_with_callback, kwargs, callback=None
                )

            with generate_with_streaming(**generate_params) as generator:
                for output in generator:
                    decoded_output = tokenizer.decode(output)

                    # Stop before yielding once the model emits EOS.
                    if output[-1] == tokenizer.eos_token_id:
                        break

                    yield prompter.get_response(decoded_output)
            return  # early return for stream_output

        # Without streaming
        with torch.no_grad():
            generation_output = model.generate(**generate_params)
        s = generation_output.sequences[0]
        output = tokenizer.decode(s)
        yield prompter.get_response(output)

    gr.Interface(
        fn=evaluate,
        inputs=[
            gr.components.Textbox(
                lines=2,
                label="Instruction",
                placeholder="Tell me about alpacas.",
            ),
            gr.components.Textbox(lines=2, label="Input", placeholder="none"),
            gr.components.Slider(
                minimum=0, maximum=1, value=0.1, label="Temperature"
            ),
            gr.components.Slider(
                minimum=0, maximum=1, value=0.75, label="Top p"
            ),
            gr.components.Slider(
                minimum=0, maximum=100, step=1, value=40, label="Top k"
            ),
            gr.components.Slider(
                minimum=1, maximum=4, step=1, value=4, label="Beams"
            ),
            gr.components.Slider(
                minimum=1, maximum=2000, step=1, value=128, label="Max tokens"
            ),
            gr.components.Checkbox(label="Stream output"),
        ],
        outputs=[
            # Same component namespace as the inputs above; the `gr.inputs`
            # alias is deprecated.
            gr.components.Textbox(
                lines=5,
                label="Output",
            )
        ],
        title="🦙🌲 Alpaca-LoRA",
        description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",  # noqa: E501
        # Honour the caller-supplied host instead of a hard-coded "0.0.0.0".
    ).queue().launch(server_name=server_name, share=share_gradio)
198
+ # Old testing code follows.
199
+
200
+ """
201
+ # testing code for readme
202
+ for instruction in [
203
+ "Tell me about alpacas.",
204
+ "Tell me about the president of Mexico in 2019.",
205
+ "Tell me about the king of France in 2019.",
206
+ "List all Canadian provinces in alphabetical order.",
207
+ "Write a Python program that prints the first 10 Fibonacci numbers.",
208
+ "Write a program that prints the numbers from 1 to 100. But for multiples of three print 'Fizz' instead of the number and for the multiples of five print 'Buzz'. For numbers which are multiples of both three and five print 'FizzBuzz'.", # noqa: E501
209
+ "Tell me five words that rhyme with 'shock'.",
210
+ "Translate the sentence 'I have no mouth but I must scream' into Spanish.",
211
+ "Count up from 1 to 500.",
212
+ ]:
213
+ print("Instruction:", instruction)
214
+ print("Response:", evaluate(instruction))
215
+ print()
216
+ """
217
+
218
+
219
+ if __name__ == "__main__":
220
+ fire.Fire(main)
file_copies_from_project_root/generate_eval.py ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ import fire
5
+ import gradio as gr
6
+ import torch
7
+ import transformers
8
+ from peft import PeftModel
9
+ from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
10
+
11
+ from utils.callbacks import Iteratorize, Stream
12
+ from utils.prompter import Prompter
13
+ from datasets import load_dataset
14
+
15
# Select the inference device: CUDA when available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Prefer Apple's MPS backend when this torch build exposes it and reports it
# as available. Older torch builds have no `torch.backends.mps`, which
# surfaces as AttributeError; catching only that keeps real errors (and
# KeyboardInterrupt) visible instead of swallowing everything.
try:
    if torch.backends.mps.is_available():
        device = "mps"
except AttributeError:
    pass
25
+
26
+
27
def main(
    load_8bit: bool = False,
    base_model: str = "",
    lora_weights: str = "tloen/alpaca-lora-7b",
    prompt_template: str = "",  # The prompt template to use, will default to alpaca.
    server_name: str = "0.0.0.0",  # Unused: no Gradio server is launched by this script.
    share_gradio: bool = False,  # Unused: no Gradio server is launched by this script.
    gold_file: str = "gold.txt",
    predict_file: str = "predict.txt",
    num_tests: int = 10,
):
    """Generate SQL predictions for validation rows and write gold/predict files.

    Loads the base model plus LoRA weights, shuffles the ``validation`` split
    of the local Spider CSVs with a fixed seed, generates one prediction for
    each of the first ``num_tests`` rows and writes two line-aligned files:
    ``gold_file`` (``query<TAB>db_id`` per line) and ``predict_file`` (one
    prediction per line).

    Args:
        load_8bit: Load the base model in 8-bit (CUDA path only).
        base_model: Model name or path; falls back to the ``BASE_MODEL``
            environment variable when empty.
        lora_weights: Name or path of the LoRA adapter to apply.
        prompt_template: Template name handed to ``Prompter``.
        server_name: Unused.
        share_gradio: Unused.
        gold_file: Output path for the reference queries.
        predict_file: Output path for the model predictions.
        num_tests: Number of validation rows to evaluate; clamped to the
            size of the validation split.

    Raises:
        AssertionError: If no base model is given by argument or environment.
    """
    base_model = base_model or os.environ.get("BASE_MODEL", "")
    assert (
        base_model
    ), "Please specify a --base_model, e.g. --base_model='huggyllama/llama-7b'"

    prompter = Prompter(prompt_template)
    tokenizer = LlamaTokenizer.from_pretrained(base_model)
    if device == "cuda":
        # Everything is pinned to GPU 0 instead of using device_map="auto".
        model = LlamaForCausalLM.from_pretrained(
            base_model,
            load_in_8bit=load_8bit,
            torch_dtype=torch.float16,
            device_map={"": 0},
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            torch_dtype=torch.float16,
            device_map={"": 0},
        )
    elif device == "mps":
        model = LlamaForCausalLM.from_pretrained(
            base_model,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
            torch_dtype=torch.float16,
        )
    else:
        model = LlamaForCausalLM.from_pretrained(
            base_model, device_map={"": device}, low_cpu_mem_usage=True
        )
        model = PeftModel.from_pretrained(
            model,
            lora_weights,
            device_map={"": device},
        )

    # unwind broken decapoda-research config
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    if not load_8bit:
        model.half()  # seems to fix bugs for some users.

    model.eval()
    # Compare the major version numerically; a plain string comparison
    # (`torch.__version__ >= "2"`) misorders versions such as "10.0".
    torch_major = int(torch.__version__.split(".")[0])
    if torch_major >= 2 and sys.platform != "win32":
        model = torch.compile(model)

    data = load_dataset(
        "csv",
        data_files={
            "train": "dataset_train_augmented.csv",
            "validation": "dataset_val_augmented.csv",
        },
    )
    # Fixed seed so repeated runs evaluate the same rows in the same order.
    data_val = data["validation"].shuffle(seed=42)
    print(data_val)
    datalength = len(data_val)
    print(datalength)

    # Asking for more rows than exist used to end in an IndexError.
    num_examples = min(num_tests, datalength)
    if num_examples < num_tests:
        print(
            f"Requested {num_tests} tests but only {datalength} validation "
            f"rows are available; evaluating {num_examples}."
        )

    # The generation settings do not depend on the example: build them once.
    generation_config = GenerationConfig(
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        num_beams=4,
    )
    max_new_tokens = 128

    golds = []
    predicts = []
    for idx in range(num_examples):
        print(idx)
        # Fetch the row once instead of materialising four whole columns.
        row = data_val[idx]
        instr = row["question"]
        inp = row["db_context"]
        new_gold = row["query"] + "\t" + row["db_id"]
        golds.append(new_gold)
        print("INSTRUCTION")
        print(instr)
        print("INPUT")
        print(inp)
        print("GOLD")
        print(new_gold)

        prompt = prompter.generate_prompt(instr, inp)
        inputs = tokenizer(prompt, return_tensors="pt")
        input_ids = inputs["input_ids"].to(device)

        # Without streaming
        with torch.no_grad():
            generation_output = model.generate(
                input_ids=input_ids,
                generation_config=generation_config,
                return_dict_in_generate=True,
                output_scores=True,
                max_new_tokens=max_new_tokens,
            )
        s = generation_output.sequences[0]
        output = tokenizer.decode(s)

        # Drop everything from the first "<unk>" on (token id 0 is used as
        # the pad token above).
        prediction = prompter.get_response(output).split("<unk>")[0]
        # Keep the prediction on a single line: the gold and prediction files
        # are matched line by line, so an embedded newline would shift every
        # following example.
        prediction = " ".join(
            part.strip() for part in prediction.splitlines() if part.strip()
        )

        print("OUTPUT")
        print(prediction)
        predicts.append(prediction)

    with open(gold_file, "w", encoding="utf-8") as file:
        file.writelines(g + "\n" for g in golds)

    with open(predict_file, "w", encoding="utf-8") as file:
        file.writelines(p + "\n" for p in predicts)
260
+
261
+ # fg = open(gold_file, 'w')
262
+ # fp = open(predict_file, 'w')
263
+
264
+ # No need for gradio interface
265
+ '''
266
+ gr.Interface(
267
+ fn=evaluate,
268
+ inputs=[
269
+ gr.components.Textbox(
270
+ lines=2,
271
+ label="Instruction",
272
+ placeholder="Tell me about alpacas.",
273
+ ),
274
+ gr.components.Textbox(lines=2, label="Input", placeholder="none"),
275
+ gr.components.Slider(
276
+ minimum=0, maximum=1, value=0.1, label="Temperature"
277
+ ),
278
+ gr.components.Slider(
279
+ minimum=0, maximum=1, value=0.75, label="Top p"
280
+ ),
281
+ gr.components.Slider(
282
+ minimum=0, maximum=100, step=1, value=40, label="Top k"
283
+ ),
284
+ gr.components.Slider(
285
+ minimum=1, maximum=4, step=1, value=4, label="Beams"
286
+ ),
287
+ gr.components.Slider(
288
+ minimum=1, maximum=2000, step=1, value=128, label="Max tokens"
289
+ ),
290
+ gr.components.Checkbox(label="Stream output"),
291
+ ],
292
+ outputs=[
293
+ gr.inputs.Textbox(
294
+ lines=5,
295
+ label="Output",
296
+ )
297
+ ],
298
+ title="🦙🌲 Alpaca-LoRA",
299
+ description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).", # noqa: E501
300
+ ).queue().launch(server_name="0.0.0.0", share=share_gradio)
301
+ '''
302
+
303
+ # Old testing code follows.
304
+
305
+ """
306
+ # testing code for readme
307
+ for instruction in [
308
+ "Tell me about alpacas.",
309
+ "Tell me about the president of Mexico in 2019.",
310
+ "Tell me about the king of France in 2019.",
311
+ "List all Canadian provinces in alphabetical order.",
312
+ "Write a Python program that prints the first 10 Fibonacci numbers.",
313
+ "Write a program that prints the numbers from 1 to 100. But for multiples of three print 'Fizz' instead of the number and for the multiples of five print 'Buzz'. For numbers which are multiples of both three and five print 'FizzBuzz'.", # noqa: E501
314
+ "Tell me five words that rhyme with 'shock'.",
315
+ "Translate the sentence 'I have no mouth but I must scream' into Spanish.",
316
+ "Count up from 1 to 500.",
317
+ ]:
318
+ print("Instruction:", instruction)
319
+ print("Response:", evaluate(instruction))
320
+ print()
321
+ """
322
+
323
+
324
+ if __name__ == "__main__":
325
+ fire.Fire(main)
file_copies_from_project_root/gold3.txt ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SELECT count(*) , country_code FROM players GROUP BY country_code wta_1
2
+ SELECT count(*) FROM Documents AS T1 JOIN Templates AS T2 ON T1.Template_ID = T2.Template_ID WHERE T2.Template_Type_Code = 'PPT' cre_Doc_Template_Mgt
3
+ SELECT count(*) FROM Cartoon WHERE Written_by = "Joseph Kuhr"; tvshow
4
+ SELECT name , country , age FROM singer ORDER BY age DESC concert_singer
5
+ SELECT Name FROM country WHERE continent = "Europe" AND Population = "80000" world_1
6
+ SELECT count(*) FROM Highschooler WHERE grade = 9 OR grade = 10 network_1
7
+ SELECT COUNT(*) FROM (SELECT T1.Name FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T2.Language = "English" INTERSECT SELECT T1.Name FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T2.Language = "Dutch") world_1
8
+ select name from teacher where hometown != "little lever urban district" course_teach
9
+ SELECT T1.address_id , T1.line_1 , T1.line_2 FROM Addresses AS T1 JOIN Students AS T2 ON T1.address_id = T2.current_address_id GROUP BY T1.address_id ORDER BY count(*) DESC LIMIT 1 student_transcripts_tracking
10
+ SELECT Orchestra FROM orchestra WHERE Orchestra_ID NOT IN (SELECT Orchestra_ID FROM performance) orchestra
11
+ SELECT DISTINCT T1.model FROM MODEL_LIST AS T1 JOIN CAR_NAMES AS T2 ON T1.Model = T2.Model JOIN CARS_DATA AS T3 ON T2.MakeId = T3.Id JOIN CAR_MAKERS AS T4 ON T1.Maker = T4.Id WHERE T3.weight < 3500 AND T4.FullName != 'Ford Motor Company'; car_1
12
+ SELECT count(*) FROM conductor orchestra
13
+ select t2.name , t2.capacity from concert as t1 join stadium as t2 on t1.stadium_id = t2.stadium_id where t1.year > 2013 group by t2.stadium_id order by count(*) desc limit 1 concert_singer
14
+ SELECT grade FROM Highschooler WHERE name = "Kyle" network_1
15
+ SELECT DISTINCT T2.semester_id FROM Degree_Programs AS T1 JOIN Student_Enrolment AS T2 ON T1.degree_program_id = T2.degree_program_id WHERE degree_summary_name = 'Master' INTERSECT SELECT DISTINCT T2.semester_id FROM Degree_Programs AS T1 JOIN Student_Enrolment AS T2 ON T1.degree_program_id = T2.degree_program_id WHERE degree_summary_name = 'Bachelor' student_transcripts_tracking
16
+ SELECT avg(LifeExpectancy) FROM country WHERE Continent = "Africa" AND GovernmentForm = "Republic" world_1
17
+ SELECT T2.Language FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T1.HeadOfState = "Beatrix" AND T2.IsOfficial = "T" world_1
18
+ SELECT first_name , middle_name , last_name FROM Students ORDER BY date_first_registered ASC LIMIT 1 student_transcripts_tracking
19
+ SELECT count(*) FROM student AS T1 JOIN has_pet AS T2 ON T1.stuid = T2.stuid WHERE T1.age > 20 pets_1
20
+ SELECT section_name FROM Sections ORDER BY section_name DESC student_transcripts_tracking
21
+ SELECT count(*) FROM Paragraphs AS T1 JOIN Documents AS T2 ON T1.document_ID = T2.document_ID WHERE T2.document_name = 'Summer Show' cre_Doc_Template_Mgt
22
+ SELECT document_id FROM Paragraphs GROUP BY document_id HAVING count(*) >= 2 cre_Doc_Template_Mgt
23
+ SELECT country FROM TV_Channel EXCEPT SELECT T1.country FROM TV_Channel AS T1 JOIN cartoon AS T2 ON T1.id = T2.Channel WHERE T2.written_by = 'Todd Casey' tvshow
24
+ select t1.name from country as t1 join countrylanguage as t2 on t1.code = t2.countrycode where t2.language = "english" and isofficial = "t" union select t1.name from country as t1 join countrylanguage as t2 on t1.code = t2.countrycode where t2.language = "dutch" and isofficial = "t" world_1
25
+ SELECT count(*) FROM CAR_MAKERS AS T1 JOIN MODEL_LIST AS T2 ON T1.Id = T2.Maker WHERE T1.FullName = 'American Motor Company'; car_1
26
+ SELECT name FROM shop WHERE shop_id NOT IN (SELECT shop_id FROM hiring) employee_hire_evaluation
27
+ SELECT template_id FROM Templates EXCEPT SELECT template_id FROM Documents cre_Doc_Template_Mgt
28
+ SELECT DISTINCT T1.course_name FROM Courses AS T1 JOIN Student_Enrolment_Courses AS T2 ON T1.course_id = T2.course_id student_transcripts_tracking
29
+ SELECT T2.template_id FROM Ref_template_types AS T1 JOIN Templates AS T2 ON T1.template_type_code = T2.template_type_code WHERE T1.template_type_description = "Presentation" cre_Doc_Template_Mgt
30
+ SELECT count(*) FROM teacher course_teach
31
+ SELECT template_type_code FROM Templates GROUP BY template_type_code HAVING count(*) < 3 cre_Doc_Template_Mgt
32
+ SELECT Record_Company , COUNT(*) FROM orchestra GROUP BY Record_Company orchestra
33
+ SELECT Name FROM people WHERE People_ID NOT IN (SELECT People_ID FROM poker_player) poker_player
34
+ SELECT T2.property_type_description FROM Properties AS T1 JOIN Ref_Property_Types AS T2 ON T1.property_type_code = T2.property_type_code GROUP BY T1.property_type_code real_estate_properties
35
+ SELECT T3.Name , T2.Course FROM course_arrange AS T1 JOIN course AS T2 ON T1.Course_ID = T2.Course_ID JOIN teacher AS T3 ON T1.Teacher_ID = T3.Teacher_ID ORDER BY T3.Name course_teach
36
+ SELECT template_type_code FROM Ref_template_types WHERE template_type_description = "Book" cre_Doc_Template_Mgt
37
+ SELECT T1.FullName , T1.Id FROM CAR_MAKERS AS T1 JOIN MODEL_LIST AS T2 ON T1.Id = T2.Maker GROUP BY T1.Id HAVING count(*) > 3; car_1
38
+ SELECT T2.feature_type_name FROM Other_Available_Features AS T1 JOIN Ref_Feature_Types AS T2 ON T1.feature_type_code = T2.feature_type_code WHERE T1.feature_name = "AirCon" real_estate_properties
39
+ SELECT department_description FROM Departments WHERE department_name LIKE '%computer%' student_transcripts_tracking
40
+ select t1.first_name from students as t1 join addresses as t2 on t1.permanent_address_id = t2.address_id where t2.country = 'haiti' or t1.cell_mobile_number = '09700166582' student_transcripts_tracking
41
+ SELECT Airline FROM AIRLINES WHERE Abbreviation = "UAL" flight_2
42
+ SELECT charge_type , charge_amount FROM Charges dog_kennels
43
+ SELECT professional_id , last_name , cell_number FROM Professionals WHERE state = 'Indiana' UNION SELECT T1.professional_id , T1.last_name , T1.cell_number FROM Professionals AS T1 JOIN Treatments AS T2 ON T1.professional_id = T2.professional_id GROUP BY T1.professional_id HAVING count(*) > 2 dog_kennels
44
+ SELECT count(*) FROM pets WHERE weight > 10 pets_1
45
+ SELECT first_name , birth_date FROM players WHERE country_code = 'USA' wta_1
46
+ SELECT T1.model FROM CAR_NAMES AS T1 JOIN CARS_DATA AS T2 ON T1.MakeId = T2.Id WHERE T2.Weight < (SELECT avg(Weight) FROM CARS_DATA) car_1
47
+ SELECT T1.name , T2.date_of_treatment FROM Dogs AS T1 JOIN Treatments AS T2 ON T1.dog_id = T2.dog_id WHERE T1.breed_code = ( SELECT breed_code FROM Dogs GROUP BY breed_code ORDER BY count(*) ASC LIMIT 1 ) dog_kennels
48
+ SELECT Name FROM conductor WHERE Nationality != 'USA' orchestra
49
+ SELECT T1.Airline FROM AIRLINES AS T1 JOIN FLIGHTS AS T2 ON T1.uid = T2.Airline WHERE T2.SourceAirport = "CVO" EXCEPT SELECT T1.Airline FROM AIRLINES AS T1 JOIN FLIGHTS AS T2 ON T1.uid = T2.Airline WHERE T2.SourceAirport = "APG" flight_2
50
+ SELECT T1.lname FROM student AS T1 JOIN has_pet AS T2 ON T1.stuid = T2.stuid JOIN pets AS T3 ON T3.petid = T2.petid WHERE T3.pet_age = 3 AND T3.pettype = 'cat' pets_1
51
+ SELECT Population , LifeExpectancy FROM country WHERE Name = "Brazil" world_1
52
+ SELECT Name FROM country WHERE Continent = "Africa" AND population < (SELECT max(population) FROM country WHERE Continent = "Asia") world_1
53
+ SELECT count(*) , District FROM city WHERE Population > (SELECT avg(Population) FROM city) GROUP BY District world_1
54
+ SELECT max(Final_Table_Made) FROM poker_player WHERE Earnings < 200000 poker_player
55
+ SELECT avg(Weight) , YEAR FROM CARS_DATA GROUP BY YEAR; car_1
56
+ SELECT name , grade FROM Highschooler network_1
57
+ SELECT grade FROM Highschooler WHERE name = "Kyle" network_1
58
+ SELECT avg(horsepower) FROM CARS_DATA WHERE YEAR < 1980; car_1
59
+ SELECT t2.visitor_id , t1.name , t1.Level_of_membership FROM visitor AS t1 JOIN visit AS t2 ON t1.id = t2.visitor_id GROUP BY t2.visitor_id ORDER BY sum(t2.Total_spent) DESC LIMIT 1 museum_visit
60
+ SELECT T2.name FROM singer_in_concert AS T1 JOIN singer AS T2 ON T1.singer_id = T2.singer_id JOIN concert AS T3 ON T1.concert_id = T3.concert_id WHERE T3.year = 2014 concert_singer
61
+ SELECT DISTINCT T2.Model FROM CAR_NAMES AS T1 JOIN MODEL_LIST AS T2 ON T1.Model = T2.Model JOIN CAR_MAKERS AS T3 ON T2.Maker = T3.Id JOIN CARS_DATA AS T4 ON T1.MakeId = T4.Id WHERE T3.FullName = 'General Motors' OR T4.weight > 3500; car_1
62
+ SELECT country_code FROM players GROUP BY country_code ORDER BY count(*) DESC LIMIT 1 wta_1
63
+ SELECT T1.model FROM CAR_NAMES AS T1 JOIN CARS_DATA AS T2 ON T1.MakeId = T2.Id WHERE T2.Weight < (SELECT avg(Weight) FROM CARS_DATA) car_1
64
+ SELECT Country FROM AIRLINES WHERE Airline = "JetBlue Airways" flight_2
65
+ SELECT count(*) FROM Professionals WHERE professional_id NOT IN ( SELECT professional_id FROM Treatments ) dog_kennels
66
+ SELECT T1.series_name FROM TV_Channel AS T1 JOIN TV_series AS T2 ON T1.id = T2.Channel WHERE T2.Episode = "A Love of a Lifetime"; tvshow
67
+ SELECT avg(T2.Earnings) FROM people AS T1 JOIN poker_player AS T2 ON T1.People_ID = T2.People_ID WHERE T1.Height > 200 poker_player
68
+ SELECT T1.City FROM AIRPORTS AS T1 JOIN FLIGHTS AS T2 ON T1.AirportCode = T2.DestAirport GROUP BY T1.City ORDER BY count(*) DESC LIMIT 1 flight_2
69
+ SELECT DISTINCT T1.Maker FROM CAR_MAKERS AS T1 JOIN MODEL_LIST AS T2 ON T1.Id = T2.Maker JOIN CAR_NAMES AS T3 ON T2.model = T3.model JOIN CARS_DATA AS T4 ON T3.MakeId = T4.id WHERE T4.year = '1970'; car_1
70
+ SELECT professional_id , last_name , cell_number FROM Professionals WHERE state = 'Indiana' UNION SELECT T1.professional_id , T1.last_name , T1.cell_number FROM Professionals AS T1 JOIN Treatments AS T2 ON T1.professional_id = T2.professional_id GROUP BY T1.professional_id HAVING count(*) > 2 dog_kennels
71
+ SELECT DISTINCT T1.first_name , T1.last_name FROM Professionals AS T1 JOIN Treatments AS T2 WHERE cost_of_treatment < ( SELECT avg(cost_of_treatment) FROM Treatments ) dog_kennels
72
+ SELECT first_name , birth_date FROM players WHERE country_code = 'USA' wta_1
73
+ SELECT Citizenship FROM singer WHERE Birth_Year < 1945 INTERSECT SELECT Citizenship FROM singer WHERE Birth_Year > 1955 singer
74
+ SELECT song_name , song_release_year FROM singer ORDER BY age LIMIT 1 concert_singer
75
+ SELECT Name FROM country ORDER BY Population DESC LIMIT 3 world_1
76
+ SELECT DISTINCT T1.first_name , T1.last_name FROM Professionals AS T1 JOIN Treatments AS T2 WHERE cost_of_treatment < ( SELECT avg(cost_of_treatment) FROM Treatments ) dog_kennels
77
+ SELECT sum(ranking_points) , T1.first_name FROM players AS T1 JOIN rankings AS T2 ON T1.player_id = T2.player_id GROUP BY T1.first_name wta_1
78
+ SELECT count(*) FROM MODEL_LIST AS T1 JOIN CAR_MAKERS AS T2 ON T1.Maker = T2.Id JOIN COUNTRIES AS T3 ON T2.Country = T3.CountryId WHERE T3.CountryName = 'usa'; car_1
79
+ SELECT Episode FROM TV_series ORDER BY rating tvshow
80
+ SELECT Title FROM Cartoon ORDER BY title tvshow
81
+ SELECT count(*) FROM Friend AS T1 JOIN Highschooler AS T2 ON T1.student_id = T2.id WHERE T2.name = "Kyle" network_1
82
+ SELECT count(DISTINCT pettype) FROM pets pets_1
83
+ SELECT email_address , cell_number , home_phone FROM professionals dog_kennels
84
+ SELECT major , age FROM student WHERE stuid NOT IN (SELECT T1.stuid FROM student AS T1 JOIN has_pet AS T2 ON T1.stuid = T2.stuid JOIN pets AS T3 ON T3.petid = T2.petid WHERE T3.pettype = 'cat') pets_1
85
+ SELECT Name FROM conductor ORDER BY Age ASC orchestra
86
+ SELECT winner_name , loser_name FROM matches ORDER BY minutes DESC LIMIT 1 wta_1
87
+ SELECT country FROM TV_Channel EXCEPT SELECT T1.country FROM TV_Channel AS T1 JOIN cartoon AS T2 ON T1.id = T2.Channel WHERE T2.written_by = 'Todd Casey' tvshow
88
+ SELECT Country , count(*) FROM TV_Channel GROUP BY Country ORDER BY count(*) DESC LIMIT 1; tvshow
89
+ SELECT T2.Language FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T1.GovernmentForm = "Republic" GROUP BY T2.Language HAVING COUNT(*) = 1 world_1
90
+ SELECT T1.Airline FROM AIRLINES AS T1 JOIN FLIGHTS AS T2 ON T1.uid = T2.Airline WHERE T2.DestAirport = "AHD" flight_2
91
+ SELECT DISTINCT T2.Name FROM country AS T1 JOIN city AS T2 ON T2.CountryCode = T1.Code WHERE T1.Continent = 'Europe' AND T1.Name NOT IN (SELECT T3.Name FROM country AS T3 JOIN countrylanguage AS T4 ON T3.Code = T4.CountryCode WHERE T4.IsOfficial = 'T' AND T4.Language = 'English') world_1
92
+ SELECT section_description FROM Sections WHERE section_name = 'h' student_transcripts_tracking
93
+ SELECT T1.series_name FROM TV_Channel AS T1 JOIN Cartoon AS T2 ON T1.id = T2.Channel WHERE T2.Title = "The Rise of the Blue Beetle!"; tvshow
94
+ SELECT Name FROM conductor WHERE Nationality != 'USA' orchestra
95
+ SELECT T1.Continent FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode GROUP BY T1.Continent ORDER BY COUNT(*) DESC LIMIT 1 world_1
96
+ SELECT T1.Name FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T2.Language = "English" INTERSECT SELECT T1.Name FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T2.Language = "French" world_1
97
+ SELECT T1.FlightNo FROM FLIGHTS AS T1 JOIN AIRLINES AS T2 ON T2.uid = T1.Airline WHERE T2.Airline = "United Airlines" flight_2
98
+ SELECT LANGUAGE , count(*) FROM TV_Channel GROUP BY LANGUAGE tvshow
99
+ SELECT T2.name FROM Friend AS T1 JOIN Highschooler AS T2 ON T1.student_id = T2.id GROUP BY T1.student_id HAVING count(*) >= 3 network_1
100
+ SELECT Air_Date FROM TV_series WHERE Episode = "A Love of a Lifetime"; tvshow
101
+ SELECT count(*) FROM Dogs WHERE age < ( SELECT avg(age) FROM Dogs ) dog_kennels
102
+ SELECT T1.Continent FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode GROUP BY T1.Continent ORDER BY COUNT(*) DESC LIMIT 1 world_1
103
+ SELECT name FROM shop WHERE number_products > (SELECT avg(number_products) FROM shop) employee_hire_evaluation
104
+ SELECT count(*) FROM AIRLINES AS T1 JOIN FLIGHTS AS T2 ON T2.Airline = T1.uid WHERE T1.Airline = "United Airlines" AND T2.SourceAirport = "AHD" flight_2
105
+ SELECT template_type_code , count(*) FROM Templates GROUP BY template_type_code cre_Doc_Template_Mgt
106
+ SELECT Title FROM Cartoon WHERE Directed_by = "Ben Jones" OR Directed_by = "Brandon Vietti"; tvshow
107
+ SELECT T1.paragraph_id , T1.paragraph_text FROM Paragraphs AS T1 JOIN Documents AS T2 ON T1.document_id = T2.document_id WHERE T2.Document_Name = 'Welcome to NY' cre_Doc_Template_Mgt
108
+ SELECT DISTINCT T1.template_type_description FROM Ref_template_types AS T1 JOIN Templates AS T2 ON T1.template_type_code = T2.template_type_code JOIN Documents AS T3 ON T2.Template_ID = T3.template_ID cre_Doc_Template_Mgt
109
+ SELECT contestant_name FROM contestants WHERE contestant_name != 'Jessie Alloway' voter_1
110
+ SELECT T2.name FROM Friend AS T1 JOIN Highschooler AS T2 ON T1.student_id = T2.id WHERE T2.grade > 5 GROUP BY T1.student_id HAVING count(*) >= 2 network_1
111
+ SELECT count(*) FROM FLIGHTS WHERE SourceAirport = "APG" flight_2
112
+ SELECT DISTINCT T3.Name FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode JOIN city AS T3 ON T1.Code = T3.CountryCode WHERE T2.IsOfficial = 'T' AND T2.Language = 'Chinese' AND T1.Continent = "Asia" world_1
113
+ SELECT T1.fname , T1.sex FROM student AS T1 JOIN has_pet AS T2 ON T1.stuid = T2.stuid GROUP BY T1.stuid HAVING count(*) > 1 pets_1
114
+ SELECT T1.FlightNo FROM FLIGHTS AS T1 JOIN AIRPORTS AS T2 ON T1.DestAirport = T2.AirportCode WHERE T2.City = "Aberdeen" flight_2
115
+ SELECT avg(ranking) , T1.first_name FROM players AS T1 JOIN rankings AS T2 ON T1.player_id = T2.player_id GROUP BY T1.first_name wta_1
116
+ SELECT T2.name FROM Friend AS T1 JOIN Highschooler AS T2 ON T1.student_id = T2.id INTERSECT SELECT T2.name FROM Likes AS T1 JOIN Highschooler AS T2 ON T1.liked_id = T2.id network_1
117
+ SELECT count(*) FROM Friend AS T1 JOIN Highschooler AS T2 ON T1.student_id = T2.id WHERE T2.name = "Kyle" network_1
118
+ SELECT Model FROM CAR_NAMES GROUP BY Model ORDER BY count(*) DESC LIMIT 1; car_1
119
+ SELECT count(DISTINCT loser_name) FROM matches wta_1
120
+ SELECT T1.owner_id , T2.first_name , T2.last_name FROM Dogs AS T1 JOIN Owners AS T2 ON T1.owner_id = T2.owner_id GROUP BY T1.owner_id ORDER BY count(*) DESC LIMIT 1 dog_kennels
121
+ SELECT weight FROM pets ORDER BY pet_age LIMIT 1 pets_1
122
+ SELECT T1.student_id , T1.first_name , T1.middle_name , T1.last_name , count(*) , T1.student_id FROM Students AS T1 JOIN Student_Enrolment AS T2 ON T1.student_id = T2.student_id GROUP BY T1.student_id ORDER BY count(*) DESC LIMIT 1 student_transcripts_tracking
123
+ SELECT Hometown , COUNT(*) FROM teacher GROUP BY Hometown course_teach
124
+ SELECT T2.document_name FROM Templates AS T1 JOIN Documents AS T2 ON T1.template_id = T2.template_id WHERE T1.template_type_code = "BK" cre_Doc_Template_Mgt
125
+ SELECT T1.Name FROM people AS T1 JOIN poker_player AS T2 ON T1.People_ID = T2.People_ID WHERE T2.Earnings > 300000 poker_player
126
+ SELECT winner_name FROM matches WHERE YEAR = 2013 INTERSECT SELECT winner_name FROM matches WHERE YEAR = 2016 wta_1
127
+ SELECT DISTINCT T1.Name FROM singer AS T1 JOIN song AS T2 ON T1.Singer_ID = T2.Singer_ID WHERE T2.Sales > 300000 singer
128
+ SELECT Record_Company FROM orchestra GROUP BY Record_Company ORDER BY COUNT(*) DESC LIMIT 1 orchestra
129
+ SELECT avg(Weight) , YEAR FROM CARS_DATA GROUP BY YEAR; car_1
130
+ SELECT DISTINCT CountryCode FROM countrylanguage WHERE LANGUAGE != "English" world_1
131
+ SELECT Count(*) , T2.FullName , T2.id FROM MODEL_LIST AS T1 JOIN CAR_MAKERS AS T2 ON T1.Maker = T2.Id GROUP BY T2.id; car_1
132
+ SELECT count(*) , YEAR FROM matches GROUP BY YEAR wta_1
133
+ SELECT count(*) FROM CARS_DATA WHERE horsepower > 150; car_1
134
+ SELECT other_student_details FROM Students ORDER BY other_student_details DESC student_transcripts_tracking
135
+ SELECT count(*) FROM country WHERE GovernmentForm = "Republic" world_1
136
+ SELECT T1.semester_name , T1.semester_id FROM Semesters AS T1 JOIN Student_Enrolment AS T2 ON T1.semester_id = T2.semester_id GROUP BY T1.semester_id ORDER BY count(*) DESC LIMIT 1 student_transcripts_tracking
137
+ SELECT document_id FROM Paragraphs WHERE paragraph_text = 'Brazil' INTERSECT SELECT document_id FROM Paragraphs WHERE paragraph_text = 'Ireland' cre_Doc_Template_Mgt
138
+ SELECT T1.series_name FROM TV_Channel AS T1 JOIN TV_series AS T2 ON T1.id = T2.Channel WHERE T2.Episode = "A Love of a Lifetime"; tvshow
139
+ SELECT name FROM Highschooler EXCEPT SELECT T2.name FROM Friend AS T1 JOIN Highschooler AS T2 ON T1.student_id = T2.id network_1
140
+ SELECT T2.name FROM Likes AS T1 JOIN Highschooler AS T2 ON T1.student_id = T2.id GROUP BY T1.student_id ORDER BY count(*) DESC LIMIT 1 network_1
141
+ SELECT name FROM employee WHERE Employee_ID NOT IN (SELECT Employee_ID FROM evaluation) employee_hire_evaluation
142
+ select production_code , channel from cartoon order by original_air_date desc limit 1 tvshow
143
+ select avg(horsepower) from cars_data where year < 1980; car_1
144
+ SELECT Record_Company FROM orchestra WHERE Year_of_Founded < 2003 INTERSECT SELECT Record_Company FROM orchestra WHERE Year_of_Founded > 2003 orchestra
145
+ SELECT T2.name , count(*) FROM singer_in_concert AS T1 JOIN singer AS T2 ON T1.singer_id = T2.singer_id GROUP BY T2.singer_id concert_singer
146
+ SELECT name , LOCATION , district FROM shop ORDER BY number_products DESC employee_hire_evaluation
147
+ SELECT count(*) FROM players wta_1
148
+ SELECT student_id FROM Friend INTERSECT SELECT liked_id FROM Likes network_1
149
+ SELECT T1.series_name FROM TV_Channel AS T1 JOIN Cartoon AS T2 ON T1.id = T2.Channel WHERE T2.Title = "The Rise of the Blue Beetle!"; tvshow
150
+ SELECT T2.Language FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T1.GovernmentForm = "Republic" GROUP BY T2.Language HAVING COUNT(*) = 1 world_1
151
+ SELECT document_id , template_id , Document_Description FROM Documents WHERE document_name = "Robbin CV" cre_Doc_Template_Mgt
152
+ SELECT T1.owner_id , T1.last_name FROM Owners AS T1 JOIN Dogs AS T2 ON T1.owner_id = T2.owner_id JOIN Treatments AS T3 ON T2.dog_id = T3.dog_id GROUP BY T1.owner_id ORDER BY count(*) DESC LIMIT 1 dog_kennels
153
+ SELECT DISTINCT template_type_code FROM Templates cre_Doc_Template_Mgt
154
+ SELECT count(*) FROM CARS_DATA WHERE Cylinders > 4; car_1
155
+ SELECT ID FROM Highschooler WHERE name = "Kyle" network_1
156
+ SELECT Name FROM teacher WHERE Teacher_id NOT IN (SELECT Teacher_id FROM course_arrange) course_teach
157
+ SELECT first_name , middle_name , last_name FROM Students ORDER BY date_left ASC LIMIT 1 student_transcripts_tracking
158
+ SELECT T1.Name FROM conductor AS T1 JOIN orchestra AS T2 ON T1.Conductor_ID = T2.Conductor_ID GROUP BY T2.Conductor_ID ORDER BY COUNT(*) DESC LIMIT 1 orchestra
159
+ SELECT T1.treatment_type_description FROM Treatment_types AS T1 JOIN Treatments AS T2 ON T1.treatment_type_code = T2.treatment_type_code GROUP BY T1.treatment_type_code ORDER BY sum(cost_of_treatment) ASC LIMIT 1 dog_kennels
160
+ SELECT T1.Model FROM CAR_NAMES AS T1 JOIN CARS_DATA AS T2 ON T1.MakeId = T2.Id WHERE T2.Cylinders = 4 ORDER BY T2.horsepower DESC LIMIT 1; car_1
161
+ SELECT Country FROM AIRLINES WHERE Airline = "JetBlue Airways" flight_2
162
+ SELECT template_type_code FROM Templates GROUP BY template_type_code ORDER BY count(*) DESC LIMIT 1 cre_Doc_Template_Mgt
163
+ SELECT DISTINCT T2.Model FROM CAR_NAMES AS T1 JOIN MODEL_LIST AS T2 ON T1.Model = T2.Model JOIN CAR_MAKERS AS T3 ON T2.Maker = T3.Id JOIN CARS_DATA AS T4 ON T1.MakeId = T4.Id WHERE T3.FullName = 'General Motors' OR T4.weight > 3500; car_1
164
+ SELECT T1.Model FROM CAR_NAMES AS T1 JOIN CARS_DATA AS T2 ON T1.MakeId = T2.Id ORDER BY T2.horsepower ASC LIMIT 1; car_1
165
+ SELECT count(*) , T1.stuid FROM student AS T1 JOIN has_pet AS T2 ON T1.stuid = T2.stuid GROUP BY T1.stuid pets_1
166
+ SELECT document_id FROM Paragraphs WHERE paragraph_text = 'Brazil' INTERSECT SELECT document_id FROM Paragraphs WHERE paragraph_text = 'Ireland' cre_Doc_Template_Mgt
167
+ SELECT template_id FROM Templates WHERE template_type_code = "PP" OR template_type_code = "PPT" cre_Doc_Template_Mgt
168
+ select contestant_name from contestants where contestant_name like "%al%" voter_1
169
+ SELECT count(DISTINCT Nationality) FROM people poker_player
170
+ SELECT count(*) FROM employee employee_hire_evaluation
171
+ SELECT Title FROM Cartoon WHERE Directed_by = "Ben Jones"; tvshow
172
+ SELECT student_id FROM Friend INTERSECT SELECT liked_id FROM Likes network_1
173
+ SELECT grade FROM Highschooler network_1
174
+ SELECT Nationality FROM people GROUP BY Nationality HAVING COUNT(*) >= 2 poker_player
175
+ SELECT count(*) FROM CAR_MAKERS AS T1 JOIN COUNTRIES AS T2 ON T1.Country = T2.CountryId WHERE T2.CountryName = 'france'; car_1
176
+ SELECT T2.horsepower , T1.Make FROM CAR_NAMES AS T1 JOIN CARS_DATA AS T2 ON T1.MakeId = T2.Id WHERE T2.cylinders = 3 ORDER BY T2.horsepower DESC LIMIT 1; car_1
177
+ SELECT FlightNo FROM FLIGHTS WHERE SourceAirport = "APG" flight_2
178
+ SELECT country FROM singer WHERE age > 40 INTERSECT SELECT country FROM singer WHERE age < 30 concert_singer
179
+ SELECT winner_name , winner_rank_points FROM matches GROUP BY winner_name ORDER BY count(*) DESC LIMIT 1 wta_1
180
+ SELECT grade , count(*) FROM Highschooler GROUP BY grade network_1
181
+ SELECT T1.owner_id , T1.zip_code FROM Owners AS T1 JOIN Dogs AS T2 ON T1.owner_id = T2.owner_id JOIN Treatments AS T3 ON T2.dog_id = T3.dog_id GROUP BY T1.owner_id ORDER BY sum(T3.cost_of_treatment) DESC LIMIT 1 dog_kennels
182
+ SELECT count(*) FROM FLIGHTS flight_2
183
+ SELECT AirportCode , AirportName FROM AIRPORTS WHERE city = "Anthony" flight_2
184
+ SELECT T1.killed , T1.injured FROM death AS T1 JOIN ship AS t2 ON T1.caused_by_ship_id = T2.id WHERE T2.tonnage = 't' battle_death
185
+ SELECT YEAR FROM matches GROUP BY YEAR ORDER BY count(*) DESC LIMIT 1 wta_1
186
+ SELECT DISTINCT T1.Name FROM singer AS T1 JOIN song AS T2 ON T1.Singer_ID = T2.Singer_ID WHERE T2.Sales > 300000 singer
187
+ SELECT T2.Name FROM course_arrange AS T1 JOIN teacher AS T2 ON T1.Teacher_ID = T2.Teacher_ID GROUP BY T2.Name HAVING COUNT(*) >= 2 course_teach
188
+ SELECT count(*) FROM CARS_DATA WHERE horsepower > 150; car_1
189
+ SELECT count(*) , LOCATION FROM shop GROUP BY LOCATION employee_hire_evaluation
190
+ SELECT T1.Name FROM singer AS T1 JOIN song AS T2 ON T1.Singer_ID = T2.Singer_ID GROUP BY T1.Name HAVING COUNT(*) > 1 singer
191
+ SELECT T1.last_name FROM Students AS T1 JOIN Addresses AS T2 ON T1.current_address_id = T2.address_id WHERE T2.state_province_county = 'NorthCarolina' EXCEPT SELECT DISTINCT T3.last_name FROM Students AS T3 JOIN Student_Enrolment AS T4 ON T3.student_id = T4.student_id student_transcripts_tracking
192
+ SELECT t1.name FROM visitor AS t1 JOIN visit AS t2 ON t1.id = t2.visitor_id JOIN museum AS t3 ON t3.Museum_ID = t2.Museum_ID WHERE t3.open_year < 2009 INTERSECT SELECT t1.name FROM visitor AS t1 JOIN visit AS t2 ON t1.id = t2.visitor_id JOIN museum AS t3 ON t3.Museum_ID = t2.Museum_ID WHERE t3.open_year > 2011 museum_visit
193
+ SELECT COUNT(T2.Language) FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T1.Name = "Aruba" world_1
194
+ SELECT Episode FROM TV_series ORDER BY rating tvshow
195
+ SELECT T1.Airline FROM AIRLINES AS T1 JOIN FLIGHTS AS T2 ON T1.uid = T2.Airline GROUP BY T1.Airline ORDER BY count(*) DESC LIMIT 1 flight_2
196
+ SELECT count(*) FROM COUNTRIES; car_1
197
+ SELECT count(*) FROM Courses student_transcripts_tracking
198
+ SELECT grade FROM Highschooler GROUP BY grade ORDER BY count(*) DESC LIMIT 1 network_1
199
+ SELECT count(*) FROM country WHERE GovernmentForm = "Republic" world_1
200
+ SELECT T1.fname , T1.age FROM student AS T1 JOIN has_pet AS T2 ON T1.stuid = T2.stuid JOIN pets AS T3 ON T3.petid = T2.petid WHERE T3.pettype = 'dog' AND T1.stuid NOT IN (SELECT T1.stuid FROM student AS T1 JOIN has_pet AS T2 ON T1.stuid = T2.stuid JOIN pets AS T3 ON T3.petid = T2.petid WHERE T3.pettype = 'cat') pets_1
file_copies_from_project_root/gold_base.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ SELECT count(*) , country_code FROM players GROUP BY country_code wta_1
2
+ SELECT count(*) FROM Documents AS T1 JOIN Templates AS T2 ON T1.Template_ID = T2.Template_ID WHERE T2.Template_Type_Code = 'PPT' cre_Doc_Template_Mgt
3
+ SELECT count(*) FROM Cartoon WHERE Written_by = "Joseph Kuhr"; tvshow
4
+ SELECT name , country , age FROM singer ORDER BY age DESC concert_singer
5
+ SELECT Name FROM country WHERE continent = "Europe" AND Population = "80000" world_1
6
+ SELECT count(*) FROM Highschooler WHERE grade = 9 OR grade = 10 network_1
7
+ SELECT COUNT(*) FROM (SELECT T1.Name FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T2.Language = "English" INTERSECT SELECT T1.Name FROM country AS T1 JOIN countrylanguage AS T2 ON T1.Code = T2.CountryCode WHERE T2.Language = "Dutch") world_1
8
+ select name from teacher where hometown != "little lever urban district" course_teach
9
+ SELECT T1.address_id , T1.line_1 , T1.line_2 FROM Addresses AS T1 JOIN Students AS T2 ON T1.address_id = T2.current_address_id GROUP BY T1.address_id ORDER BY count(*) DESC LIMIT 1 student_transcripts_tracking
10
+ SELECT Orchestra FROM orchestra WHERE Orchestra_ID NOT IN (SELECT Orchestra_ID FROM performance) orchestra
file_copies_from_project_root/predict3_processed.txt ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SELECT country_code , count(*) FROM loser GROUP BY country_code
2
+ SELECT count(*) FROM Documents AS T1 JOIN Templates AS T2 ON T1.template_id = T2.template_id WHERE T2.template_type_code = "PPT"
3
+ SELECT count(*) FROM cartoon WHERE Directed_by = "Joseph Kuhr"�, Title = "18 49 Rating Share"
4
+ SELECT T1.Name , T1.Country , T2.Age FROM concert_record AS T1 JOIN singer AS T2 ON T1.Concert_ID = T2.Concert_ID ORDER BY T2.Age
5
+ SELECT name FROM country_codes WHERE country_code = "Europe" AND population = 80000
6
+ SELECT count(*) FROM list AS t1 JOIN students AS t2 ON t1.stuid = t2.stuid WHERE t1.name = "high schooler" AND t1.grade = 9 OR t1.grade = 10
7
+ SELECT count(DISTRICT) FROM city WHERE language = 'English' AND language = 'Dutch'
8
+ SELECT T2.name FROM list AS T1 JOIN teacher AS T2 ON T1.Teacher_ID = T2.Teacher_ID WHERE T1.hometown != 'Little Lever Urban District'
9
+ SELECT T1.address_id , T1.line_1 , T1.line_2 , max(T2.course_id) FROM addresses AS T1 JOIN course_sections AS T2 ON T1.address_id = T2.current_address_id GROUP BY T1.address_id ORDER BY count(T2.course_id) DESC LIMIT 1;
10
+ SELECT T1.Orchestra FROM performance AS T1 JOIN orchestra AS T2 ON T1.Orchestra_ID = T2.Orchestra_ID WHERE T1.Performance NOT IN (SELECT T1.Performance FROM performance AS T1 JOIN performance_detail AS T2 ON T1.Performance_ID = T2.Performance_ID GROUP BY T1.Performance_ID HAVING COUNT(*) = 1)
11
+ SELECT model FROM motor_cycle WHERE mpg < 3500 EXCEPT SELECT T1.model FROM model AS T1 JOIN motor_cycle AS T2 ON T1.id = T2.model WHERE T2.maker = 'Ford Motor Company'
12
+ SELECT count(DISTINCT Conductor_ID) FROM conductor
13
+ SELECT T1.name , T1.capacity FROM concert AS T1 JOIN stadium AS T2 ON T1.Stadium_ID = T2.Stadium_ID GROUP BY T2.Stadium_ID HAVING count(*) >= (SELECT count(*) FROM concert WHERE YEAR >= 2013) ORDER BY T2.capacity DESC LIMIT 1
14
+ SELECT grade FROM students AS T1 JOIN friends AS T2 ON T1.StuID = T2.StuID WHERE T1.name = "Kyle"
15
+ SELECT semester_name FROM student_enrolments GROUP BY semester_name UNION SELECT semester_name FROM student_enrolments_masters GROUP BY semester_name UNION SELECT semester_name FROM student_enrolments_baccalaureate
16
+ SELECT avg(life_expectancy) FROM country WHERE country_code = "Africa" AND government_form = "Republic"
17
+ SELECT Language FROM country WHERE Head_of_State = "Beatrix"
18
+ SELECT T1.first_name , T1.middle_name , T1.last_name FROM student_registrations AS T1 JOIN addresses AS T2 ON T1.current_address_id = T2.address_id WHERE T1.student_id = (SELECT T1.student_id FROM student_registrations AS T1 JOIN addresses AS T2 ON T1.current_address_id = T2.address_id ORDER BY T1.student_id DESC LIMIT 1)
19
+ SELECT count(*) FROM pet WHERE age > 20 AND student_id IN (SELECT student_id FROM pet_has_pet WHERE pet_id IN (SELECT pet_id FROM pet_has_pet WHERE student_id > 20) , pet_id IN (SELECT pet_id FROM pet_has_pet WHERE student_id > 20) , pet_id IN (SELECT pet_id FROM pet_has_pet WHERE student_id > 20) , pet_id IN (SELECT pet_id FROM pet_has_pet WHERE student_
20
+ SELECT section_name FROM course_sections ORDER BY section_name LIMIT 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
21
+ SELECT count(*) FROM Documents AS T1 JOIN Paragraphs AS T2 ON T1.document_id = T2.paragraph_id JOIN Documents AS T3 ON T2.paragraph_id = T3.paragraph_id WHERE T3.document_name = "Summer Show"
22
+ SELECT T1.document_id FROM Document_contents AS T1 JOIN Paragraphs AS T2 ON T1.paragraph_id = T2.paragraph_id GROUP BY T1.document_id HAVING count(*) >= 2
23
+ SELECT country FROM channel WHERE channel_id NOT IN (SELECT id FROM 18_49_Rating_Share)
24
+ SELECT country_code , country_code FROM city WHERE language = 'English' OR language = 'Dutch'
25
+ SELECT count(*) FROM maker WHERE full_name = "American Motor Company"
26
+ SELECT name FROM shop EXCEPT SELECT T1.name FROM shop AS T1 JOIN employee AS T2 ON T1.shop_id = T2.shop_id WHERE T2.employee_id = (SELECT employee_id FROM shop WHERE name NOT IN (SELECT T1.name FROM shop AS T1 JOIN employee AS T2 ON T1.shop_id = T2.shop_id WHERE T2.employee_id = (SELECT T1.employee_id FROM shop AS T1 JOIN employee AS T2 ON T1.shop_id =
27
+ SELECT t1.id FROM templates AS t1 JOIN document_templates AS t2 ON t1.template_id = t2.template_id EXCEPT SELECT t1.id FROM document_templates AS t1 JOIN documents AS t2 ON t1.template_id = t2.template_id�
28
+ SELECT course_name FROM course_enrolments GROUP BY course_name HAVING count(*) > 1
29
+ SELECT t1.template_id FROM templates AS t1 JOIN document_templates AS t2 ON t1.template_id = t2.template_id WHERE t2.document_name = "Presentation"�
30
+ SELECT count(DISTINCT Teacher_ID) FROM course
31
+ SELECT T2.template_type_code FROM template_details AS T1 JOIN template_type AS T2 ON T1.template_type_code = T2.template_type_code GROUP BY T2.template_type_code HAVING count(*) < 3
32
+ SELECT T1.conductor_id , count(*) FROM performance AS T1 JOIN orchestra AS T2 ON T1.orchestra_id = T2.orchestra_id JOIN record_company AS T3 ON T1.record_company = T3.record_company GROUP BY T1.conductor_id , T3.record_companyC, T2.orchestra_id
33
+ SELECT Name FROM people EXCEPT SELECT T1.Name FROM people AS T1 JOIN poker_player AS T2 ON T1.People_ID = T2.People_ID WHERE T2.Poker_Player_ID NOT IN (SELECT T1.People_ID FROM people AS T1 JOIN poker_player AS T2 ON T1.People_ID = T2.People_ID WHERE T2.Poker_Player_ID IN (SELECT T1.People_ID FROM people AS T1 JOIN poker_player AS T2
34
+ SELECT T2.feature_type_description FROM features AS T1 JOIN property_features AS T2 ON T1.feature_id = T2.feature_id WHERE T1.feature_type_code = "APT_FEATURE_3"
35
+ SELECT T1.name , T2.course FROM course AS T1 JOIN teacher AS T2 ON T1.course = T2.course_id ORDER BY T1.name ASC
36
+ SELECT T2.template_type_code FROM TEMPLATES AS T1 JOIN TEMPLATE_TYPES AS T2 ON T1.template_type_code = T2.template_type_code WHERE T2.template_type_description = "Book"F
37
+ SELECT DISTINCT T1.Full_name , T1.Maker FROM maker AS T1 JOIN models AS T2 ON T1.Maker = T2.Maker GROUP BY T1.Maker HAVING count(*) > 3�, T2.Model
38
+ SELECT feature_type_name FROM Features WHERE feature_type_code = "AirCon"
39
+ SELECT department_description FROM course_sections WHERE course_name LIKE "%computer%"
40
+ SELECT T1.first_name FROM addresses AS T1 JOIN student_addresses AS T2 ON T1.address_id = T2.current_address_id WHERE T2.cell_mobile_number = 09700166582 OR T2.cell_mobile_number = 09700166582 AND T2.course_id = 1000000000000000000000000000000000
41
+ SELECT t1.abbreviation FROM airlines AS t1 JOIN flight AS t2 ON t1.airline = t2.source_airline WHERE t1.abbreviation = "UAL"
42
+ SELECT T2.charge_type_code , T2.charge_amount FROM CHARGES AS T1 JOIN TREATMENTS AS T2 ON T1.treatment_id = T2.treatment_id GROUP BY T2.charge_type_code , T2.charge_amount
43
+ SELECT T1.dog_id , T1.name , T1.cell_number FROM dog AS T1 JOIN treatment AS T2 ON T1.dog_id = T2.dog_id WHERE T2.treatment_type_code = "Treatment_Type_Professional" INTERSECT SELECT T1.dog_id , T1.name , T1.cell_number FROM dog AS T1 JOIN treatment AS T2 ON T1.dog_id = T2.dog_id WHERE T2.
44
+ SELECT pet_id FROM pet WHERE pet_weight > 10+
45
+ SELECT T1.first_name , T1.birth_date FROM player AS T1 JOIN loser AS T2 ON T1.player_id = T2.loser_id WHERE T2.country_code = 'USA'
46
+ SELECT Model FROM car WHERE Weight < (SELECT avg(Weight) FROM car)
47
+ SELECT Name , Date_of_Treatment FROM dog WHERE Treatment_Type_Code = (SELECT Treatment_Type_Code FROM dog_treatment_type WHERE breed_code = (SELECT breed_code FROM dog ORDER BY RANDOM()) ) ORDER BY RANDOM(), DATE_OF_TREATMENT DESC LIMIT 1
48
+ SELECT T1.Name FROM conductor AS T1 JOIN orchestra AS T2 ON T1.Conductor_ID = T2.Conductor_ID WHERE T1.Nationality != "USA"
49
+ SELECT DISTINCT T1.airline_id FROM airlines AS T1 JOIN flight AS T2 ON T1.airline_id = T2.source_airline WHERE T2.destination_airport = 'CVO' EXCEPT SELECT DISTINCT T1.airline_id FROM airlines AS T1 JOIN flight AS T2 ON T1.airline_id = T2.source_airline WHERE T2.destination_airport = 'APG'
50
+ SELECT T3.lname FROM pet AS T1 JOIN has_pet AS T2 ON T1.petid = T2.petid JOIN student AS T3 ON T2.stuid = T3.stuid WHERE T2.age = 3 AND T1.pettype = "cat" AND T1.petid = select petid FROM has_pet WHERE pettype = "cat" GROUP BY petid ORDER BY petid DESC LIMIT 1
51
+ SELECT Population , Life_Expectancy FROM district WHERE country_code = 'Brazil'
52
+ SELECT T1.country_code FROM country AS T1 JOIN city AS T2 ON T1.country_code = T2.country_code WHERE T2.population < (SELECT min(T3.population) FROM city AS T3 JOIN country AS T4 ON T3.country_code = T4.country_code WHERE T4.continent = 'Asia' AND T3.district = T4.district_code)C, T2.district_code IN (SELECT district_code FROM city WHERE country_
53
+ SELECT district , count(*) FROM city GROUP BY district HAVING count(*) > (SELECT avg(count()) FROM city)
54
+ SELECT max(T1.Best_Finish) FROM POKER_Player AS T1 JOIN Final_Table AS T2 ON T1.POKER_Player_ID = T2.Final_Table_Made WHERE T1.Earnings < 200000�, T2.people_id NOT IN (SELECT people_id FROM POKER_Player WHERE Nationality = 'Britain' OR Nationality = 'United States of America' OR Nationality = 'Canada' OR Nationality = '
55
+ SELECT avg(weight) , YEAR FROM motorcycle GROUP BY YEAR�, MATCH (year = (SELECT min(YEAR) FROM motorcycle) OR YEAR = (SELECT max(YEAR) FROM motorcycle) )
56
+ SELECT name , grade FROM school WHERE name = "johnson high school" UNION SELECT name , grade FROM school WHERE name = "johnson high school for boys" UNION SELECT name , grade FROM school WHERE name = "johnson high school for girls"
57
+ SELECT grade FROM students WHERE name = "Kyle"
58
+ SELECT avg(Hp) FROM car WHERE YEAR < 1980
59
+ SELECT T1.mID , T1.name , T1.level_of_membership FROM customer AS T2 JOIN museum AS T1 ON T2.museum_id = T1.mID GROUP BY T2.mID ORDER BY sum(T2.total_spent) DESC LIMIT 1F
60
+ SELECT T1.Name FROM concert AS T1 JOIN singer AS T2 ON T1.Concert_ID = T2.Concert_ID WHERE T1.Year = 2014 AND T2.Is_Male = 'Yes'F, AND T1.Concert_Name = "Concert in 2014"
61
+ SELECT DISTINCT T1.Model FROM maker AS T1 JOIN models AS T2 ON T1.MID = T2.MID WHERE T1.Full_Name = "General Motors" OR T1.Weigh > 3500
62
+ SELECT country_code FROM player GROUP BY country_code ORDER BY sum(player_rank_points) DESC LIMIT 1/]{'country_code', 'country code', 'hand', 'birth date', 'country code', 'best of', 'draw size', 'loser age', 'loser entry', 'loser hand', 'loser ht', 'loser id', 'loser ioc', 'loser name', 'loser rank', 'loser rank points', 'loser seed', 'match num', 'minutes', 'round', 'score', 'surface', 't
63
+ SELECT Model FROM car WHERE Weight < (SELECT avg(Weight) FROM car)
64
+ SELECT country_abbrev FROM airlines WHERE airline = 'Jetblue Airways'
65
+ SELECT count(*) FROM dog_treatment WHERE dog_id NOT IN (SELECT dog_id FROM dog_treatment WHERE treatment_type_code = "Treatment_Type_Code_for_Professionals_operating_on_dogs")
66
+ SELECT T1.Channel FROM series AS T1 JOIN episode AS T2 ON T1.ID = T2.Series_ID WHERE T2.Episodes = "A Love of a Lifetime" AND T2.Channel = "Channel 100"�
67
+ SELECT avg(EARNINGS) FROM poker_player WHERE People_ID > 200 AND Height > 200
68
+ SELECT destination_airport FROM flight GROUP BY destination_airport ORDER BY count(*) DESC LIMIT 1
69
+ SELECT DISTINCT T1.Full_name FROM maker AS T1 JOIN models AS T2 ON T1.Maker_ID = T2.Maker_ID WHERE T2.Year = 1970�, T1.Full_name != 'Toyota' AND T1.Full_name != 'Honda' AND T1.Full_name != 'Suzuki' AND T1.Full_name != 'Kawasaki' AND T1.Full_name != 'Yamaha' AND T1.Full_name != 'KTM
70
+ SELECT T1.dog_id , T2.last_name , T2.cell_number FROM dog AS T1 JOIN professional AS T2 ON T1.dog_id = T2.dog_id WHERE T2.state = 'Indiana' INTERSECT SELECT T1.dog_id , T2.last_name , T2.cell_number FROM dog AS T1 JOIN professional AS T2 ON T1.dog_id = T2.dog_id GROUP BY T1.dog_id HAVING count(*)
71
+ SELECT T2.first_name , T2.last_name FROM dog_treatment AS T1 JOIN professional AS T2 ON T1.professional_id = T2.professional_id WHERE T1.treatment_type_code = (SELECT cost_of_treatment FROM dog_treatment GROUP BY cost_of_treatment ORDER BY avg(cost_of_treatment) DESC LIMIT 1) AND T1.dog_id NOT IN (SELECT dog_id FROM dog_treatment WHERE cost_of_
72
+ SELECT T1.first_name , T1.birth_date FROM loser AS T1 JOIN player AS T2 ON T1.loser_name = T2.first_name AND T1.loser_birth_date = T2.birth_date AND T1.loser_country_code = 'USA'/]{'SELECT first_name , birth_date FROM loser AS T1 JOIN player AS T2 ON T1.loser_name = T2.first_name AND T1.loser_b
73
+ SELECT Citizenship FROM singer WHERE birth_year BETWEEN 1945 AND 1955 INTERSECT SELECT Citizenship FROM singer WHERE birth_year BETWEEN 1955 AND 1965+, AND Singer_id IN (SELECT T1.Singer_id FROM performance AS T1 JOIN singer AS T2 ON T1.Singer_id = T2.Singer_id WHERE T2.birth_year BETWEEN 1945 AND 1955)
74
+ SELECT T2.Song_Name , T2.Song_Release_Year FROM CONCERT AS T1 JOIN SINGER AS T2 ON T1.Singer_ID = T2.Singer_ID ORDER BY T1.Year LIMIT 1
75
+ SELECT name FROM country GROUP BY name ORDER BY population DESC LIMIT 3_, 1;
76
+ SELECT T2.first_name , T2.last_name FROM dog_treatment AS T1 JOIN professional AS T2 ON T1.professional_id = T2.professional_id WHERE T1.cost_of_treatment < (SELECT avg(T3.cost_of_treatment) FROM dog_treatment AS T2 JOIN professional AS T3 ON T2.professional_id = T3.professional_id GROUP BY T2.professional_id ORDER BY avg(T3.cost_
77
+ SELECT T1.first_name , T1.loser_rank FROM loser_record AS T1 JOIN player AS T2 ON T1.loser_id = T2.player_id GROUP BY T1.loser_id ORDER BY T2.loser_rank_points DESC LIMIT 10;
78
+ SELECT count(*) FROM model WHERE country_id = "United States"
79
+ SELECT T1.Episode FROM series AS T1 JOIN episode AS T2 ON T1.Series_ID = T2.Series_ID ORDER BY T2.Rating
80
+ SELECT title FROM cartoons ORDER BY title ASC�, series_name ASC
81
+ SELECT count(*) FROM has_friend AS T1 JOIN student AS T2 ON T1.StuID = T2.StuID WHERE T2.name = "Kyle"
82
+ SELECT count(DISTINCT pet_type) FROM pet
83
+ SELECT T3.email_address , T3.cell_number , T3.home_phone FROM dog AS T1 JOIN professional AS T2 ON T1.dog_id = T2.dog_id JOIN treatment AS T3 ON T2.treatment_id = T3.treatment_id WHERE T1.dog_id = (SELECT dog_id FROM dog_treatment ORDER BY date_of_treatment DESC LIMIT 1)
84
+ SELECT Major , age FROM STUDENT EXCEPT SELECT T1.Major , T1.age FROM STUDENT AS T1 JOIN PET AS T2 ON T1.StuID = T2.StuID WHERE T2.PetType = 'Cat'F
85
+ SELECT T1.Name FROM conductor AS T1 JOIN performance AS T2 ON T1.Conductor_ID = T2.Conductor_ID ORDER BY T1.Age ASC+, T2.Date ASC
86
+ SELECT T1.loser_name , T2.winner_name FROM loser AS T1 JOIN winner AS T2 ON T1.loser_id = T2.winner_id WHERE T1.match_num = (SELECT max(match_num) FROM loser) GROUP BY T1.loser_name , T2.winner_name ORDER BY T1.loser_rank_points DESC, T2.winner_rank_points DESC
87
+ SELECT Country FROM channel EXCEPT SELECT T1.Country FROM channel AS T1 JOIN cartoon AS T2 ON T1.Channel = T2.Channel WHERE T2.Written_by = 'Todd Casey'/]{'SELECT Country FROM channel EXCEPT SELECT T1.Country FROM channel AS T1 JOIN cartoon AS T2 ON T1.Channel = T2.Channel WHERE T2.Written_by = 'Todd Casey'}{SELECT Country FROM channel EXCEPT SELECT T1.Country FROM channel AS T1 JOIN cartoon AS
88
+ SELECT country , count(*) FROM tv_channel GROUP BY country ORDER BY count(*) DESC LIMIT 1
89
+ SELECT language FROM country GROUP BY language HAVING min(id) = 1 AND country_code = country_codes_in_republic_governments ORDER BY min(id) DESC LIMIT 1
90
+ SELECT T2.airline FROM flight AS T1 JOIN airline AS T2 ON T1.source_airport = T2.airport_code WHERE T1.destination_airport = 'AHD'
91
+ SELECT name FROM city WHERE country_code = 'Europe' EXCEPT SELECT name FROM city WHERE language = 'english'
92
+ SELECT course_description FROM course WHERE course_name = "Human Physiology"
93
+ SELECT TV_Channel FROM series AS T1 JOIN content AS T2 ON T1.Channel = T2.Channel WHERE T2.Series_Name = "The Rise of the Blue Beetle"
94
+ SELECT T1.Name FROM conductor AS T1 JOIN orchestra_conductor AS T2 ON T1.Conductor_ID = T2.Conductor_ID WHERE T2.Nationality NOT IN (SELECT Nationality FROM conductor WHERE Conductor_ID NOT IN (SELECT T1.Conductor_ID FROM conductor AS T1 JOIN orchestra_conductor AS T2 ON T1.Conductor_ID = T2.Conductor_ID GROUP BY T1.Conductor_ID ORDER BY count(*) DESC LIMIT 1
95
+ SELECT T2.Continent FROM city AS T1 JOIN country AS T2 ON T1.Continent = T2.Country_Code GROUP BY T2.Continent ORDER BY COUNT(*) DESC LIMIT 1
96
+ SELECT T1.name FROM country AS T1 JOIN city AS T2 ON T1.country_code = T2.country_code WHERE T2.language = 'French' INTERSECT SELECT T1.name FROM country AS T1 JOIN city AS T2 ON T1.country_code = T2.country_code WHERE T2.language = 'English'
97
+ SELECT flight_number FROM airlines WHERE airline = "United Airlines"
98
+ SELECT Language , COUNT(*) FROM tv_channel GROUP BY Language
99
+ SELECT name FROM student WHERE grade >= 3 GROUP BY name HAVING count(*) >= 3�
100
+ SELECT T1.Air_date FROM series AS T1 JOIN episode AS T2 ON T1.Series_ID = T2.Series_ID WHERE T2.Episode = "A Love of a Lifetime"
101
+ SELECT count(*) FROM dog WHERE age < (SELECT avg(age) FROM dog)
102
+ SELECT T1.continent , COUNT(*) FROM city AS T1 JOIN country AS T2 ON T1.continent = T2.continent GROUP BY T1.continent ORDER BY COUNT(*) DESC LIMIT 1
103
+ SELECT T2.name FROM shop AS T1 JOIN shop_stock AS T2 ON T1.shop_id = T2.shop_id WHERE T2.number_products > (SELECT avg(T3.number_products) FROM shop AS T1 JOIN shop_stock AS T2 ON T1.shop_id = T2.shop_id JOIN staff AS T3 ON T1.shop_id = T3.shop_id WHERE T3.is_full_time = 'yes' GROUP BY T1.shop_id)
104
+ SELECT count(*) FROM Flight WHERE airline = 'United Airlines' AND source_airport = 'AHD Airport'
105
+ SELECT DISTINCT t1.template_type_code , count(*) FROM template_details AS t1 JOIN document_contents AS t2 ON t1.template_id = t2.template_id GROUP BY t1.template_type_code
106
+ SELECT Title FROM cartoon WHERE Directed_by = "Ben Jones" OR Directed_by = "Brandon Vietti"�
107
+ SELECT T1.paragraph_id , T1.paragraph_text FROM Documents AS T1 JOIN Paragraphs AS T2 ON T1.document_id = T2.paragraph_id WHERE T2.document_name = 'Welcome to NY'
108
+ SELECT DISTINCT T1.document_name FROM Documents AS T1 JOIN Templates AS T2 ON T1.template_id = T2.template_id WHERE T2.documents_used = 'Y'
109
+ SELECT DISTINCT T1.contestant_name FROM contestant AS T1 JOIN vote AS T2 ON T1.contestant_number = T2.contestant_number WHERE T1.contestant_name != "Jessie Alloway"
110
+ SELECT name FROM list AS T1 JOIN high_school AS T2 ON T1.hs_id = T2.hs_id WHERE T1.grade > 5 INTERSECT SELECT name FROM list AS T1 JOIN high_school AS T2 ON T1.hs_id = T2.hs_id GROUP BY T1.hs_id HAVING count(*) >= 2
111
+ SELECT count(*) FROM FLIGHTS WHERE Destination_Airport_Code = 'APG'
112
+ SELECT DISTINCT T1.name FROM city AS T1 JOIN country AS T2 ON T1.district = T2.district WHERE T2.country_code = 'Asia' AND T2.language = 'Chinese'�, AND T1.city_code = T2.district_code
113
+ SELECT T1.fname , T1.lname , sex FROM student AS T1 JOIN has_pet AS T2 ON T1.StuID = T2.StuID GROUP BY T1.StuID HAVING count(DISTINCT T2.PetID) > 1
114
+ SELECT flight_number FROM FLIGHTS WHERE Destination_Airport_Code = 'ABZ' AND Source_Airport_Code = 'Aberdeen', Airport_Name FROM AIRPORTS WHERE City = 'Aberdeen', Country_Abbrev = 'United_Kingdom'
115
+ SELECT T1.first_name , avg(T2.loser_rank) FROM player AS T1 JOIN loser_rank AS T2 ON T1.player_id = T2.loser_id GROUP BY T1.first_name
116
+ SELECT T1.name FROM students AS T1 JOIN has_friends AS T2 ON T1.StuID = T2.StuID WHERE T2.Liked_id = (SELECT T3.StuID FROM has_friends AS T3 JOIN students AS T4 ON T3.Friend_id = T4.StuID GROUP BY T3.Friend_id ORDER BY count(*) DESC LIMIT 1) INTERSECT SELECT T1.name FROM students AS T1 JOIN has_friends AS T2 ON T1.
117
+ SELECT count(*) FROM has_friend AS T1 JOIN student AS T2 ON T1.StuID = T2.StuID WHERE T1.name = "Kyle" AND T1.grade = "Freshman"
118
+ SELECT Model FROM model GROUP BY Model HAVING COUNT(*) > (SELECT count(*) FROM model WHERE Model = MATCH (SELECT DISTINCT Model FROM model) EXCEPT SELECT Model FROM model WHERE Version = MATCH (SELECT DISTINCT Version FROM model) EXCEPT SELECT Model FROM model WHERE Version = MATCH (SELECT DISTINCT Version FROM model) EXCEPT SELECT Model FROM model WHERE Version = MATCH (SELECT DISTINCT Version FROM model) EXCEPT SELECT Model FROM model WHERE Version = MATCH (SELECT DISTINCT Version FROM model
119
+ SELECT count(DISTINCT loser_name) FROM loser
120
+ SELECT T1.owner_id , T1.first_name , T1.last_name FROM dog AS T1 JOIN dog_treatment AS T2 ON T1.dog_id = T2.dog_id GROUP BY T1.dog_id ORDER BY count(*) DESC LIMIT 1
121
+ SELECT pet_age FROM pet WHERE pet_type = "Dog" AND pet_age = (SELECT min(pet_age) FROM pet WHERE pet_type = "Dog")C, pet_id FROM pet WHERE pet_type = "Dog" AND pet_age = (SELECT min(pet_age) FROM pet WHERE pet_type = "Dog" AND pet_id = (SELECT pet_id FROM pet WHERE pet_type = "Dog" AND pet_age = (SELECT min(pet
122
+ SELECT t1.first_name , t1.middle_name , t1.last_name , t1.enrolments FROM student AS t1 JOIN course_enrolment AS t2 ON t1.student_id = t2.student_id GROUP BY t1.student_id ORDER BY t2.enrolments DESC LIMIT 1;
123
+ SELECT count(*) FROM hometown AS T1 JOIN teacher AS T2 ON T1.hometown = T2.hometown GROUP BY T1.hometown
124
+ SELECT T1.document_name FROM Documents AS T1 JOIN Document_contents AS T2 ON T1.document_id = T2.document_id JOIN TEMPLATES AS T3 ON T2.template_id = T3.template_id WHERE T3.template_type_code = "BK"�
125
+ SELECT T1.Name FROM poker_player AS T1 JOIN people AS T2 ON T1.People_ID = T2.People_ID WHERE T1.Earnings > 300000
126
+ SELECT T1.loser_name FROM loser AS T1 JOIN loser_match AS T2 ON T1.loser_id = T2.winner_id WHERE T2.match_num = 2013 AND T2.match_year = 2016 INTERSECT SELECT T1.loser_name FROM loser AS T1 JOIN loser_match AS T2 ON T1.loser_id = T2.winner_id WHERE T2.match_num = 2016 AND T
127
+ SELECT DISTINCT T1.Name FROM singer AS T1 JOIN song AS T2 ON T1.Singer_ID = T2.Singer_ID WHERE T2.Sales > 300000�, T1.Name NOT IN (SELECT DISTINCT T1.Name FROM singer AS T1 JOIN song AS T2 ON T1.Singer_ID = T2.Singer_ID WHERE T2.Sales > 300000)
128
+ SELECT Record_Company FROM orchestra GROUP BY Record_Company ORDER BY COUNT(*) DESC LIMIT 1
129
+ SELECT avg(weight) , YEAR FROM car GROUP BY YEARF, CONTIDERID
130
+ SELECT country_code FROM people WHERE language != 'english'
131
+ SELECT maker , count(*) , T1.id , T2.full_name FROM models AS T1 JOIN makes AS T2 ON T1.maker = T2.id GROUP BY T1.id , T2.full_name
132
+ SELECT Tours.Tourney_Date , COUNT(*) FROM MATCHES AS T_1 JOIN TOURNEYS AS T_2 ON T_1.Tourney_ID = T_2.Tourney_ID GROUP BY T_2.Tourney_Date
133
+ SELECT count(*) FROM car WHERE Horsepower > 150�, Maker = "YAMAHA"
134
+ SELECT T2.other_address_details FROM student_enrolments AS T1 JOIN addresses AS T2 ON T1.current_address_id = T2.address_id ORDER BY T1.student_first_name DESC, T1.student_last_name DESC
135
+ SELECT count(DISTRICT) FROM country WHERE government_form = "Republic"�, country_code IN (SELECT country_code FROM country_continent_relationship)
136
+ SELECT T1.semester_name , T1.semester_id FROM course_registrations AS T1 JOIN students AS T2 ON T1.student_enrolment_id = T2.student_enrolment_id GROUP BY T1.semester_name ORDER BY count(*) , count(*) DESC LIMIT 1
137
+ SELECT T1.document_id FROM Document_contents AS T1 JOIN Paragraphs AS T2 ON T1.paragraph_id = T2.paragraph_id WHERE T2.text = 'Brazil' INTERSECT SELECT T1.document_id FROM Document_contents AS T1 JOIN Paragraphs AS T2 ON T1.paragraph_id = T2.paragraph_id WHERE T2.text = 'Ireland'
138
+ SELECT series_name FROM series WHERE episode = "A Love of a Lifetime"
139
+ SELECT name FROM students WHERE StuID NOT IN (SELECT StuID FROM student_friends)
140
+ SELECT name FROM student WHERE likes = (SELECT likes FROM student_friend ORDER BY likes DESC LIMIT 1)
141
+ SELECT name FROM employee EXCEPT SELECT T1.name FROM employee AS T1 JOIN award AS T2 ON T1.employee_id = T2.employee_id WHERE T2.YEAR_AWARDED = '2016'
142
+ SELECT production_code , channel FROM cartoons ORDER BY episode DESC LIMIT 1
143
+ SELECT avg(Horsepower) FROM model WHERE YEAR < 1980 GROUP BY YEAR ORDER BY avg(Horsepower) DESC UNION SELECT avg(Horsepower) FROM model WHERE YEAR > 1980 GROUP BY YEAR ORDER BY avg(Horsepower) ASC
144
+ SELECT Record_Company FROM performance GROUP BY Record_Company WHERE YEAR_OF_FOUNDED < 2003 INTERSECT SELECT Record_Company FROM performance GROUP BY Record_Company WHERE YEAR_OF_FOUNDED > 2003
145
+ SELECT T2.Name , count(*) FROM concert_record AS T1 JOIN singer AS T2 ON T1.Singer_ID = T2.Singer_ID GROUP BY T1.Singer_ID
146
+ SELECT t1.name , t1.location , t1.district FROM shop AS t1 JOIN shop_stock AS t2 ON t1.shop_id = t2.shop_id GROUP BY t1.shop_id ORDER BY number_products DESC
147
+ SELECT sum(Tournament_rank_points) FROM loser
148
+ SELECT T2.student_id FROM friends AS T1 JOIN high_school AS T2 ON T1.friend_id = T2.student_id WHERE T1.student_id IN (SELECT T1.student_id FROM friends AS T1 JOIN high_school AS T2 ON T1.friend_id = T2.student_id) INTERSECT SELECT T1.student_id FROM friends AS T1 JOIN high_school AS T2 ON T1.friend_id = T2.student_id GROUP BY T1.student_id H
149
+ SELECT series_name FROM channel WHERE title = "The Rise of the Blue Beetle"C
150
+ SELECT language FROM country WHERE government_form = "republic" EXCEPT SELECT T2.language FROM country AS T1 JOIN city AS T2 ON T1.district = T2.district WHERE T1.country_code = country_code AND T1.government_form = "republic"
151
+ SELECT t1.document_id , t1.document_name , t1.document_description FROM Documents AS t1 JOIN Documents_contents AS t2 ON t1.document_id = t2.document_id JOIN Templates AS t3 ON t2.template_id = t3.template_id WHERE t1.document_name = "Robbin CV"
152
+ SELECT T2.owner_id , T2.last_name FROM dog_treatment AS T1 JOIN dog AS T2 ON T1.dog_id = T2.dog_id GROUP BY T1.dog_id ORDER BY sum(T1.cost_of_treatment) DESC LIMIT 1;
153
+ SELECT DISTINCT T2.template_type_code FROM TEMPLATES AS T1 JOIN PARAGRAPHS AS T2 ON T1.template_id = T2.template_id WHERE T1.template_type_code = "Marketing"
154
+ SELECT count(*) FROM cylinders WHERE cylinders > 4
155
+ SELECT T1.id FROM friends AS T1 JOIN students AS T2 ON T1.StuID = T2.StuID WHERE T2.name = "Kyle"
156
+ SELECT T1.name FROM list AS T1 JOIN course AS T2 ON T1.course_id = T2.course_id EXCEPT SELECT T1.name FROM list AS T1 JOIN course AS T2 ON T1.course_id = T2.course_id
157
+ SELECT T1.first_name , T1.middle_name , T1.last_name FROM student AS T1 JOIN course_enrolment AS T2 ON T1.student_id = T2.student_enrolment_id WHERE T2.course_id = (SELECT course_id FROM course_enrolment WHERE course_name = (SELECT course_name FROM course WHERE degree_program_id = (SELECT degree_program_id FROM degree_programs ORDER BY course_name DESC LIMIT 1) ) ) ORDER BY T
158
+ SELECT T1.Name FROM conductor AS T1 JOIN orchestra AS T2 ON T1.Conductor_ID = T2.Conductor_ID GROUP BY T1.Conductor_ID ORDER BY count(*) DESC LIMIT 1
159
+ SELECT T2.treatment_type_description FROM dog_treatment_types AS T1 JOIN dog_treatments AS T2 ON T1.treatment_type_code = T2.treatment_type_code WHERE T1.treatment_type_code = (SELECT T3.treatment_type_code FROM dog_treatment_types AS T1 JOIN dog_treatments AS T2 ON T1.treatment_type_code = T2.treatment_type_code JOIN dog AS
160
+ SELECT Model FROM cylinders WHERE Cylinders = 4 AND Horsepower = (SELECT max(Horsepower) FROM model WHERE Make = "Car" AND Cylinders = 4)�, Maker FROM cylinders WHERE Cylinders = 4 AND Horsepower = (SELECT max(Horsepower) FROM model WHERE Make = "Car" AND Cylinders = 4)
161
+ SELECT country_abbrev FROM airlines WHERE airline = "JetBlue Airways"�
162
+ SELECT T2.template_type_code FROM template_details AS T1 JOIN template_type AS T2 ON T1.template_type_code = T2.template_type_code GROUP BY T1.template_type_code ORDER BY count(*) DESC LIMIT 1
163
+ SELECT DISTINCT T1.model FROM car AS T1 JOIN manufacturer AS T2 ON T1.maker = T2.id JOIN country AS T3 ON T2.country_id = T3.id WHERE T3.name = "General Motors" OR T3.weight > 3500
164
+ SELECT model FROM car WHERE horsepower = (SELECT min(HORSEPOWER) FROM car)
165
+ SELECT count(*) , T1.stuid FROM pet AS T1 JOIN has_pet AS T2 ON T1.petid = T2.petid JOIN student AS T3 ON T3.stuid = T1.stuid WHERE T2.has_pet = 1 AND T3.stuid = (SELECT T1.stuid FROM pet AS T1 JOIN has_pet AS T2 ON T1.petid = T2.petid WHERE T2.has_pet = 1 GROUP BY T1.stuid ORDER BY
166
+ SELECT t3.document_id FROM Document_contents AS t1 JOIN Paragraphs AS t2 ON t1.paragraph_id = t2.paragraph_id JOIN Documents AS t3 ON t1.document_id = t3.document_id WHERE t2.text = "Brazil" INTERSECT SELECT t3.document_id FROM Document_contents AS t1 JOIN Paragraphs AS t2 ON t1.paragraph_id = t2.paragraph_id JOIN Documents AS t3 ON t1.document_id =
167
+ SELECT t1.template_id FROM templates AS t1 JOIN document_templates AS t2 ON t1.template_id = t2.template_id WHERE t2.template_type_code = "PP" OR t2.template_type_code = "PPT"F
168
+ SELECT T2.contestant_name FROM contestants AS T1 JOIN vote AS T2 ON T1.contestant_number = T2.contestant_number WHERE T1.contestant_name LIKE '%Al%'
169
+ SELECT count(DISTINCT Nationality) FROM POKER_COMPETITOR
170
+ SELECT count(DISTINCT employee_id) FROM employee/]{'employee id , name , age , city , sex , department id , name , location , district , number products , manager name , department id , start date , end date , senior employee id , end date , senior employee id , is full time , hour worked per day , hour worked per week , date of employment , resignation date , reason for leaving , senior employee id , working hours per day , working hours per week , employ
171
+ SELECT T1.Title FROM cartoon AS T1 JOIN directed_by AS T2 ON T1.ID = T2.ID_cartoon WHERE T2.Directed_by = "Ben_Jones"
172
+ SELECT T2.StuID FROM friends AS T1 JOIN student AS T2 ON T1.StuID = T2.StudentID WHERE T1.FriendID IN (SELECT T3.FriendID FROM friends AS T1 JOIN student AS T2 ON T1.StuID = T2.StudentID JOIN liked AS T3 ON T3.LikedID = T1.FriendID) INTERSECT SELECT T1.StuID FROM friends AS T1 JOIN student AS T2 ON T1.StuID = T
173
+ SELECT name FROM grade WHERE grade = "Freshman" INTERSECT SELECT name FROM students WHERE studentsid = T1.Student_IDF, SELECT name FROM grade WHERE grade = "Sophomore" INTERSECT SELECT name FROM students WHERE studentsid = T1.Student_ID, SELECT name FROM grade WHERE grade = "Junior" INTERSECT SELECT name FROM students WHERE studentsid = T1.Student_ID, SELECT name FROM grade WHERE grade = "Senior" INTERSECT
174
+ SELECT Nationality FROM people GROUP BY Nationality HAVING COUNT(*) >= 2
175
+ SELECT count(*) FROM maker WHERE country_id = "FRANCE"
176
+ SELECT max(Horsepower) , Maker FROM car WHERE Cylinders = 3
177
+ SELECT flight_number FROM FLIGHTS WHERE source_airport = "APG"
178
+ SELECT DISTINCT T1.Country FROM concert AS T1 JOIN singer AS T2 ON T1.Concert_ID = T2.Concert_ID WHERE T2.Age > 40 INTERSECT SELECT DISTINCT T1.Country FROM concert AS T1 JOIN singer AS T2 ON T1.Concert_ID = T2.Concert_ID WHERE T2.Age < 30
179
+ SELECT winner_name , winner_rank_points FROM loser GROUP BY winner_rank_points ORDER BY sum(winner_rank_points) DESC LIMIT 1
180
+ SELECT grade , count(*) FROM list WHERE name = "high school" GROUP BY grade
181
+ SELECT T1.owner_id , T2.zip_code FROM dog_treatment AS T1 JOIN dog AS T2 ON T1.dog_id = T2.dog_id GROUP BY T1.dog_id ORDER BY sum(T1.treatment_amount) DESC LIMIT 1
182
+ SELECT count(DISTINCT flight_number) FROM FLIGHTF, AIRCRAFT
183
+ SELECT T1.airport_code , T1.airport_name FROM airport AS T1 JOIN flight AS T2 ON T1.airport_code = T2.source_airport WHERE T1.city = 'Anthony', T2.destination_airport = 'T1.airport_name'
184
+ SELECT T1.Caused_by_ship_id FROM ship AS T1 JOIN killed_in_battle AS T2 ON T1.id = T2.Caused_by_ship_id WHERE T1.Tonnage = "t"
185
+ SELECT tourney_year FROM loser GROUP BY tourney_year ORDER BY count(*) DESC LIMIT 1�, 1
186
+ SELECT DISTINCT T1.name FROM singer AS T1 JOIN song AS T2 ON T1.singer_id = T2.singer_id WHERE T2.sales > 300000
187
+ SELECT T2.name FROM list AS T1 JOIN course AS T2 ON T1.course_id = T2.course_id GROUP BY T1.course_id HAVING count(*) >= 2
188
+ SELECT count(*) FROM car WHERE Horsepower > 150�, MpG > 150
189
+ SELECT LOCATION , number_shop FROM shop GROUP BY LOCATION ORDER BY number_shop DESC LIMIT 1
190
+ SELECT T1.name FROM singer AS T1 JOIN song AS T2 ON T1.singer_id = T2.singer_id GROUP BY T1.singer_id HAVING count(*) > 1
191
+ SELECT T1.last_name FROM students AS T1 JOIN addresses AS T2 ON T1.current_address_id = T2.address_id WHERE T2.state_province_county = "North Carolina" AND T1.last_name NOT IN (SELECT T1.last_name FROM students AS T1 JOIN enrolments AS T2 ON T1.student_id = T2.student_id WHERE T2.course_id NOT IN (SELECT T3.course_id FROM student_course_enrolments AS T3 JOIN
192
+ SELECT T1.name FROM museum AS T1 JOIN customer AS T2 ON T1.museum_id = T2.museum_id WHERE T1.open_year < 2009 INTERSECT SELECT T1.name FROM museum AS T1 JOIN customer AS T2 ON T1.museum_id = T2.museum_id WHERE T1.open_year > 2011
193
+ SELECT count(*) FROM city WHERE country_code = "Aruba"�, LANGUAGE IN (SELECT Language FROM language_district WHERE district = "Aruba" GROUP BY Language ORDER BY count(*) DESC LIMIT 1)
194
+ SELECT T1.Episode FROM episode AS T1 JOIN ratings AS T2 ON T1.Episode = T2.Episode ORDER BY T2.Rating
195
+ SELECT T1.airline_name FROM airlines AS T1 JOIN flight AS T2 ON T1.airline_id = T2.airline WHERE T2.flight_number = (SELECT max(flight_number) FROM airlines AS T1 JOIN flight AS T2 ON T1.airline_id = T2.airline WHERE T1.airline_name = "All Nippon Airways" AND T2.source_airport = "Tokyo" AND T2.destination_airport =
196
+ SELECT count(DISTINCT country_id) FROM country
197
+ SELECT count(*) FROM COURSES;
198
+ SELECT grade FROM students GROUP BY grade ORDER BY count(*) DESC LIMIT 1�enaid, name FROM friends AS T1 JOIN students AS T2 ON T1.enanid = T2.enanid WHERE T1.friendid = T2.enanid
199
+ SELECT count(*) FROM country WHERE government_form_code = "Republican"F, country_code IN (SELECT country_code FROM country WHERE government_form_code = "Republican")
200
+ SELECT DISTINCT T1.fname FROM student AS T1 JOIN has_pet AS T2 ON T1.stuid = T2.stuid WHERE T2.pettype != 'cat' AND T2.pettype != 'dog'
file_copies_from_project_root/predict_base.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ['player id', 'first name', 'last name', 'hand', 'birth date', 'country code', 'best of', 'draw size', 'loser age', 'loser entry', 'loser hand', 'loser ht', 'loser id', 'loser ioc', 'loser name', 'loser rank', 'loser rank points', 'loser seed', 'match num', 'minutes', 'round', 'score', 'surface', 'tourney date', 'tourney id', 'tourney level', 'tourney name', '
2
+ 2
3
+ ['id', 'series name', 'country', 'language', 'content', 'pixel aspect ratio par', 'hight definition tv', 'pay per view ppv', 'package option', 'id', 'episode', 'air date', 'rating', 'share', '18 49 rating share', 'viewers m', 'weekly rank', 'channel', 'id', 'title', 'directed by', 'written by', 'original air date', 'production code', 'channel']
4
+ ['stadium id', 'location', 'name', 'capacity', 'highest', 'lowest', 'average', 'singer id', 'name', 'country', 'song name', 'song release year', 'age', 'is male', 'concert id', 'concert name', 'theme', 'stadium id', 'year', 'concert id', 'singer id']
5
+ ['id', 'name', 'country code', 'district', 'population', 'name', 'seq', 'code', 'name', 'continent', 'region', 'surface area', 'indepdent year', 'population', 'life expectancy', 'gnp', 'gnp old', 'local name', 'government form', 'head of state', 'capital', 'code2', 'countrycode', 'language', 'is official', 'percentage']
6
+ There are 10 high schoolers in grade 9 or 10.
7
+ 2 countries speak both English and Dutch: Suriname and Netherlands.
8
+ ['course id', 'staring date', 'course', 'teacher id', 'name', 'age', 'hometown', 'course id', 'teacher id', 'grade']
9
+ ['address id', 'line 1', 'line 2', 'line 3', 'city', 'zip postcode', 'state province county', 'country', 'other address details', 'course id', 'course name', 'course description', 'other details', 'department id', 'department name', 'department description', 'other details', 'degree program id', 'department id', 'degree summary name', 'degree summary description', 'other details', 'section id', 'course id', 'section name', 'section description', 'other details', 'semester id', '
10
+ ['conductor id', 'name', 'age', 'nationality', 'year of work', 'orchestra id', 'orchestra', 'conductor id', 'record company', 'year of founded', 'major record format', 'performance id', 'orchestra id', 'type', 'date', 'official ratings (millions)', 'weekly rank', 'share', 'show id', 'performance id', 'if first show', 'result', 'attendance']
file_copies_from_project_root/spider_get_table.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import sys
3
+
4
def get_spider_table(tables_path: str = "tables.json") -> dict:
    """Map every database id in the tables file to a flat list of column names.

    Args:
        tables_path: Path of the JSON file to read. Defaults to
            ``tables.json`` in the current working directory, which is what
            the function always read before this parameter existed.

    Returns:
        ``{db_id: [column_name, ...]}``. For each entry the first item of
        ``column_names`` is skipped (presumably the ``*`` wildcard column --
        confirm against the tables file) and element ``[1]`` of every
        remaining item is kept. Columns are not grouped per table, so the
        same name can appear more than once in a list.
    """
    # Explicit encoding: do not depend on the platform's default locale when
    # decoding the JSON file.
    with open(tables_path, encoding="utf-8") as f:
        tables = json.load(f)
    return {
        table["db_id"]: [column[1] for column in table["column_names"][1:]]
        for table in tables
    }
10
+
11
# Built at module level, so importing this module reads tables.json.
tables_dict = get_spider_table()

if __name__ == "__main__":
    # Command-line use: print the column list for one database id.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <db_id>")
    dbid = sys.argv[1]
    if dbid not in tables_dict:
        # Exit with a readable message instead of a KeyError traceback.
        sys.exit(f"unknown db_id: {dbid!r}")
    print("Printing DB ID: ", dbid)
    print("TABLE: " + str(dbid) + " \nCOLUMNS: " + str(tables_dict[dbid]) + "\n")
file_copies_from_project_root/spider_raw_get_table.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import sys
3
+
4
def get_raw_table(dbid, tables_path: str = "tables.json"):
    """Return the unprocessed tables-file entry for one database id.

    Args:
        dbid: Value to match against each entry's ``db_id`` field.
        tables_path: Path of the JSON file to read. Defaults to
            ``tables.json`` in the current working directory, which is what
            the function always read before this parameter existed.

    Returns:
        The first entry whose ``db_id`` equals ``dbid``, exactly as loaded
        from JSON, or ``None`` when no entry matches.
    """
    # Explicit encoding: do not depend on the platform's default locale when
    # decoding the JSON file.
    with open(tables_path, encoding="utf-8") as f:
        tables = json.load(f)
    return next((table for table in tables if table["db_id"] == dbid), None)
10
+
11
if __name__ == "__main__":
    # Command-line use: dump the raw tables-file entry for one database id.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <db_id>")
    dbid = sys.argv[1]
    print("Printing Raw Table for ID: ", dbid)
    # Prints "None" when the id is not present in the tables file.
    print(get_raw_table(dbid))
file_copies_from_project_root/spider_table_parse.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ import json
3
+ import pandas as pd
4
+
5
# Dataset identifier handed to load_dataset() further down in this script.
data_path = "spider"
7
+
8
def get_spider_table(tables_path: str = "tables.json") -> dict:
    """Map every database id in the tables file to a flat list of column names.

    Args:
        tables_path: Path of the JSON file to read. Defaults to
            ``tables.json`` in the current working directory, which is what
            the function always read before this parameter existed.

    Returns:
        ``{db_id: [column_name, ...]}``. For each entry the first item of
        ``column_names`` is skipped (presumably the ``*`` wildcard column --
        confirm against the tables file) and element ``[1]`` of every
        remaining item is kept. Columns are not grouped per table, so the
        same name can appear more than once in a list.
    """
    # Explicit encoding: do not depend on the platform's default locale when
    # decoding the JSON file.
    with open(tables_path, encoding="utf-8") as f:
        tables = json.load(f)
    return {
        table["db_id"]: [column[1] for column in table["column_names"][1:]]
        for table in tables
    }
13
+
14
def _augment_split(split, csv_path, tables):
    """Write one dataset split to CSV with an added ``db_context`` column.

    Args:
        split: Dataset split exposing ``to_csv(path)`` and a ``"db_id"`` column.
        csv_path: Destination CSV file; written, re-read and overwritten.
        tables: Mapping from db id to the column-name list to attach.

    Returns:
        ``(frame, dataset)``: the augmented pandas DataFrame and the result of
        reloading the final CSV through ``load_dataset("csv", ...)``.
    """
    # Round-trip through CSV first, as the original script did, so the frame
    # holds exactly what the CSV serialisation of the split produces.
    split.to_csv(csv_path)
    frame = pd.read_csv(csv_path)

    contexts = [tables[dbid] for dbid in split["db_id"]]
    if contexts:
        # Quick sanity output: first and last attached column lists.
        print(contexts[0])
        print(contexts[-1])
    frame["db_context"] = contexts

    # index=False: otherwise pandas writes its row index as an extra unnamed
    # column, which shows up as a spurious "Unnamed: 0" field on reload.
    frame.to_csv(csv_path, index=False)
    return frame, load_dataset("csv", data_files=csv_path)


tables_dict = get_spider_table()

data = load_dataset(data_path)
data_train = data["train"]
data_val = data["validation"]

df_train, data_train = _augment_split(
    data_train, "dataset_train_augmented.csv", tables_dict
)
df_val, data_val = _augment_split(
    data_val, "dataset_val_augmented.csv", tables_dict
)
46
+
file_copies_from_project_root/tables.json ADDED
The diff for this file is too large to render. See raw diff