Tonic committed on
Commit
c3ced67
·
verified ·
1 Parent(s): b6bd3b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -59
app.py CHANGED
@@ -9,78 +9,67 @@ You can also use efog 🌬️🌁🌫️SqlCoder by cloning this space. 🧬🔬
9
  Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻[![Let's build the future of AI together! 🚀🤖](https://discordapp.com/api/guilds/1109943800132010065/widget.png)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [Poly](https://github.com/tonic-ai/poly) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
10
  """
11
 
12
- class TokenizerModel:
13
- def __init__(self, model_name):
14
- self.tokenizer, self.model = self.load_model(model_name)
 
 
 
 
 
 
 
15
 
16
- def load_model(self, model_name):
17
- tokenizer = AutoTokenizer.from_pretrained(model_name)
18
- model = AutoModelForCausalLM.from_pretrained(
19
- model_name,
20
- trust_remote_code=True,
21
- torch_dtype=torch.float16,
22
- device_map="auto",
23
- use_cache=True,
24
- )
25
- return tokenizer, model
26
 
27
- class SQLQueryGenerator:
28
- def __init__(self, tokenizer_model, prompt_file="prompt.md", metadata_file="metadata.sql"):
29
- self.tokenizer_model = tokenizer_model
30
- self.prompt_file = prompt_file
31
- self.metadata_file = metadata_file
32
 
33
- def generate_prompt(self, question):
34
- with open(self.prompt_file, "r") as f:
35
- prompt = f.read()
 
36
 
37
- with open(self.metadata_file, "r") as f:
38
- table_metadata_string = f.read()
39
-
40
- prompt = prompt.format(
41
- user_question=question, table_metadata_string=table_metadata_string
42
- )
43
- return prompt
44
-
45
- @spaces.GPU
46
- def run_inference(self, question):
47
- self.tokenizer_model.model.to('cuda')
48
- prompt = self.generate_prompt(question)
49
- eos_token_id = self.tokenizer_model.tokenizer.eos_token_id
50
- pipe = pipeline(
51
- "text-generation",
52
- model=self.tokenizer_model.model,
53
- tokenizer=self.tokenizer_model.tokenizer,
54
- max_new_tokens=300,
55
- do_sample=False,
56
- num_beams=5,
57
- )
58
- generated_query = (
59
- pipe(
60
- prompt,
61
- num_return_sequences=1,
62
- eos_token_id=eos_token_id,
63
- pad_token_id=eos_token_id,
64
- )[0]["generated_text"]
65
- .split("```sql")[-1]
66
- .split("```")[0]
67
- .split(";")[0]
68
- .strip()
69
- + ";"
70
- )
71
- return generated_query
72
 
73
  def main():
74
  model_name = "defog/sqlcoder2"
75
- tokenizer_model = TokenizerModel(model_name)
76
- sql_query_generator = SQLQueryGenerator(tokenizer_model)
77
 
78
  with gr.Blocks() as demo:
79
  gr.Markdown(title)
80
  question = gr.Textbox(label="Enter your question")
81
  submit = gr.Button("Generate SQL Query")
82
  output = gr.Textbox(label="🌬️🌁🌫️SqlCoder-2")
83
- submit.click(fn=sql_query_generator.run_inference, inputs=question, outputs=output)
84
 
85
  demo.launch()
86
 
 
9
  Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻[![Let's build the future of AI together! 🚀🤖](https://discordapp.com/api/guilds/1109943800132010065/widget.png)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [Poly](https://github.com/tonic-ai/poly) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
10
  """
11
 
12
+ def load_tokenizer_model(model_name):
13
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
14
+ model = AutoModelForCausalLM.from_pretrained(
15
+ model_name,
16
+ trust_remote_code=True,
17
+ torch_dtype=torch.float16,
18
+ device_map="auto",
19
+ use_cache=True,
20
+ )
21
+ return tokenizer, model
22
 
23
+ def generate_prompt(question, prompt_file="prompt.md", metadata_file="metadata.sql"):
24
+ with open(prompt_file, "r") as f:
25
+ prompt = f.read()
 
 
 
 
 
 
 
26
 
27
+ with open(metadata_file, "r") as f:
28
+ table_metadata_string = f.read()
 
 
 
29
 
30
+ prompt = prompt.format(
31
+ user_question=question, table_metadata_string=table_metadata_string
32
+ )
33
+ return prompt
34
 
35
+ @spaces.GPU
36
+ def run_inference(question, model, tokenizer):
37
+ model.to('cuda')
38
+ prompt = generate_prompt(question)
39
+ eos_token_id = tokenizer.eos_token_id
40
+ pipe = pipeline(
41
+ "text-generation",
42
+ model=model,
43
+ tokenizer=tokenizer,
44
+ max_new_tokens=300,
45
+ do_sample=False,
46
+ num_beams=5,
47
+ )
48
+ generated_query = (
49
+ pipe(
50
+ prompt,
51
+ num_return_sequences=1,
52
+ eos_token_id=eos_token_id,
53
+ pad_token_id=eos_token_id,
54
+ )[0]["generated_text"]
55
+ .split("```sql")[-1]
56
+ .split("```")[0]
57
+ .split(";")[0]
58
+ .strip()
59
+ + ";"
60
+ )
61
+ return generated_query
 
 
 
 
 
 
 
 
62
 
63
  def main():
64
  model_name = "defog/sqlcoder2"
65
+ tokenizer, model = load_tokenizer_model(model_name)
 
66
 
67
  with gr.Blocks() as demo:
68
  gr.Markdown(title)
69
  question = gr.Textbox(label="Enter your question")
70
  submit = gr.Button("Generate SQL Query")
71
  output = gr.Textbox(label="🌬️🌁🌫️SqlCoder-2")
72
+ submit.click(fn=lambda x: run_inference(x, model, tokenizer), inputs=question, outputs=output)
73
 
74
  demo.launch()
75