playgrdstar committed on
Commit
3909801
·
1 Parent(s): 8639338

Add check truncation

Browse files
Files changed (1) hide show
  1. app.py +54 -8
app.py CHANGED
@@ -84,7 +84,6 @@ def load_and_generate(model_name, prompt):
84
 
85
  return gen_text.replace("<pad>", "").replace("</s>", "")
86
 
87
-
88
  ### This code for the inference api ###
89
 
90
  def generate_from_api(query, model_name, temperature, max_tokens):
@@ -102,15 +101,52 @@ def generate_from_api(query, model_name, temperature, max_tokens):
102
  response = requests.post(model_api_url, headers=headers, json=payload)
103
  return response.json()[0]['generated_text']
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  with gr.Blocks(css='style.css') as demo:
106
  gr.HTML("""
107
  <div style="text-align: center; max-width: 1240px; margin: 0 auto;">
108
  <h1 style="font-weight: 200; font-size: 20px; margin-bottom:8px; margin-top:0px;">
109
  Different Strokes (Prompts) for Different Folks (LLMs)
110
  </h1>
 
111
  <h4 style="font-weight: 50; font-size: 14px; margin-bottom:0px; margin-top:0px;">
112
- After reading <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a>, which is an excellent guide on prompts for large language models (LLMs), specifically OpenAI's LLMs, I was interested in seeing the results with for other LLMs. Hence, did up a simple demonstration of different prompts for different popular LLMs of different sizes. The prompt examples are taken from the Prompt Engineering Guide, and the LLMs that you can select below are all available on Hugging Face. If you are interested in comparing them with the prompts from OpenAI's model, you can refer to the writeup in the <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a> itself.
113
  </h4>
 
 
 
 
114
  </div>
115
  """)
116
 
@@ -132,7 +168,14 @@ with gr.Blocks(css='style.css') as demo:
132
  )
133
 
134
  max_tokens = gr.Slider(
135
- 10, 250, step=1, value=100, label="Max. Tokens (in Output)",
 
 
 
 
 
 
 
136
  ).style(
137
  container=False,
138
  )
@@ -142,7 +185,7 @@ with gr.Blocks(css='style.css') as demo:
142
  label="Enter your prompt",
143
  show_label=False,
144
  # max_lines=2,
145
- placeholder="Select your prompt below",
146
  ).style(
147
  container=False,
148
  )
@@ -150,7 +193,7 @@ with gr.Blocks(css='style.css') as demo:
150
 
151
  with gr.Row():
152
  output=gr.Textbox(
153
- label="LLM Output",
154
  show_label=True)
155
 
156
  gr.HTML("""
@@ -222,7 +265,10 @@ with gr.Blocks(css='style.css') as demo:
222
  inputs=[prompt])
223
 
224
  # process.click(load_and_generate, inputs=[model_name, prompt], outputs=[output])
225
- process.click(generate_from_api, inputs=[prompt, model_name, temperature, max_tokens], outputs=[output])
 
 
 
226
 
227
- # demo.launch(server_port=8080)
228
- demo.launch()
 
84
 
85
  return gen_text.replace("<pad>", "").replace("</s>", "")
86
 
 
87
  ### This code for the inference api ###
88
 
89
  def generate_from_api(query, model_name, temperature, max_tokens):
 
101
  response = requests.post(model_api_url, headers=headers, json=payload)
102
  return response.json()[0]['generated_text']
103
 
104
def _post_until_ok(url, headers, payload, max_attempts=60, retry_delay=5.0):
    """POST ``payload`` as JSON to ``url`` and return the first HTTP 200 response.

    The request is repeated while the server answers 429 or 5xx, sleeping
    ``retry_delay`` seconds between attempts, up to ``max_attempts`` tries.
    Any other non-200 status is treated as permanent and is not retried.

    Raises:
        requests.HTTPError: the last response was a 4xx/5xx error.
        RuntimeError: the last response was a non-200 status that
            ``raise_for_status`` does not treat as an error.
    """
    import time  # local import: the file's import block is not visible here

    attempts = max(1, max_attempts)
    response = None
    for attempt in range(attempts):
        response = requests.post(url, headers=headers, json=payload)
        if response.status_code == 200:
            return response
        # Retrying a client error (bad token, unknown model, bad input) can
        # never succeed, so only rate limiting and server-side errors loop.
        retryable = response.status_code == 429 or response.status_code >= 500
        if not retryable:
            break
        if attempt < attempts - 1:
            time.sleep(retry_delay)

    response.raise_for_status()
    raise RuntimeError(
        f"Unexpected HTTP status {response.status_code} from {url}"
    )


def generate_from_api_check(query, model_name, temperature, max_tokens):
    """Generate text through the hosted inference API, re-querying while the
    output looks truncated.

    The first request sends ``query`` as the input. While
    ``maybe_is_truncated`` (defined elsewhere in this file) reports the
    result as truncated, the previous output is appended to ``query`` and
    sent again, for at most 20 follow-up requests.

    Args:
        query: prompt text sent as the request's ``inputs``.
        model_name: model id appended to the inference API URL.
        temperature: sampling temperature, forwarded as a string.
        max_tokens: token budget, forwarded as a string; follow-up requests
            add ``len(gen_text)`` (a character count) to it.

    Returns:
        The ``generated_text`` field of the first item in the last response.

    Raises:
        requests.HTTPError: the API kept answering with an error status
            (previously this case retried forever).
    """
    model_api_url = f"https://api-inference.huggingface.co/models/{model_name}"

    def build_headers(token_budget):
        # NOTE(review): the generation settings are sent as HTTP headers,
        # exactly as before. The Inference API documents them as JSON body
        # fields ("parameters" / "options") - confirm whether the service
        # honours these headers at all.
        return {
            "Authorization": f"Bearer {HF_READ_API_KEY}",
            "wait_for_model": "true",
            "temperature": str(temperature),
            "max_tokens": str(token_budget),
            "max_time": str(120),
        }

    response = _post_until_ok(
        model_api_url, build_headers(max_tokens), {"inputs": query}
    )
    gen_text = response.json()[0]['generated_text']

    # Cap the number of continuation rounds so a model that always "looks
    # truncated" cannot keep the request going indefinitely.
    for _ in range(20):
        if not maybe_is_truncated(gen_text):
            break
        response = _post_until_ok(
            model_api_url,
            build_headers(max_tokens + len(gen_text)),
            {"inputs": query + ' ' + gen_text},
        )
        gen_text = response.json()[0]['generated_text']

    return gen_text
134
+
135
+
136
  with gr.Blocks(css='style.css') as demo:
137
  gr.HTML("""
138
  <div style="text-align: center; max-width: 1240px; margin: 0 auto;">
139
  <h1 style="font-weight: 200; font-size: 20px; margin-bottom:8px; margin-top:0px;">
140
  Different Strokes (Prompts) for Different Folks (LLMs)
141
  </h1>
142
+ <hr style="margin-bottom:5px; margin-top:5px;">
143
  <h4 style="font-weight: 50; font-size: 14px; margin-bottom:0px; margin-top:0px;">
144
+ After reading <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a>, which is a good guide when starting to learn about prompts for large language models (LLMs), specifically OpenAI's LLMs, I was interested in seeing the results with for other LLMs. Hence, did up a simple demonstration of different prompts for different popular LLMs of different sizes. The prompt examples are taken from the Prompt Engineering Guide, and the LLMs that you can select below are all available on Hugging Face. If you are interested in comparing them with the prompts from OpenAI's model, you can refer to the writeup in the <a href="https://github.com/dair-ai/Prompt-Engineering-Guide">Prompt Engineering Guide</a> itself.
145
  </h4>
146
+ <hr style="margin-bottom:5px; margin-top:5px;">
147
+ <h5 style="font-weight: 50; font-size: 12px; margin-bottom:0px; margin-top:0px;">
148
+ Note: Larger models will take a while, especially on the first run.
149
+ </h5>
150
  </div>
151
  """)
152
 
 
168
  )
169
 
170
  max_tokens = gr.Slider(
171
+ 10, 250, step=1, value=100, label="Max. tokens (in output)",
172
+ ).style(
173
+ container=False,
174
+ )
175
+
176
+ check_truncated = gr.Checkbox(
177
+ label="Check for truncated output",
178
+ value=False,
179
  ).style(
180
  container=False,
181
  )
 
185
  label="Enter your prompt",
186
  show_label=False,
187
  # max_lines=2,
188
+ placeholder="Select your prompt from the examples below",
189
  ).style(
190
  container=False,
191
  )
 
193
 
194
  with gr.Row():
195
  output=gr.Textbox(
196
+ label="LLM output",
197
  show_label=True)
198
 
199
  gr.HTML("""
 
265
  inputs=[prompt])
266
 
267
  # process.click(load_and_generate, inputs=[model_name, prompt], outputs=[output])
268
+ if check_truncated:
269
+ process.click(generate_from_api_check, inputs=[prompt, model_name, temperature, max_tokens], outputs=[output])
270
+ else:
271
+ process.click(generate_from_api, inputs=[prompt, model_name, temperature, max_tokens], outputs=[output])
272
 
273
+ demo.launch(server_port=8080)
274
+ # demo.launch()