Nils Durner committed
Commit 43b6937 · 1 Parent(s): 59b8207

basic Whisper support

Files changed (1)
  1. app.py +82 -53
app.py CHANGED
````diff
@@ -12,6 +12,7 @@ log_to_console = False
 
 # constants
 image_embed_prefix = "🖼️🆙 "
+audio_embed_prefix = "🎙️🆙 "
 
 def encode_image(image_data):
     """Generates a prefix for image base64 data in the required format for the
@@ -74,9 +75,14 @@ def add_img(history, files):
     for file in files:
         if log_to_console:
             print(f"add_img {file.name}")
-        history = history + [(image_embed_prefix + file.name, None)]
+
+        if file.name.endswith((".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")):
+            prefix = audio_embed_prefix
+        else:
+            prefix = image_embed_prefix
+        history = history + [(prefix + file.name, None)]
 
-    gr.Info(f"Image added as {file.name}")
+    gr.Info(f"Media added as {file.name}")
 
     return history
 
@@ -111,55 +117,78 @@ def bot(message, history, oai_key, system_prompt, seed, temperature, max_tokens,
         api_key=oai_key
     )
 
-    seed_i = None
-    if seed:
-        seed_i = int(seed)
-
-    if log_to_console:
-        print(f"bot history: {str(history)}")
-
-    history_openai_format = []
-    user_msg_parts = []
-    if system_prompt:
-        history_openai_format.append({"role": "system", "content": system_prompt})
-    for human, assi in history:
-        if human is not None:
-            if human.startswith(image_embed_prefix):
-                with open(human.lstrip(image_embed_prefix), mode="rb") as f:
-                    content = f.read()
-                user_msg_parts.append({"type": "image_url",
-                                       "image_url":{"url": encode_image(content)}})
-            else:
-                user_msg_parts.append({"type": "text", "text": human})
-
-        if assi is not None:
-            if user_msg_parts:
-                history_openai_format.append({"role": "user", "content": user_msg_parts})
-                user_msg_parts = []
-
-            history_openai_format.append({"role": "assistant", "content": assi})
-
-    if message:
-        user_msg_parts.append({"type": "text", "text": human})
-
-    if user_msg_parts:
-        history_openai_format.append({"role": "user", "content": user_msg_parts})
-
-    if log_to_console:
-        print(f"br_prompt: {str(history_openai_format)}")
-
-    response = client.chat.completions.create(
-        model=model,
-        messages= history_openai_format,
-        temperature=temperature,
-        seed=seed_i,
-        max_tokens=max_tokens
-    )
-
-    if log_to_console:
-        print(f"br_response: {str(response)}")
-
-    history[-1][1] = response.choices[0].message.content
+    if model == "whisper":
+        result = ""
+        whisper_prompt = system_prompt
+        for human, assi in history:
+            if human is not None:
+                if human.startswith(audio_embed_prefix):
+                    audio_fn = human.lstrip(audio_embed_prefix)
+                    with open(audio_fn, "rb") as f:
+                        transcription = client.audio.transcriptions.create(
+                            model="whisper-1",
+                            prompt=whisper_prompt,
+                            file=f,
+                            response_format="text"
+                        )
+                    whisper_prompt += f"\n{transcription}"
+                    result += f"\n``` transcript {audio_fn}\n {transcription}\n```"
+                else:
+                    whisper_prompt += f"\n{human}"
+            if assi is not None:
+                whisper_prompt += f"\n{assi}"
+    else:
+        seed_i = None
+        if seed:
+            seed_i = int(seed)
+
+        if log_to_console:
+            print(f"bot history: {str(history)}")
+
+        history_openai_format = []
+        user_msg_parts = []
+        if system_prompt:
+            history_openai_format.append({"role": "system", "content": system_prompt})
+        for human, assi in history:
+            if human is not None:
+                if human.startswith(image_embed_prefix):
+                    with open(human.lstrip(image_embed_prefix), mode="rb") as f:
+                        content = f.read()
+                    user_msg_parts.append({"type": "image_url",
+                                           "image_url":{"url": encode_image(content)}})
+                else:
+                    user_msg_parts.append({"type": "text", "text": human})
+
+            if assi is not None:
+                if user_msg_parts:
+                    history_openai_format.append({"role": "user", "content": user_msg_parts})
+                    user_msg_parts = []
+
+                history_openai_format.append({"role": "assistant", "content": assi})
+
+        if message:
+            user_msg_parts.append({"type": "text", "text": human})
+
+        if user_msg_parts:
+            history_openai_format.append({"role": "user", "content": user_msg_parts})
+
+        if log_to_console:
+            print(f"br_prompt: {str(history_openai_format)}")
+
+        response = client.chat.completions.create(
+            model=model,
+            messages= history_openai_format,
+            temperature=temperature,
+            seed=seed_i,
+            max_tokens=max_tokens
+        )
+
+        if log_to_console:
+            print(f"br_response: {str(response)}")
+
+        result = response.choices[0].message.content
+
+    history[-1][1] = result
     if log_to_console:
         print(f"br_result: {str(history)}")
 
@@ -192,7 +221,7 @@ with gr.Blocks() as demo:
 
     oai_key = gr.Textbox(label="OpenAI API Key", elem_id="oai_key")
     model = gr.Dropdown(label="Model", value="gpt-4-turbo", allow_custom_value=True, elem_id="model",
-                        choices=["gpt-4-turbo", "gpt-4-turbo-preview", "gpt-4-1106-preview", "gpt-4", "gpt-4-vision-preview", "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-1106"])
+                        choices=["gpt-4-turbo", "gpt-4-turbo-preview", "gpt-4-1106-preview", "gpt-4", "gpt-4-vision-preview", "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-1106", "whisper"])
     system_prompt = gr.TextArea("You are a helpful yet diligent AI assistant. Answer faithfully and factually correct. Respond with 'I do not know' if uncertain.", label="System Prompt", lines=3, max_lines=250, elem_id="system_prompt")
     seed = gr.Textbox(label="Seed", elem_id="seed")
     temp = gr.Slider(0, 1, label="Temperature", elem_id="temp", value=1)
@@ -245,7 +274,7 @@ with gr.Blocks() as demo:
 
     with gr.Row():
        btn = gr.UploadButton("📁 Upload", size="sm", file_count="multiple")
-        img_btn = gr.UploadButton("🖼️ Upload", size="sm", file_count="multiple", file_types=["image"])
+        img_btn = gr.UploadButton("🖼️ Upload", size="sm", file_count="multiple", file_types=["image", "audio"])
        undo_btn = gr.Button("↩️ Undo")
        undo_btn.click(undo, inputs=[chatbot], outputs=[chatbot])
 
````
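The core of the new Whisper branch is its prompt chaining: each finished transcript is appended to `whisper_prompt`, so every subsequent audio file is decoded with the system prompt plus all earlier transcripts as context. A minimal standalone sketch of that pattern, assuming a valid API key; the file names and seed prompt are hypothetical:

```python
from openai import OpenAI

client = OpenAI(api_key="sk-...")  # placeholder key

# Seeded the way the commit seeds it with the system prompt.
whisper_prompt = "Transcript of a technical meeting."

for audio_fn in ["part1.mp3", "part2.mp3"]:  # hypothetical files
    with open(audio_fn, "rb") as f:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",       # Whisper API model, as in the commit
            prompt=whisper_prompt,   # earlier transcripts steer decoding
            file=f,
            response_format="text",  # plain string instead of a JSON object
        )
    whisper_prompt += f"\n{transcription}"  # chain context forward
    print(f"--- transcript {audio_fn} ---\n{transcription}")
```

Whisper only considers the tail of the prompt (per OpenAI's documentation, the final 224 tokens), so the unbounded accumulation costs memory but does not degrade decoding.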
 
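Uploads are tracked in the chat history as plain `(prefix + file.name, None)` tuples, and the emoji marker later decides whether an entry feeds the vision input or Whisper. One caveat when reading the diff: `str.lstrip(prefix)` strips any leading run of characters drawn from the marker string, not the literal marker. A sketch of the same round trip using `str.removeprefix` (Python 3.9+) instead, with a hypothetical file name:

```python
audio_embed_prefix = "🎙️🆙 "

# Store an upload the way add_img does.
history = [(audio_embed_prefix + "notes.m4a", None)]

# Recover the path before transcribing. removeprefix drops exactly the
# literal marker; lstrip keeps stripping while the leading character is
# any character of the marker string.
for human, assi in history:
    if human is not None and human.startswith(audio_embed_prefix):
        audio_fn = human.removeprefix(audio_embed_prefix)
        print(audio_fn)  # -> notes.m4a
```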