versae committed on
Commit
91c36f3
1 Parent(s): 11abfd0

Update duplex.py

Browse files
Files changed (1) hide show
  1. duplex.py +39 -9
duplex.py CHANGED
@@ -3,8 +3,11 @@ import json
3
  import random
4
  import string
5
 
 
6
  import gradio as gr
7
  import requests
 
 
8
  from transformers import pipeline, set_seed
9
  from transformers import AutoTokenizer, AutoModelForCausalLM
10
  import logging
@@ -14,11 +17,14 @@ import gradio as gr
14
  from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
15
 
16
  DEBUG = os.environ.get("DEBUG", "false")[0] in "ty1"
17
- HF_AUTH_TOKEN = os.environ.get("HF_AUTH_TOKEN", None)
18
  MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))
 
19
 
20
  HEADER = """
21
  # Poor Man's Duplex
 
 
 
22
  """.strip()
23
 
24
  FOOTER = """
@@ -45,7 +51,8 @@ def generate_es(text, **kwargs):
45
  api_uri = "https://hf.space/embed/bertin-project/bertin-gpt-j-6B/+/api/predict/"
46
  response = requests.post(api_uri, data=json.dumps({"data": [text, 100, 100, 50, 0.95, True, True]}))
47
  if response.ok:
48
- print(response.json())
 
49
  return response.json()["data"][0]
50
  else:
51
  return ""
@@ -65,9 +72,14 @@ speak_en = gr.Interface.load(f"huggingface/{tts_model_name}")
65
  transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
66
  generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B")
67
 
 
 
 
 
68
  def generate_en(text, **kwargs):
69
  response = generate_iface(text)
70
- print(response)
 
71
  return response or ""
72
 
73
 
@@ -97,11 +109,28 @@ def select_lang_vars(lang):
97
  return AGENT, USER, CONTEXT
98
 
99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  def chat_with_gpt(lang, agent, user, context, audio_in, history):
 
 
102
  generate, transcribe, speak = select_lang(lang)
103
  AGENT, USER, _ = select_lang_vars(lang)
104
- user_message = transcribe(audio_in)
105
  # agent = AGENT
106
  # user = USER
107
  generation_kwargs = {
@@ -156,17 +185,17 @@ def chat_with_gpt(lang, agent, user, context, audio_in, history):
156
  if not response.strip():
157
  response = "Lo siento, no puedo hablar ahora" if lang.lower() == "Spanish" else "Sorry, can't talk right now"
158
  history.append((user_message, response))
159
- return history, history, speak(response)
160
 
161
 
162
  with gr.Blocks() as demo:
163
  gr.Markdown(HEADER)
164
- lang = gr.Radio(label="Language", choices=["English", "Spanish"], default="English", type="value")
165
- AGENT, USER, CONTEXT = select_lang_vars("English")
166
  context = gr.Textbox(label="Context", lines=5, value=CONTEXT)
167
  with gr.Row():
168
  audio_in = gr.Audio(label="User", source="microphone", type="filepath")
169
- audio_out = gr.Audio(label="Agent", interactive=False)
170
  # chat_btn = gr.Button("Submit")
171
  with gr.Row():
172
  user = gr.Textbox(label="User", value=USER)
@@ -175,7 +204,8 @@ with gr.Blocks() as demo:
175
  history = gr.Variable(value=[])
176
  chatbot = gr.Variable() # gr.Chatbot(color_map=("green", "gray"), visible=False)
177
  # chat_btn.click(chat_with_gpt, inputs=[lang, agent, user, context, audio_in, history], outputs=[chatbot, history, audio_out])
178
- audio_in.change(chat_with_gpt, inputs=[lang, agent, user, context, audio_in, history], outputs=[chatbot, history, audio_out])
 
179
  gr.Markdown(FOOTER)
180
 
181
  demo.launch()
 
3
  import random
4
  import string
5
 
6
+ import numpy as np
7
  import gradio as gr
8
  import requests
9
+ import soundfile as sf
10
+
11
  from transformers import pipeline, set_seed
12
  from transformers import AutoTokenizer, AutoModelForCausalLM
13
  import logging
 
17
  from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
18
 
19
  DEBUG = os.environ.get("DEBUG", "false")[0] in "ty1"
 
20
  MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))
21
+ DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "English")
22
 
23
  HEADER = """
24
  # Poor Man's Duplex
25
+
26
+ Talk to a language model like you talk on a Walkie-Talkie! Well, with larger latencies.
27
+ The models are [EleutherAI's GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) for English, and [BERTIN GPT-J-6B](https://huggingface.co/bertin-project/bertin-gpt-j-6B) for Spanish.
28
  """.strip()
29
 
30
  FOOTER = """
 
51
  api_uri = "https://hf.space/embed/bertin-project/bertin-gpt-j-6B/+/api/predict/"
52
  response = requests.post(api_uri, data=json.dumps({"data": [text, 100, 100, 50, 0.95, True, True]}))
53
  if response.ok:
54
+ if DEBUG:
55
+ print(response.json())
56
  return response.json()["data"][0]
57
  else:
58
  return ""
 
72
  transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
73
  generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B")
74
 
75
+ empty_audio = 'empty.flac'
76
+ sf.write(empty_audio, [], 16000)
77
+ deuncase = gr.Interface.load("huggingface/pere/DeUnCaser")
78
+
79
def generate_en(text, **kwargs):
    """Generate an English continuation for `text` via the hosted GPT-J-6B Space.

    Extra keyword arguments are accepted for signature parity with the other
    `generate_*` helpers but are not forwarded to the interface.
    Returns the generated text, or "" when the interface yields a falsy result.
    """
    completion = generate_iface(text)
    if DEBUG:
        print(completion)
    return completion if completion else ""
84
 
85
 
 
109
  return AGENT, USER, CONTEXT
110
 
111
 
112
def format_chat(history):
    """Render the conversation history as a collapsible HTML log.

    Args:
        history: list of (user_message, bot_response) string pairs, oldest first.

    Returns:
        An HTML string: a ``<details>`` element titled "Conversation log"
        containing one green user bubble and one gray bot bubble per turn.
    """
    interventions = []
    for user, bot in history:
        interventions.append(f"""
<div data-testid="user" style="background-color:#16a34a" class="px-3 py-2 rounded-[22px] rounded-bl-none place-self-start text-white ml-7 text-sm">{user}</div>
<div data-testid="bot" style="background-color:gray" class="px-3 py-2 rounded-[22px] rounded-br-none text-white ml-7 text-sm">{bot}</div>
""")
    # Bug fix: the element opened as <details> must be closed with </details>;
    # the original closed it with a stray second </summary>, yielding invalid HTML.
    return f"""<details><summary>Conversation log</summary>
<div class="overflow-y-auto h-[40vh]">
<div class="flex flex-col items-end space-y-4 p-3">
{"".join(interventions)}
</div>
</div>
</details>"""
126
+
127
 
128
  def chat_with_gpt(lang, agent, user, context, audio_in, history):
129
+ if not audio_in:
130
+ return history, history, empty_audio, format_chat(history)
131
  generate, transcribe, speak = select_lang(lang)
132
  AGENT, USER, _ = select_lang_vars(lang)
133
+ user_message = deuncase(transcribe(audio_in))
134
  # agent = AGENT
135
  # user = USER
136
  generation_kwargs = {
 
185
  if not response.strip():
186
  response = "Lo siento, no puedo hablar ahora" if lang.lower() == "Spanish" else "Sorry, can't talk right now"
187
  history.append((user_message, response))
188
+ return history, history, speak(response), format_chat(history)
189
 
190
 
191
  with gr.Blocks() as demo:
192
  gr.Markdown(HEADER)
193
+ lang = gr.Radio(label="Language", choices=["English", "Spanish"], value=DEFAULT_LANG, type="value")
194
+ AGENT, USER, CONTEXT = select_lang_vars(DEFAULT_LANG)
195
  context = gr.Textbox(label="Context", lines=5, value=CONTEXT)
196
  with gr.Row():
197
  audio_in = gr.Audio(label="User", source="microphone", type="filepath")
198
+ audio_out = gr.Audio(label="Agent", interactive=False, value=empty_audio)
199
  # chat_btn = gr.Button("Submit")
200
  with gr.Row():
201
  user = gr.Textbox(label="User", value=USER)
 
204
  history = gr.Variable(value=[])
205
  chatbot = gr.Variable() # gr.Chatbot(color_map=("green", "gray"), visible=False)
206
  # chat_btn.click(chat_with_gpt, inputs=[lang, agent, user, context, audio_in, history], outputs=[chatbot, history, audio_out])
207
+ log = gr.HTML()
208
+ audio_in.change(chat_with_gpt, inputs=[lang, agent, user, context, audio_in, history], outputs=[chatbot, history, audio_out, log])
209
  gr.Markdown(FOOTER)
210
 
211
  demo.launch()