Update duplex.py
duplex.py CHANGED
@@ -3,8 +3,11 @@ import json
 import random
 import string
 
+import numpy as np
 import gradio as gr
 import requests
+import soundfile as sf
+
 from transformers import pipeline, set_seed
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import logging
@@ -14,11 +17,14 @@ import gradio as gr
 from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
 
 DEBUG = os.environ.get("DEBUG", "false")[0] in "ty1"
-HF_AUTH_TOKEN = os.environ.get("HF_AUTH_TOKEN", None)
 MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))
+DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "English")
 
 HEADER = """
 # Poor Man's Duplex
+
+Talk to a language model like you talk on a Walkie-Talkie! Well, with larger latencies.
+The models are [EleutherAI's GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) for English, and [BERTIN GPT-J-6B](https://huggingface.co/bertin-project/bertin-gpt-j-6B) for Spanish.
 """.strip()
 
 FOOTER = """
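All configuration is read from environment variables. Note that the DEBUG flag only inspects the first character of the value, so any string starting with "t", "y", or "1" enables it. A standalone illustration of that parsing (not part of the commit):

import os

# Standalone illustration of the first-character flag check used above.
for value in ("true", "yes", "1", "false", "no", "0"):
    os.environ["DEBUG"] = value
    debug = os.environ.get("DEBUG", "false")[0] in "ty1"
    print(f"DEBUG={value!r} -> {debug}")  # true/yes/1 -> True, the rest -> False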
@@ -45,7 +51,8 @@ def generate_es(text, **kwargs):
     api_uri = "https://hf.space/embed/bertin-project/bertin-gpt-j-6B/+/api/predict/"
     response = requests.post(api_uri, data=json.dumps({"data": [text, 100, 100, 50, 0.95, True, True]}))
     if response.ok:
-
+        if DEBUG:
+            print(response.json())
         return response.json()["data"][0]
     else:
         return ""
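For context, generate_es queries the hosted BERTIN Space over its embed API instead of loading the 6B model locally. A minimal standalone sketch of the same request; the positional values copy the call above, and which of them is max length, top-k, top-p, and so on is an assumption, not documented in this file:

import json
import requests

# Query the BERTIN Space's predict endpoint the way generate_es does.
# The payload is positional; the parameter meanings are assumed.
api_uri = "https://hf.space/embed/bertin-project/bertin-gpt-j-6B/+/api/predict/"
payload = {"data": ["Había una vez", 100, 100, 50, 0.95, True, True]}
response = requests.post(api_uri, data=json.dumps(payload), timeout=60)
if response.ok:
    print(response.json()["data"][0])  # generated continuation
else:
    print("request failed:", response.status_code)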
@@ -65,9 +72,14 @@ speak_en = gr.Interface.load(f"huggingface/{tts_model_name}")
 transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
 generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B")
 
+empty_audio = 'empty.flac'
+sf.write(empty_audio, [], 16000)
+deuncase = gr.Interface.load("huggingface/pere/DeUnCaser")
+
 def generate_en(text, **kwargs):
     response = generate_iface(text)
-
+    if DEBUG:
+        print(response)
     return response or ""
 
 
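The new empty_audio file gives the output audio component a valid value to hold before the first reply. A standalone check of the same soundfile call, writing zero frames at 16 kHz and reading them back:

import numpy as np
import soundfile as sf

# Write a zero-length 16 kHz FLAC, as the Space does at startup,
# then read it back to confirm it is a valid (empty) audio file.
sf.write("empty.flac", np.zeros(0, dtype="float32"), 16000)
data, samplerate = sf.read("empty.flac")
assert len(data) == 0 and samplerate == 16000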
@@ -97,11 +109,28 @@ def select_lang_vars(lang):
     return AGENT, USER, CONTEXT
 
 
+def format_chat(history):
+    interventions = []
+    for user, bot in history:
+        interventions.append(f"""
+<div data-testid="user" style="background-color:#16a34a" class="px-3 py-2 rounded-[22px] rounded-bl-none place-self-start text-white ml-7 text-sm">{user}</div>
+<div data-testid="bot" style="background-color:gray" class="px-3 py-2 rounded-[22px] rounded-br-none text-white ml-7 text-sm">{bot}</div>
+""")
+    return f"""<details><summary>Conversation log</summary>
+<div class="overflow-y-auto h-[40vh]">
+<div class="flex flex-col items-end space-y-4 p-3">
+{"".join(interventions)}
+</div>
+</div>
+</details>"""
+
 
 def chat_with_gpt(lang, agent, user, context, audio_in, history):
+    if not audio_in:
+        return history, history, empty_audio, format_chat(history)
     generate, transcribe, speak = select_lang(lang)
     AGENT, USER, _ = select_lang_vars(lang)
-    user_message = transcribe(audio_in)
+    user_message = deuncase(transcribe(audio_in))
     # agent = AGENT
     # user = USER
     generation_kwargs = {
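The new format_chat helper turns the (user, bot) history pairs into a collapsible HTML log. A hypothetical two-turn history shows what it produces:

# Hypothetical usage of format_chat; history entries are (user, bot) pairs.
history = [
    ("hola, ¿qué tal?", "Muy bien, ¿y tú?"),
    ("what's the weather like?", "Sunny, I hope."),
]
html = format_chat(history)
# html is a <details> element labeled "Conversation log" containing one
# green user bubble and one gray bot bubble per exchange.
print(html)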
@@ -156,17 +185,17 @@ def chat_with_gpt(lang, agent, user, context, audio_in, history):
     if not response.strip():
         response = "Lo siento, no puedo hablar ahora" if lang.lower() == "spanish" else "Sorry, can't talk right now"
     history.append((user_message, response))
-    return history, history, speak(response)
+    return history, history, speak(response), format_chat(history)
 
 
 with gr.Blocks() as demo:
     gr.Markdown(HEADER)
-    lang = gr.Radio(label="Language", choices=["English", "Spanish"],
-    AGENT, USER, CONTEXT = select_lang_vars(
+    lang = gr.Radio(label="Language", choices=["English", "Spanish"], value=DEFAULT_LANG, type="value")
+    AGENT, USER, CONTEXT = select_lang_vars(DEFAULT_LANG)
     context = gr.Textbox(label="Context", lines=5, value=CONTEXT)
     with gr.Row():
         audio_in = gr.Audio(label="User", source="microphone", type="filepath")
-        audio_out = gr.Audio(label="Agent", interactive=False)
+        audio_out = gr.Audio(label="Agent", interactive=False, value=empty_audio)
     # chat_btn = gr.Button("Submit")
     with gr.Row():
         user = gr.Textbox(label="User", value=USER)
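Because audio_in is created with type="filepath", Gradio hands the microphone recording to callbacks as a path string on disk (or None when cleared), which is what the ASR pipelines above expect. A minimal hypothetical callback illustrating that contract:

# Hypothetical callback showing the type="filepath" contract.
def inspect_recording(audio_path):
    if not audio_path:                     # component cleared, nothing recorded
        return "no recording yet"
    return f"received file: {audio_path}"  # e.g. a temporary .wav path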
@@ -175,7 +204,8 @@ with gr.Blocks() as demo:
     history = gr.Variable(value=[])
     chatbot = gr.Variable()  # gr.Chatbot(color_map=("green", "gray"), visible=False)
     # chat_btn.click(chat_with_gpt, inputs=[lang, agent, user, context, audio_in, history], outputs=[chatbot, history, audio_out])
-
+    log = gr.HTML()
+    audio_in.change(chat_with_gpt, inputs=[lang, agent, user, context, audio_in, history], outputs=[chatbot, history, audio_out, log])
     gr.Markdown(FOOTER)
 
 demo.launch()
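The last hunk carries the main behavioral change: instead of a submit button, the audio_in.change event now drives the loop, and the callback's four return values are mapped positionally onto the four listed outputs. A stripped-down sketch of the same wiring with hypothetical names, using the same Gradio generation as the Space (gr.Variable, .change):

import gradio as gr

# Hypothetical demo of the .change wiring: inputs are passed to the
# callback positionally, and its returned tuple fills the outputs.
def on_text(text, history):
    history = history + [text]
    return history, "<br>".join(history)

with gr.Blocks() as sketch:
    box = gr.Textbox(label="Input")
    history = gr.Variable(value=[])
    log = gr.HTML()
    box.change(on_text, inputs=[box, history], outputs=[history, log])

sketch.launch()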