Spaces:
Running
on
Zero
Running
on
Zero
Commit
•
9b12ac3
1
Parent(s):
b8d8555
Final
Browse files
app.py
CHANGED
@@ -13,6 +13,7 @@ from pydub import AudioSegment
|
|
13 |
from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
|
14 |
from huggingface_hub import InferenceClient
|
15 |
from streamer import ParlerTTSStreamer
|
|
|
16 |
|
17 |
|
18 |
device = "cuda:0" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
|
@@ -75,13 +76,12 @@ def generate_response(audio):
|
|
75 |
"For example, 'Magic 8 Ball, should I get a dog?', 'A dog is ready for you but are you ready for the dog?'")},
|
76 |
{"role": "user", "content": f"Magic 8 Ball please answer this question - {question}"}]
|
77 |
|
78 |
-
response = client.chat_completion(messages, max_tokens=64, seed=random.randint(1, 5000),
|
79 |
-
model="mistralai/Mistral-7B-Instruct-v0.3")
|
80 |
response = response.choices[0].message.content.replace("Magic 8 Ball", "")
|
81 |
return response, None, None
|
82 |
|
83 |
@spaces.GPU
|
84 |
-
def
|
85 |
|
86 |
play_steps_in_s = 2.0
|
87 |
play_steps = int(frame_rate * play_steps_in_s)
|
@@ -104,31 +104,28 @@ def generate_base(answer):
|
|
104 |
set_seed(SEED)
|
105 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
106 |
thread.start()
|
107 |
-
|
108 |
for new_audio in streamer:
|
109 |
-
print(f"Sample of length: {round(new_audio.shape[0] / sampling_rate, 2)} seconds")
|
110 |
yield answer, numpy_to_mp3(new_audio, sampling_rate=sampling_rate)
|
111 |
|
112 |
-
css=""".my-group {max-width: 600px !important; max-height: 600 !important;}
|
113 |
-
.my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
|
114 |
-
|
115 |
|
116 |
with gr.Blocks() as block:
|
117 |
gr.HTML(
|
118 |
f"""
|
119 |
<h1 style='text-align: center;'> Magic 8 Ball 🎱 </h1>
|
|
|
120 |
<p style='text-align: center;'> Powered by <a href="https://github.com/huggingface/parler-tts"> Parler-TTS</a>
|
121 |
"""
|
122 |
)
|
123 |
with gr.Group():
|
124 |
with gr.Row():
|
125 |
-
audio_out = gr.Audio(
|
126 |
-
answer = gr.Textbox(
|
127 |
state = gr.State()
|
128 |
-
|
129 |
with gr.Row():
|
130 |
audio_in = gr.Audio(label="Speak your question", sources="microphone", type="filepath")
|
131 |
|
132 |
-
audio_in.stop_recording(generate_response, audio_in, [state, answer, audio_out]).then(fn=
|
133 |
|
134 |
block.launch()
|
|
|
13 |
from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
|
14 |
from huggingface_hub import InferenceClient
|
15 |
from streamer import ParlerTTSStreamer
|
16 |
+
import time
|
17 |
|
18 |
|
19 |
device = "cuda:0" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
|
|
|
76 |
"For example, 'Magic 8 Ball, should I get a dog?', 'A dog is ready for you but are you ready for the dog?'")},
|
77 |
{"role": "user", "content": f"Magic 8 Ball please answer this question - {question}"}]
|
78 |
|
79 |
+
response = client.chat_completion(messages, max_tokens=64, seed=random.randint(1, 5000), model="mistralai/Mistral-7B-Instruct-v0.3")
|
|
|
80 |
response = response.choices[0].message.content.replace("Magic 8 Ball", "")
|
81 |
return response, None, None
|
82 |
|
83 |
@spaces.GPU
|
84 |
+
def read_response(answer):
|
85 |
|
86 |
play_steps_in_s = 2.0
|
87 |
play_steps = int(frame_rate * play_steps_in_s)
|
|
|
104 |
set_seed(SEED)
|
105 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
106 |
thread.start()
|
107 |
+
start = time.time()
|
108 |
for new_audio in streamer:
|
109 |
+
print(f"Sample of length: {round(new_audio.shape[0] / sampling_rate, 2)} seconds after {time.time() - start} seconds")
|
110 |
yield answer, numpy_to_mp3(new_audio, sampling_rate=sampling_rate)
|
111 |
|
|
|
|
|
|
|
112 |
|
113 |
with gr.Blocks() as block:
|
114 |
gr.HTML(
|
115 |
f"""
|
116 |
<h1 style='text-align: center;'> Magic 8 Ball 🎱 </h1>
|
117 |
+
<h3 style='text-align: center;'> Ask a question and receive wisdom </h3>
|
118 |
<p style='text-align: center;'> Powered by <a href="https://github.com/huggingface/parler-tts"> Parler-TTS</a>
|
119 |
"""
|
120 |
)
|
121 |
with gr.Group():
|
122 |
with gr.Row():
|
123 |
+
audio_out = gr.Audio(label="Spoken Answer", streaming=True, autoplay=True, loop=False)
|
124 |
+
answer = gr.Textbox(label="Answer")
|
125 |
state = gr.State()
|
|
|
126 |
with gr.Row():
|
127 |
audio_in = gr.Audio(label="Speak your question", sources="microphone", type="filepath")
|
128 |
|
129 |
+
audio_in.stop_recording(generate_response, audio_in, [state, answer, audio_out]).then(fn=read_response, inputs=state, outputs=[answer, audio_out])
|
130 |
|
131 |
block.launch()
|