freddyaboulton HF staff committed
Commit
9b12ac3
1 Parent(s): b8d8555
Files changed (1)
  1. app.py +9 -12
app.py CHANGED
@@ -13,6 +13,7 @@ from pydub import AudioSegment
 from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
 from huggingface_hub import InferenceClient
 from streamer import ParlerTTSStreamer
+import time
 
 
 device = "cuda:0" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
@@ -75,13 +76,12 @@ def generate_response(audio):
                  "For example, 'Magic 8 Ball, should I get a dog?', 'A dog is ready for you but are you ready for the dog?'")},
                 {"role": "user", "content": f"Magic 8 Ball please answer this question - {question}"}]
 
-    response = client.chat_completion(messages, max_tokens=64, seed=random.randint(1, 5000),
-                                      model="mistralai/Mistral-7B-Instruct-v0.3")
+    response = client.chat_completion(messages, max_tokens=64, seed=random.randint(1, 5000), model="mistralai/Mistral-7B-Instruct-v0.3")
     response = response.choices[0].message.content.replace("Magic 8 Ball", "")
     return response, None, None
 
 @spaces.GPU
-def generate_base(answer):
+def read_response(answer):
 
     play_steps_in_s = 2.0
     play_steps = int(frame_rate * play_steps_in_s)
@@ -104,31 +104,28 @@ def generate_base(answer):
     set_seed(SEED)
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
-
+    start = time.time()
     for new_audio in streamer:
-        print(f"Sample of length: {round(new_audio.shape[0] / sampling_rate, 2)} seconds")
+        print(f"Sample of length: {round(new_audio.shape[0] / sampling_rate, 2)} seconds after {time.time() - start} seconds")
         yield answer, numpy_to_mp3(new_audio, sampling_rate=sampling_rate)
 
-css=""".my-group {max-width: 600px !important; max-height: 600 !important;}
-            .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
-
 
 with gr.Blocks() as block:
     gr.HTML(
         f"""
         <h1 style='text-align: center;'> Magic 8 Ball 🎱 </h1>
+        <h3 style='text-align: center;'> Ask a question and receive wisdom </h3>
         <p style='text-align: center;'> Powered by <a href="https://github.com/huggingface/parler-tts"> Parler-TTS</a>
         """
     )
     with gr.Group():
         with gr.Row():
-            audio_out = gr.Audio(visible=True, streaming=True, autoplay=True)
-            answer = gr.Textbox(visible=True, label="Answer")
+            audio_out = gr.Audio(label="Spoken Answer", streaming=True, autoplay=True, loop=False)
+            answer = gr.Textbox(label="Answer")
            state = gr.State()
-
        with gr.Row():
            audio_in = gr.Audio(label="Speak you question", sources="microphone", type="filepath")
 
-    audio_in.stop_recording(generate_response, audio_in, [state, answer, audio_out]).then(fn=generate_base, inputs=state, outputs=[answer, audio_out])
+    audio_in.stop_recording(generate_response, audio_in, [state, answer, audio_out]).then(fn=read_response, inputs=state, outputs=[answer, audio_out])
 
 block.launch()
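
For context on the hunk around thread.start(): read_response runs model.generate on a background thread and iterates a streamer object, yielding each audio chunk as soon as it is decoded; the commit only adds timing around that loop. Below is a minimal sketch of the same thread-plus-streamer pattern, written with transformers' generic TextIteratorStreamer as a stand-in for the Space's custom ParlerTTSStreamer; the model name and prompt are placeholders, not what the Space uses.

# Minimal sketch of the background-thread + streamer pattern used by read_response.
# TextIteratorStreamer stands in for the Space's ParlerTTSStreamer (an audio variant of the same idea).
import time
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "gpt2"  # placeholder model, not the one used in the Space
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

inputs = tokenizer("The magic 8 ball says", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=32)

# generate() blocks, so it runs on a worker thread while the main thread
# consumes chunks from the streamer as they become available.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

start = time.time()  # same timing instrumentation this commit adds
for new_text in streamer:
    print(f"chunk after {time.time() - start:.2f}s: {new_text!r}")
thread.join()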
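
The other change worth a note is the rewired event chain: stop_recording runs one callback that fills gr.State, and .then() hands that state to a generator (renamed from generate_base to read_response) whose successive yields stream into the output components. A minimal sketch of that chaining is below; the two callbacks are placeholders rather than the Space's generate_response/read_response, and the streamed output is a Textbox instead of MP3 chunks to keep the example self-contained.

# Minimal sketch of the stop_recording -> .then() chaining that app.py relies on.
import time

import gradio as gr

def transcribe_and_answer(audio_path):
    # Stand-in for generate_response: would transcribe the audio and query the LLM.
    # Returns the answer for gr.State and None for the textbox.
    return "Signs point to yes.", None

def stream_answer(answer):
    # Stand-in for read_response: a generator whose yields stream to the output,
    # the way the real function yields MP3 chunks from the TTS streamer.
    shown = ""
    for word in answer.split():
        shown += word + " "
        time.sleep(0.2)
        yield shown

with gr.Blocks() as demo:
    state = gr.State()
    answer_box = gr.Textbox(label="Answer")
    audio_in = gr.Audio(label="Speak your question", sources="microphone", type="filepath")

    # First event stores the answer in state, then the chained generator
    # streams it progressively into the textbox.
    audio_in.stop_recording(
        transcribe_and_answer, audio_in, [state, answer_box]
    ).then(fn=stream_answer, inputs=state, outputs=answer_box)

demo.launch()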