Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -88,7 +88,7 @@ vad_iterator = VADIterator(
|
|
88 |
speech_pad_ms=500,
|
89 |
)
|
90 |
|
91 |
-
|
92 |
def transcribe(stream, new_chunk):
|
93 |
sr, y = new_chunk
|
94 |
global text_str
|
@@ -119,19 +119,22 @@ def transcribe(stream, new_chunk):
|
|
119 |
# prompt = ASR_processor.batch_decode(
|
120 |
# pred_ids, skip_special_tokens=True, decode_with_timestamps=False
|
121 |
# )[0]
|
|
|
122 |
prompt=transcriber({"sampling_rate": sr, "raw": array})["text"]
|
123 |
print(prompt)
|
|
|
124 |
# prompt=ASR_model.transcribe(array)["text"].strip()
|
125 |
chat.append({"role": user_role, "content": prompt})
|
126 |
chat_messages = chat.to_list()
|
127 |
output=LM_pipe(
|
128 |
chat_messages,
|
129 |
-
max_new_tokens=
|
130 |
min_new_tokens=0,
|
131 |
temperature=0.0,
|
132 |
do_sample=False,
|
133 |
)
|
134 |
print(output)
|
|
|
135 |
generated_text = output[0]['generated_text'][-1]["content"]
|
136 |
print(generated_text)
|
137 |
# torch.mps.empty_cache()
|
@@ -142,6 +145,7 @@ def transcribe(stream, new_chunk):
|
|
142 |
audio_chunk = tts_model.tts_to_file(text_str, speaker_id, quiet=True)
|
143 |
audio_chunk = (audio_chunk * 32768).astype(np.int16)
|
144 |
audio_output=(44100, audio_chunk)
|
|
|
145 |
# else:
|
146 |
# audio_output=None
|
147 |
text_str1=text_str
|
|
|
88 |
speech_pad_ms=500,
|
89 |
)
|
90 |
|
91 |
+
import time
|
92 |
def transcribe(stream, new_chunk):
|
93 |
sr, y = new_chunk
|
94 |
global text_str
|
|
|
119 |
# prompt = ASR_processor.batch_decode(
|
120 |
# pred_ids, skip_special_tokens=True, decode_with_timestamps=False
|
121 |
# )[0]
|
122 |
+
start_time = time.time()
|
123 |
prompt=transcriber({"sampling_rate": sr, "raw": array})["text"]
|
124 |
print(prompt)
|
125 |
+
print("--- %s seconds ---" % (time.time() - start_time))
|
126 |
# prompt=ASR_model.transcribe(array)["text"].strip()
|
127 |
chat.append({"role": user_role, "content": prompt})
|
128 |
chat_messages = chat.to_list()
|
129 |
output=LM_pipe(
|
130 |
chat_messages,
|
131 |
+
max_new_tokens=32,
|
132 |
min_new_tokens=0,
|
133 |
temperature=0.0,
|
134 |
do_sample=False,
|
135 |
)
|
136 |
print(output)
|
137 |
+
print("--- %s seconds ---" % (time.time() - start_time))
|
138 |
generated_text = output[0]['generated_text'][-1]["content"]
|
139 |
print(generated_text)
|
140 |
# torch.mps.empty_cache()
|
|
|
145 |
audio_chunk = tts_model.tts_to_file(text_str, speaker_id, quiet=True)
|
146 |
audio_chunk = (audio_chunk * 32768).astype(np.int16)
|
147 |
audio_output=(44100, audio_chunk)
|
148 |
+
print("--- %s seconds ---" % (time.time() - start_time))
|
149 |
# else:
|
150 |
# audio_output=None
|
151 |
text_str1=text_str
|