Spaces:
Running
on
Zero
Running
on
Zero
Helw150
committed on
Commit
•
67da1a1
1
Parent(s):
3583d5c
Add Buffering to Avoid Speech Gaps due to Orca Slowdown
Browse files
- app.py +9 -5
- requirements.txt +1 -1
app.py
CHANGED
@@ -69,7 +69,7 @@ def response(state: AppState, audio: tuple):
|
|
69 |
if not audio:
|
70 |
return AppState()
|
71 |
|
72 |
-
file_name = f"
|
73 |
|
74 |
sf.write(file_name, audio[1], audio[0], format="wav")
|
75 |
|
@@ -103,7 +103,8 @@ def response(state: AppState, audio: tuple):
|
|
103 |
state.model_outs = None
|
104 |
prev_outs = causal_outs
|
105 |
stream = orca.stream_open()
|
106 |
-
|
|
|
107 |
for resp, outs in diva_audio(
|
108 |
(audio[0], audio[1]),
|
109 |
prev_outs=(prev_outs if prev_outs is not None else None),
|
@@ -112,15 +113,18 @@ def response(state: AppState, audio: tuple):
|
|
112 |
if prev_resp == LOADER_STR:
|
113 |
prev_resp = ""
|
114 |
state.conversation[-1]["content"] = resp
|
115 |
-
pcm = stream.synthesize(resp[len(prev_resp) :])
|
116 |
audio_chunk = None
|
|
|
117 |
if pcm is not None:
|
|
|
|
|
118 |
mp3_io = io.BytesIO()
|
119 |
sf.write(
|
120 |
-
mp3_io, np.asarray(
|
121 |
)
|
122 |
audio_chunk = mp3_io.getvalue()
|
123 |
mp3_io.close()
|
|
|
124 |
yield state, state.conversation, audio_chunk
|
125 |
|
126 |
del outs.logits
|
@@ -256,4 +260,4 @@ with gr.Blocks(theme=theme, js=js) as demo:
|
|
256 |
)
|
257 |
|
258 |
if __name__ == "__main__":
|
259 |
-
demo.launch()
|
|
|
69 |
if not audio:
|
70 |
return AppState()
|
71 |
|
72 |
+
file_name = f"./{xxhash.xxh32(bytes(audio[1])).hexdigest()}.wav"
|
73 |
|
74 |
sf.write(file_name, audio[1], audio[0], format="wav")
|
75 |
|
|
|
103 |
state.model_outs = None
|
104 |
prev_outs = causal_outs
|
105 |
stream = orca.stream_open()
|
106 |
+
i = 0
|
107 |
+
buff = []
|
108 |
for resp, outs in diva_audio(
|
109 |
(audio[0], audio[1]),
|
110 |
prev_outs=(prev_outs if prev_outs is not None else None),
|
|
|
113 |
if prev_resp == LOADER_STR:
|
114 |
prev_resp = ""
|
115 |
state.conversation[-1]["content"] = resp
|
|
|
116 |
audio_chunk = None
|
117 |
+
pcm = stream.synthesize(resp[len(prev_resp) :])
|
118 |
if pcm is not None:
|
119 |
+
buff.extend(pcm)
|
120 |
+
if len(buff) > (orca.sample_rate*2):
|
121 |
mp3_io = io.BytesIO()
|
122 |
sf.write(
|
123 |
+
mp3_io, np.asarray(buff[:orca.sample_rate]).astype(np.int16), orca.sample_rate, format="mp3"
|
124 |
)
|
125 |
audio_chunk = mp3_io.getvalue()
|
126 |
mp3_io.close()
|
127 |
+
buff = buff[orca.sample_rate:]
|
128 |
yield state, state.conversation, audio_chunk
|
129 |
|
130 |
del outs.logits
|
|
|
260 |
)
|
261 |
|
262 |
if __name__ == "__main__":
|
263 |
+
demo.launch(share=True)
|
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
transformers==4.43.3
|
2 |
-
gradio==5.0
|
3 |
spaces
|
4 |
accelerate
|
5 |
|
|
|
1 |
transformers==4.43.3
|
2 |
+
gradio==5.1.0
|
3 |
spaces
|
4 |
accelerate
|
5 |
|