Helw150 committed
Commit 987fe56
1 Parent(s): 7367b85
Files changed (1)
  1. app.py +12 -2
app.py CHANGED
@@ -17,7 +17,7 @@ from transformers import AutoModel
 from transformers.modeling_outputs import CausalLMOutputWithPast
 
 orca = pvorca.create(access_key=os.environ.get("ORCA_KEY"))
-
+LOADER_STR = "♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪loading♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪loading♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪♫"
 if gr.NO_RELOAD:
     diva_model = AutoModel.from_pretrained(
         "WillHeld/DiVA-llama-3-v0-8b", trust_remote_code=True
@@ -73,7 +73,15 @@ def response(state: AppState, audio: tuple):
     state.conversation.append(
         {"role": "user", "content": {"path": file_name, "mime_type": "audio/wav"}}
     )
-    state.conversation.append({"role": "assistant", "content": ""})
+    gr.Warning(
+        "The first response might take a second to generate as DiVA is loaded from Disk to the ZeroGPU!"
+    )
+    state.conversation.append(
+        {
+            "role": "assistant",
+            "content": LOADER_STR,
+        }
+    )
     yield state, state.conversation, None
     if spaces.config.Config.zero_gpu:
         if state.model_outs is not None:
@@ -96,6 +104,8 @@ def response(state: AppState, audio: tuple):
         prev_outs=(prev_outs if prev_outs is not None else None),
     ):
         prev_resp = state.conversation[-1]["content"]
+        if prev_resp == LOADER_STR:
+            prev_resp = ""
         state.conversation[-1]["content"] = resp
         pcm = stream.synthesize(resp[len(prev_resp) :])
         audio_chunk = None
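
For context on the last two hunks: the assistant bubble is now pre-filled with LOADER_STR, while the existing incremental TTS call stream.synthesize(resp[len(prev_resp) :]) speaks only the suffix of the current response beyond what was already spoken. Without the new guard, the first real chunk would be sliced by the placeholder's length and its opening words would never be synthesized. Below is a minimal sketch of that logic in isolation; fake_generate_stream and FakeSynth are invented stand-ins for diva_model.generate_stream and the Orca stream, not code from the Space.

# Minimal sketch of the placeholder-plus-incremental-TTS pattern this commit adds.
# fake_generate_stream and FakeSynth are invented stand-ins; they are not from app.py.

LOADER_STR = "...loading..."  # stand-in for the decorative loader string above


def fake_generate_stream():
    # Yields progressively longer prefixes of a reply, the way the model stream does.
    reply = "Hello there, how can I help?"
    for end in range(5, len(reply) + 1, 5):
        yield reply[:end]
    yield reply


class FakeSynth:
    # Records the text deltas it is asked to speak instead of producing PCM audio.
    def __init__(self):
        self.spoken = []

    def synthesize(self, text):
        if text:
            self.spoken.append(text)
        return None  # the real Orca stream returns PCM samples here


def respond(conversation, synth):
    # Pre-fill the assistant bubble with the loader string, as the second hunk does.
    conversation.append({"role": "assistant", "content": LOADER_STR})
    for resp in fake_generate_stream():
        prev_resp = conversation[-1]["content"]
        # The guard added in the third hunk: without it, the first delta would be
        # resp[len(LOADER_STR):], dropping the opening words of the real reply.
        if prev_resp == LOADER_STR:
            prev_resp = ""
        conversation[-1]["content"] = resp
        synth.synthesize(resp[len(prev_resp):])


conversation = [{"role": "user", "content": "hi"}]
synth = FakeSynth()
respond(conversation, synth)
print("".join(synth.spoken))        # Hello there, how can I help?
print(conversation[-1]["content"])  # placeholder replaced by the full reply

With the guard, the placeholder counts as empty history, so the first synthesized chunk covers the entire first partial response and the loader bubble is simply overwritten in place.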
 
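The gr.Warning call in the second hunk raises a toast in the Gradio UI without stopping the generator, so the loader bubble stays visible while DiVA loads. A self-contained illustration of that behavior, using gr.ChatInterface as a stand-in harness rather than the Space's Blocks and AppState setup:

import time

import gradio as gr


def respond(message, history):
    # gr.Warning pops a toast in the UI; the generator keeps running underneath it.
    gr.Warning("The first response might take a second to generate!")
    partial = ""
    for word in "Warming up, then answering...".split(" "):
        partial += word + " "
        time.sleep(0.2)  # stand-in for model loading / generation latency
        yield partial.strip()


# gr.ChatInterface accepts a generator fn and streams its yields into the chatbot.
demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch()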