Update app.py

app.py CHANGED
@@ -20,7 +20,7 @@ import urllib
 import PIL.Image
 import io
 import datasets
-
+from streaming_stt_nemo import Model as nemo
 import gradio as gr
 from transformers import TextIteratorStreamer
 from transformers import Idefics2ForConditionalGeneration
@@ -70,39 +70,31 @@ theme = gr.themes.Soft(
     background_fill_secondary_dark="#111111",
     color_accent_soft_dark="transparent")
 
-
-BATCH_SIZE = 10
-
-device = 0 if torch.cuda.is_available() else "cpu"
-
-
-    task="automatic-speech-recognition",
-    model=MODEL_NAME,
-    chunk_length_s=30,
-    device=device,
-)
-
-@spaces.GPU(queue=False)
-def transcribe(inputs):
-    if inputs is None:
-        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-
+default_lang = "en"
+
+engines = { default_lang: nemo(default_lang) }
+
+def transcribe(audio):
+    lang = "en"
+    model = engines[lang]
+    text = model.stt_file(audio)[0]
+    return text
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 def client_fn(model):
-    if "
-        return InferenceClient("
-    elif "
-        return InferenceClient("
+    if "Nous" in model:
+        return InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
+    elif "Star" in model:
+        return InferenceClient("HuggingFaceH4/starchat2-15b-v0.1")
     elif "Mistral" in model:
         return InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
     elif "Phi" in model:
         return InferenceClient("microsoft/Phi-3-mini-4k-instruct")
+    elif "Zephyr" in model:
+        return InferenceClient("HuggingFaceH4/zephyr-7b-beta")
     else:
-        return InferenceClient("
+        return InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 def randomize_seed_fn(seed: int) -> int:
     seed = random.randint(0, 999999)
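For context: the removed block built a GPU-backed transformers ASR pipeline (decorated with @spaces.GPU), while the replacement does speech-to-text on CPU via streaming_stt_nemo; client_fn now routes the dropdown choice to Nous Hermes, StarChat2, Zephyr, or Mixtral endpoints alongside the surviving Mistral and Phi branches. A minimal sketch of the new STT path, assuming only the streaming_stt_nemo API visible in the diff (Model(lang), and stt_file(path) returning a list of transcripts); "sample.wav" is a placeholder path:

    from streaming_stt_nemo import Model as nemo

    default_lang = "en"
    engines = {default_lang: nemo(default_lang)}  # one engine per language

    def transcribe(audio):
        model = engines[default_lang]
        # stt_file() appears to return a list of hypotheses; keep the first.
        return model.stt_file(audio)[0]

    print(transcribe("sample.wav"))  # placeholder audio file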
@@ -117,16 +109,12 @@ def models(text, model="Mixtral 8x7B", seed=42):
 
     client = client_fn(model)
     generate_kwargs = dict(
-        temperature=0.7,
         max_new_tokens=512,
-        top_p=0.95,
-        repetition_penalty=1,
-        do_sample=True,
         seed=seed,
     )
 
     formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
-    stream = 
+    stream = client.text_generation(
         formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
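The trimmed generate_kwargs leave sampling at the server defaults, and the previously truncated call is now spelled out as client.text_generation. A sketch of how that streaming call is consumed with huggingface_hub's InferenceClient; the loop body is not shown in the hunk, but with details=True each streamed chunk carries token metadata, so appending response.token.text is the usual pattern (the prompt here is illustrative):

    from huggingface_hub import InferenceClient

    client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
    generate_kwargs = dict(max_new_tokens=512, seed=42)

    stream = client.text_generation(
        "Say hello.", **generate_kwargs, stream=True, details=True, return_full_text=False)

    output = ""
    for response in stream:
        # details=True yields chunks with token metadata rather than bare strings.
        output += response.token.text
    print(output)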
@@ -135,9 +123,9 @@ def models(text, model="Mixtral 8x7B", seed=42):
 
     return output
 
-async def respond(audio):
+async def respond(audio, model, seed):
     user = transcribe(audio)
-    reply = 
+    reply = models(user, model, seed)
     communicate = edge_tts.Communicate(reply)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
         tmp_path = tmp_file.name
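respond now threads the dropdown model and seed through to models() before handing the reply to edge-tts. A sketch of the synthesis tail, assuming edge_tts's Communicate(text).save(path) coroutine; the hunk cuts off at tmp_path, so the save call after it is an assumption, and the reply text is illustrative:

    import asyncio
    import tempfile

    import edge_tts

    async def speak(reply):
        communicate = edge_tts.Communicate(reply)
        # Reserve a temp file name, then let edge-tts write the audio to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            tmp_path = tmp_file.name
        await communicate.save(tmp_path)  # assumed continuation of the hunk
        return tmp_path

    print(asyncio.run(speak("Hello from the voice tab.")))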
@@ -548,14 +536,7 @@ with gr.Blocks(
 
 with gr.Blocks() as voice:
     with gr.Row():
-        select = gr.Dropdown([ 'Mixtral 8x7B',
-                               'Llama 3 8B',
-                               'Mistral 7B v0.3',
-                               'Phi 3 mini',
-                             ],
-                             value="Mixtral 8x7B",
-                             label="Model"
-                            )
+        select = gr.Dropdown([ 'Nous Hermes Mixtral 8x7B DPO', 'Mixtral 8x7B','StarChat2 15b','Mistral 7B v0.3','Phi 3 mini', 'Zephyr 7b' ], value="Mistral 7B v0.3", label="Select Model")
         seed = gr.Slider(
             label="Seed",
             minimum=0,
@@ -571,7 +552,7 @@ with gr.Blocks() as voice:
               elem_classes="audio")
     gr.Interface(
         fn=respond,
-        inputs=[input],
+        inputs=[input, select,seed],
         outputs=[output], api_name="translate", live=True)
 
 with gr.Blocks() as livechat:
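With live=True the interface re-runs respond whenever any input changes, and Gradio feeds component values to the function positionally, so the new inputs list must match respond's (audio, model, seed) signature. A self-contained sketch with simplified stand-in components and a stub respond; the labels and option list are placeholders, only the wiring matches the diff:

    import gradio as gr

    async def respond(audio, model, seed):
        # Stub: the real app transcribes, generates a reply, and speaks it.
        return audio

    with gr.Blocks() as voice:
        with gr.Row():
            select = gr.Dropdown(['Mistral 7B v0.3', 'Phi 3 mini'],
                                 value="Mistral 7B v0.3", label="Select Model")
            seed = gr.Slider(label="Seed", minimum=0, maximum=999999, value=42)
        input = gr.Audio(sources=["microphone"], type="filepath", label="Speak")
        output = gr.Audio(label="Reply", autoplay=True)
        # Component values are passed positionally: (audio, model, seed).
        gr.Interface(fn=respond, inputs=[input, select, seed], outputs=[output],
                     api_name="translate", live=True)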
|