Spaces: Running on Zero
Added models in Voice chat and Improved UI
Browse files
app.py
CHANGED
@@ -54,9 +54,20 @@ def videochat(image3, prompt3):
|
|
54 |
decoded_text = decoded_text[:-10]
|
55 |
yield decoded_text
|
56 |
|
57 |
-
theme = gr.themes.
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
MODEL_NAME = "openai/whisper-medium"
|
62 |
BATCH_SIZE = 10
|
@@ -78,18 +89,39 @@ def transcribe(inputs):
|
|
78 |
text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"})["text"]
|
79 |
return text
|
80 |
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
|
83 |
system_instructions1 = "[SYSTEM] Answer as Real OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. You will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
|
84 |
|
85 |
-
def
|
|
|
|
|
|
|
|
|
|
|
86 |
generate_kwargs = dict(
|
87 |
temperature=0.7,
|
88 |
max_new_tokens=512,
|
89 |
top_p=0.95,
|
90 |
repetition_penalty=1,
|
91 |
do_sample=True,
|
92 |
-
seed=
|
93 |
)
|
94 |
|
95 |
formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
|
@@ -115,7 +147,7 @@ DEVICE = torch.device("cuda")
|
|
115 |
MODELS = {
|
116 |
"idefics2-8b-chatty": Idefics2ForConditionalGeneration.from_pretrained(
|
117 |
"HuggingFaceM4/idefics2-8b-chatty",
|
118 |
-
torch_dtype=torch.
|
119 |
_attn_implementation="flash_attention_2",
|
120 |
).to(DEVICE),
|
121 |
}
|
@@ -521,16 +553,12 @@ with gr.Blocks() as voice:
|
|
521 |
autoplay=True,
|
522 |
elem_classes="audio")
|
523 |
gr.Interface(
|
524 |
-
batch=True,
|
525 |
-
max_batch_size=10,
|
526 |
fn=respond,
|
527 |
inputs=[input],
|
528 |
-
outputs=[output], live=True)
|
529 |
|
530 |
with gr.Blocks() as livechat:
|
531 |
gr.Interface(
|
532 |
-
batch=True,
|
533 |
-
max_batch_size=10,
|
534 |
fn=videochat,
|
535 |
inputs=[gr.Image(type="pil",sources="webcam", label="Upload Image"), gr.Textbox(label="Prompt", value="what he is doing")],
|
536 |
outputs=gr.Textbox(label="Answer")
|
|
|
54 |
decoded_text = decoded_text[:-10]
|
55 |
yield decoded_text
|
56 |
|
57 |
+
# Application theme: Gradio's Soft theme with custom hues and fonts, followed
# by overrides that are mostly for the dark-mode palette.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="orange",
    neutral_hue="gray",
    font=[
        gr.themes.GoogleFont("Libre Franklin"),
        gr.themes.GoogleFont("Public Sans"),
        "system-ui",
        "sans-serif",
    ],
).set(
    # Background fills.
    body_background_fill_dark="#111111",
    block_background_fill_dark="#111111",
    background_fill_secondary_dark="#111111",
    block_title_background_fill_dark="#1e1c26",
    input_background_fill_dark="#292733",
    button_secondary_background_fill_dark="#24212b",
    # Borders and accent colour.
    block_border_width="1px",
    border_color_primary_dark="#343140",
    color_accent_soft_dark="transparent",
)
|
71 |
|
72 |
MODEL_NAME = "openai/whisper-medium"
|
73 |
BATCH_SIZE = 10
|
|
|
89 |
text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"})["text"]
|
90 |
return text
|
91 |
|
92 |
+
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
93 |
+
|
94 |
+
def client_fn(model):
    """Return an ``InferenceClient`` for the model family named in *model*.

    Args:
        model: Display name of the chosen model (for example
            ``"Mixtral 8x7B"``). It is matched by case-sensitive substring.
            ``None``, an empty value, or any name that matches no keyword
            selects the Phi-3 mini client.

    Returns:
        An ``InferenceClient`` constructed with the matching model id.
    """
    fallback_id = "microsoft/Phi-3-mini-4k-instruct"
    # Checked in order; the first keyword found in *model* wins. "Phi" needs
    # no entry of its own because it resolves to the fallback anyway.
    id_by_keyword = (
        ("Mixtral", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
        ("Llama", "meta-llama/Meta-Llama-3-8B-Instruct"),
        ("Mistral", "mistralai/Mistral-7B-Instruct-v0.3"),
    )
    # Guard against a missing selection: `"x" in None` would raise TypeError.
    name = model or ""
    for keyword, model_id in id_by_keyword:
        if keyword in name:
            return InferenceClient(model_id)
    return InferenceClient(fallback_id)
|
105 |
+
|
106 |
+
def randomize_seed_fn(seed: int) -> int:
    """Return a new random seed in the inclusive range 0..999999.

    Args:
        seed: Ignored. The parameter is kept so callers that pass the
            current seed keep working.

    Returns:
        A pseudo-random integer drawn with ``random.randint``.
    """
    # The incoming value is never read; every call draws a new one.
    return random.randint(0, 999999)
|
109 |
|
110 |
system_instructions1 = "[SYSTEM] Answer as Real OpenGPT 4o, Made by 'KingNish', Keep conversation very short, clear, friendly and concise. The text provided is a request for a specific type of response from you, the virtual assistant. You will avoid introductions and start answering the query directly, elaborating on all aspects of the request. As an AI-powered assistant, your task is to generate responses that appear as if they are created by the user and not an AI-powered assistant. [USER]"
|
111 |
|
112 |
+
def models(text, model="Mixtral 8x7B", seed=42):
|
113 |
+
|
114 |
+
seed = int(randomize_seed_fn(seed))
|
115 |
+
generator = torch.Generator().manual_seed(seed)
|
116 |
+
|
117 |
+
client = client_fn(model)
|
118 |
generate_kwargs = dict(
|
119 |
temperature=0.7,
|
120 |
max_new_tokens=512,
|
121 |
top_p=0.95,
|
122 |
repetition_penalty=1,
|
123 |
do_sample=True,
|
124 |
+
seed=seed,
|
125 |
)
|
126 |
|
127 |
formatted_prompt = system_instructions1 + text + "[OpenGPT 4o]"
|
|
|
147 |
# Registry of loaded checkpoints, keyed by a short model name.
MODELS = {}
# Loaded in float16 with the flash-attention-2 implementation, then moved to
# DEVICE.
MODELS["idefics2-8b-chatty"] = Idefics2ForConditionalGeneration.from_pretrained(
    "HuggingFaceM4/idefics2-8b-chatty",
    torch_dtype=torch.float16,
    _attn_implementation="flash_attention_2",
).to(DEVICE)
|
|
|
553 |
autoplay=True,
|
554 |
elem_classes="audio")
|
555 |
gr.Interface(
|
|
|
|
|
556 |
fn=respond,
|
557 |
inputs=[input],
|
558 |
+
outputs=[output], api_name="translate", live=True)
|
559 |
|
560 |
with gr.Blocks() as livechat:
|
561 |
gr.Interface(
|
|
|
|
|
562 |
fn=videochat,
|
563 |
inputs=[gr.Image(type="pil",sources="webcam", label="Upload Image"), gr.Textbox(label="Prompt", value="what he is doing")],
|
564 |
outputs=gr.Textbox(label="Answer")
|