KingNish committed
Commit 4936b31
Parent: 4e975f4

Update chatbot.py

Files changed (1): chatbot.py (+27 -15)
chatbot.py CHANGED
@@ -151,7 +151,15 @@ def video_gen(prompt):
 image_extensions = Image.registered_extensions()
 video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
 
-def qwen_inference(user_prompt, chat_history):
+# Initialize inference clients for different models
+client_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
+client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
+client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+client_mistral_nemo = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
+
+def model_inference(user_prompt, chat_history):
+    @spaces.GPU(duration=60, queue=False)
+    def qwen_inference(user_prompt, chat_history):
         images = []
         text_input = user_prompt["text"]
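This hunk moves the serverless InferenceClient setup ahead of model_inference and nests qwen_inference inside it under the @spaces.GPU decorator, so the ZeroGPU reservation covers the Qwen vision path while the text-only models run on remote endpoints. As a minimal sketch (the prompt string is illustrative), one of these clients streams a completion like this:

```python
from huggingface_hub import InferenceClient

client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")

# Stream tokens from the serverless Inference API; no local GPU is used,
# which is why these clients can live outside the @spaces.GPU scope.
for token in client_llama.text_generation(
    "Explain streaming inference in one sentence.",  # illustrative prompt
    max_new_tokens=64,
    stream=True,
):
    print(token, end="", flush=True)
```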
 
@@ -194,20 +202,12 @@ def qwen_inference(user_prompt, chat_history):
             })
 
         return messages
-
-# Initialize inference clients for different models
-client_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
-client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
-client_llama = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
-client_mistral_nemo = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
-
-@spaces.GPU(duration=60, queue=False)
-def model_inference( user_prompt, chat_history):
+
     if user_prompt["files"]:
         messages = qwen_inference(user_prompt, chat_history)
         text = processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
+            messages, tokenize=False, add_generation_prompt=True
+        )
         image_inputs, video_inputs = process_vision_info(messages)
         inputs = processor(
             text=[text],
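The file-upload branch follows the standard Qwen2-VL preprocessing flow: qwen_inference assembles a messages list mixing text and media entries, apply_chat_template serializes it into the model's prompt format, and process_vision_info (from qwen_vl_utils) extracts the pixel data. A self-contained sketch, assuming a Qwen2-VL checkpoint (the checkpoint name and image path are illustrative; chatbot.py defines processor and model elsewhere):

```python
from transformers import AutoProcessor
from qwen_vl_utils import process_vision_info

# Assumed checkpoint for illustration only.
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")

# A single-image message in the structure qwen_inference builds.
messages = [{
    "role": "user",
    "content": [
        {"type": "image", "image": "file:///tmp/example.jpg"},  # hypothetical path
        {"type": "text", "text": "What is in this image?"},
    ],
}]

text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
).to("cuda")  # requires a CUDA device, as in the Space
```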
@@ -218,8 +218,8 @@ def model_inference( user_prompt, chat_history):
         ).to("cuda")
 
         streamer = TextIteratorStreamer(
-            processor, skip_prompt=True, **{"skip_special_tokens": True}
-        )
+            processor, skip_prompt=True, **{"skip_special_tokens": True}
+        )
         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
 
         thread = Thread(target=model.generate, kwargs=generation_kwargs)
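Only indentation changes in this hunk; the logic is the usual transformers streaming idiom: run model.generate on a worker thread while the main thread iterates the streamer (note that **{"skip_special_tokens": True} is just a roundabout spelling of skip_special_tokens=True). A sketch with a hypothetical helper name, stream_reply; chatbot.py inlines this logic:

```python
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, processor, inputs, max_new_tokens=2048):
    # Generate on a background thread and yield the growing reply so the
    # Gradio UI can render partial output as tokens arrive.
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer
```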
@@ -325,7 +325,19 @@ def model_inference( user_prompt, chat_history):
             yield gr.Video(video)
 
     elif json_data["name"] == "image_qna":
-        inputs = llava(user_prompt, chat_history)
+        messages = qwen_inference(user_prompt, chat_history)
+        text = processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(messages)
+        inputs = processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        ).to("cuda")
+
         streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
         generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
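The image_qna tool branch drops the old llava helper and reuses the same qwen_inference preprocessing as the file-upload path, differing only in its smaller generation budget (1024 vs. 2048 tokens). A condensed, hypothetical view of the resulting dispatch (route and qwen_pipeline are illustrative names, not functions in chatbot.py):

```python
def route(user_prompt, chat_history, json_data, qwen_pipeline):
    # After this commit both media branches share one Qwen2-VL pipeline;
    # only max_new_tokens differs between uploads and the image_qna tool.
    if user_prompt["files"]:
        yield from qwen_pipeline(user_prompt, chat_history, max_new_tokens=2048)
    elif json_data["name"] == "image_qna":
        yield from qwen_pipeline(user_prompt, chat_history, max_new_tokens=1024)
```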
 
 
343