Ivy1997 committed on
Commit
dd3b350
1 Parent(s): 4aa36bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -10,19 +10,18 @@ import warnings
10
 
11
  warnings.filterwarnings("ignore")
12
 
13
- pretrained = "AI-Safeguard/Ivy-VL-llava"
14
  model_name = "llava_qwen"
15
- device = "cpu"
16
  device_map = "auto"
17
 
18
  # Load model, tokenizer, and image processor
19
  tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map, attn_implementation="sdpa")
20
  model.eval()
21
 
22
- def respond(image_path, question, temperature, max_tokens):
23
  try:
24
  # Load and process the image
25
- image = Image.open(image_path)
26
  image_tensor = process_images([image], image_processor, model.config)
27
  image_tensor = [_image.to(dtype=torch.float16, device=device) for _image in image_tensor]
28
 
@@ -57,12 +56,12 @@ def respond(image_path, question, temperature, max_tokens):
57
  def chat_interface(image, question, temperature, max_tokens):
58
  if not image or not question:
59
  return "Please provide both an image and a question."
60
- return respond(image.name, question, temperature, max_tokens)
61
 
62
  demo = gr.Interface(
63
  fn=chat_interface,
64
  inputs=[
65
- gr.Image(type="file", label="Input Image"),
66
  gr.Textbox(label="Question"),
67
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
68
  gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max Tokens"),
 
10
 
11
  warnings.filterwarnings("ignore")
12
 
13
+ pretrained = "/tmp/pre-trained/AI-Safeguard/Ivy-VL-llava"
14
  model_name = "llava_qwen"
15
+ device = "cuda"
16
  device_map = "auto"
17
 
18
  # Load model, tokenizer, and image processor
19
  tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map, attn_implementation="sdpa")
20
  model.eval()
21
 
22
+ def respond(image, question, temperature, max_tokens):
23
  try:
24
  # Load and process the image
 
25
  image_tensor = process_images([image], image_processor, model.config)
26
  image_tensor = [_image.to(dtype=torch.float16, device=device) for _image in image_tensor]
27
 
 
56
  def chat_interface(image, question, temperature, max_tokens):
57
  if not image or not question:
58
  return "Please provide both an image and a question."
59
+ return respond(image, question, temperature, max_tokens)
60
 
61
  demo = gr.Interface(
62
  fn=chat_interface,
63
  inputs=[
64
+ gr.Image(type="pil", label="Input Image"),
65
  gr.Textbox(label="Question"),
66
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
67
  gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max Tokens"),