shcho-isle committed on
Commit
3902745
·
verified ·
1 Parent(s): 42a9b9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -8
app.py CHANGED
@@ -1,13 +1,56 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
 
3
 
4
- tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-VL-72B-Instruct")
5
- model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-72B-Instruct")
 
 
 
6
 
7
- def predict(input_text):
8
- inputs = tokenizer(input_text, return_tensors="pt")
9
- outputs = model.generate(**inputs)
10
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- interface = gr.Interface(fn=predict, inputs="text", outputs="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  interface.launch()
 
1
  import gradio as gr
2
+ from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
3
+ from qwen_vl_utils import process_vision_info
4
 
5
+ # Load the model and processor
6
+ model = Qwen2VLForConditionalGeneration.from_pretrained(
7
+ "Qwen/Qwen2-VL-72B-Instruct", torch_dtype="auto", device_map="auto"
8
+ )
9
+ processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-72B-Instruct")
10
 
11
+ # Define a function to process input and generate a response
12
+ def generate_response(image, text):
13
+ # Prepare the input
14
+ messages = [
15
+ {
16
+ "role": "user",
17
+ "content": [
18
+ {"type": "image", "image": image},
19
+ {"type": "text", "text": text},
20
+ ],
21
+ }
22
+ ]
23
+
24
+ # Process the input data
25
+ text_data = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
26
+ image_inputs, video_inputs = process_vision_info(messages)
27
+ inputs = processor(
28
+ text=[text_data],
29
+ images=image_inputs,
30
+ videos=video_inputs,
31
+ padding=True,
32
+ return_tensors="pt",
33
+ )
34
 
35
+ # Generate the output
36
+ generated_ids = model.generate(**inputs, max_new_tokens=128)
37
+ generated_ids_trimmed = [
38
+ out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
39
+ ]
40
+ output_text = processor.batch_decode(
41
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
42
+ )
43
+
44
+ return output_text[0]
45
+
46
+ # Create the Gradio interface
47
+ interface = gr.Interface(
48
+ fn=generate_response,
49
+ inputs=[gr.Image(type="pil", label="Input Image"), gr.Textbox(label="Input Text")],
50
+ outputs="text",
51
+ title="Qwen2-VL-72B-Instruct",
52
+ description="Generate AI responses based on image and text input using Qwen2-VL-72B-Instruct.",
53
+ )
54
+
55
+ # Launch the app
56
  interface.launch()