import gradio as gr
import torch
from transformers import BitsAndBytesConfig, pipeline

# Load the model directly through the image-to-text pipeline.
# (torch and BitsAndBytesConfig are only needed for the optional quantized
# load sketched below.)
model_id = "LanguageBind/MoE-LLaVA-Phi2-2.7B-4e"
pipe = pipeline("image-to-text", model=model_id, trust_remote_code=True)

def generate_text(image):
    max_new_tokens = 200
    # The <image> placeholder marks where the pipeline inserts the image tokens
    # in the LLaVA-style chat prompt.
    prompt = (
        "USER: <image>\nWhat are the things I should be cautious about "
        "when I visit this place?\nASSISTANT:"
    )
    outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": max_new_tokens})
    return outputs[0]["generated_text"]

# gr.Image(type="pil") replaces the deprecated gr.inputs.Image(); the pipeline
# expects a PIL image rather than the default NumPy array.
iface = gr.Interface(fn=generate_text, inputs=gr.Image(type="pil"), outputs="text")
iface.launch()
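
# Optional: 4-bit quantized load (a sketch, not part of the original script).
# The otherwise-unused torch / BitsAndBytesConfig imports above suggest a
# quantized load was intended. Assuming a CUDA GPU with bitsandbytes installed,
# the pipeline(...) call above could be replaced with:
#
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16,
# )
# pipe = pipeline(
#     "image-to-text",
#     model=model_id,
#     trust_remote_code=True,
#     model_kwargs={"quantization_config": quantization_config},
# )
#
# This keeps the rest of the app unchanged while cutting the model's memory
# footprint roughly in half compared with a float16 load.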