import gradio as gr
import torch
from transformers import BitsAndBytesConfig, pipeline

# Load the model directly through the image-to-text pipeline.
# (torch and BitsAndBytesConfig are only needed for the optional quantized
# load sketched below.)
model_id = "LanguageBind/MoE-LLaVA-Phi2-2.7B-4e"
pipe = pipeline("image-to-text", model=model_id, trust_remote_code=True)

def generate_text(image):
    max_new_tokens = 200
    # The <image> placeholder marks where the pipeline inserts the image tokens
    # in the LLaVA-style chat prompt.
    prompt = (
        "USER: <image>\nWhat are the things I should be cautious about "
        "when I visit this place?\nASSISTANT:"
    )
    outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": max_new_tokens})
    return outputs[0]["generated_text"]

# gr.Image(type="pil") replaces the deprecated gr.inputs.Image(); the pipeline
# expects a PIL image rather than the default NumPy array.
iface = gr.Interface(fn=generate_text, inputs=gr.Image(type="pil"), outputs="text")
iface.launch()
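
# Optional: 4-bit quantized load (a sketch, not part of the original script).
# The otherwise-unused torch / BitsAndBytesConfig imports above suggest a
# quantized load was intended. Assuming a CUDA GPU with bitsandbytes installed,
# the pipeline(...) call above could be replaced with:
#
# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.float16,
# )
# pipe = pipeline(
#     "image-to-text",
#     model=model_id,
#     trust_remote_code=True,
#     model_kwargs={"quantization_config": quantization_config},
# )
#
# This keeps the rest of the app unchanged while cutting the model's memory
# footprint roughly in half compared with a float16 load.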