import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
from PIL import Image

# This script only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)

# Checkpoint identifier shared by the model and its tokenizer.
MODEL_ID = 'internlm/internlm-xcomposer2d5-7b'

# Load the model with bfloat16 as the requested dtype, then move it to the
# GPU and put it in evaluation mode. trust_remote_code=True is passed through
# to transformers for both the model and the tokenizer.
model = AutoModel.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = model.cuda().eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# The tokenizer is also attached to the model object as an attribute.
model.tokenizer = tokenizer

# Gradio callback: validate the inputs, then ask the model about the image
def analyze_image(query, image_path):
    """Run the model on one image and return its text response.

    Args:
        query: Instruction or question text taken from the query textbox.
        image_path: Filesystem path of the uploaded image, or ``None`` when
            no image was uploaded.

    Returns:
        The first element of the pair returned by ``model.chat``.

    Raises:
        gr.Error: If the query is blank, no image was uploaded, or the file
            cannot be opened as an image. Gradio shows the message to the
            user instead of a generic failure.
    """
    if not query or not query.strip():
        raise gr.Error("Please enter a query before submitting.")
    if not image_path:
        raise gr.Error("Please upload an image before submitting.")

    # The model receives the path, not the decoded image. The file is opened
    # only to confirm it is a readable image, and the context manager closes
    # the handle again (the previous code left it open on every request).
    try:
        with Image.open(image_path):
            pass
    except OSError as exc:
        raise gr.Error("The uploaded file could not be read as an image.") from exc

    # Generation runs under CUDA autocast with float16; the second element of
    # the returned pair is not used here.
    with torch.autocast(device_type='cuda', dtype=torch.float16):
        response, _ = model.chat(
            tokenizer,
            query,
            [image_path],
            do_sample=False,
            num_beams=3,
            use_meta=True,
        )

    return response

# Build the Gradio UI: a heading followed by four rows holding the query box,
# the image uploader, the read-only result box and the submit button.
with gr.Blocks() as demo:
    gr.Markdown("## Image Analysis Tool using Hugging Face's `internlm-xcomposer2d5-7b`")

    with gr.Row():
        query_input = gr.Textbox(
            placeholder="Analyze the given image in a detailed manner",
            label="Enter your query",
        )

    with gr.Row():
        # type="filepath": the callback receives the upload as a path string.
        image_input = gr.Image(type="filepath", label="Upload an Image")

    with gr.Row():
        result_output = gr.Textbox(
            interactive=False,
            placeholder="Model response will appear here",
            label="Result",
        )

    with gr.Row():
        submit_button = gr.Button("Submit")

    # On click, call analyze_image with (query, image path) and write the
    # returned text into the result box.
    submit_button.click(
        fn=analyze_image,
        inputs=[query_input, image_input],
        outputs=result_output,
    )

# Start serving the interface.
demo.launch()