"""Gradio demo: ask a question about an uploaded image.

Loads the ``internlm/internlm-xcomposer2d5-7b`` checkpoint once at import
time and exposes a small Blocks UI with a query box, an image upload and a
read-only result box.
"""

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Single source of truth for the checkpoint used by both model and tokenizer.
MODEL_ID = 'internlm/internlm-xcomposer2d5-7b'

# Shown as the textbox placeholder and used when the user submits no query.
DEFAULT_QUERY = "Analyze the given image in a detailed manner"

# This script only runs inference, so gradient tracking is disabled globally.
torch.set_grad_enabled(False)

# Initialize model and tokenizer (requires a CUDA device).
model = AutoModel.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).cuda().eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model.tokenizer = tokenizer


def analyze_image(query, image_path):
    """Run the model on one image and return its text response.

    Args:
        query: The user's question or instruction. If empty or only
            whitespace, ``DEFAULT_QUERY`` is used instead.
        image_path: Filesystem path of the uploaded image, as supplied by
            the ``gr.Image(type="filepath")`` component; ``None`` when no
            image was uploaded.

    Returns:
        The first element of the pair returned by ``model.chat``.

    Raises:
        gr.Error: If no image was provided or the file cannot be opened as
            an image.
    """
    if not image_path:
        raise gr.Error("Please upload an image before submitting.")

    prompt = (query or "").strip() or DEFAULT_QUERY

    # Confirm the upload is a readable image, and close the handle right
    # away: the model is given the path, not the opened image object.
    try:
        with Image.open(image_path):
            pass
    except OSError as err:
        raise gr.Error(f"Could not read the uploaded image: {err}") from err

    with torch.autocast(device_type='cuda', dtype=torch.float16):
        # The second element of the returned pair is not needed here.
        response, _ = model.chat(
            tokenizer,
            prompt,
            [image_path],
            do_sample=False,
            num_beams=3,
            use_meta=True,
        )
    return response


# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Image Analysis Tool using Hugging Face's `internlm-xcomposer2d5-7b`")

    with gr.Row():
        query_input = gr.Textbox(label="Enter your query", placeholder=DEFAULT_QUERY)

    with gr.Row():
        image_input = gr.Image(label="Upload an Image", type="filepath")

    with gr.Row():
        result_output = gr.Textbox(
            label="Result",
            placeholder="Model response will appear here",
            interactive=False,
        )

    with gr.Row():
        submit_button = gr.Button("Submit")

    submit_button.click(
        fn=analyze_image,
        inputs=[query_input, image_input],
        outputs=result_output,
    )


# Launch the Gradio interface only when run as a script, so importing this
# module (e.g. to reuse ``analyze_image``) does not start a server.
if __name__ == "__main__":
    demo.launch()