Spaces:

Ketengan-Diffusion-Lab
/

Dolphin-Inference

Build error

File size: 1,732 Bytes

789acc7
 
5ee7893
789acc7
 
5ee7893
 
 
 
 
 
 
 
 
 
 
 
a6663c1
 
 
5ee7893
a6663c1
5ee7893
 
 
 
789acc7
5ee7893
 
789acc7
5ee7893
 
 
a6663c1
5ee7893
 
 
 
 
 
 
 
789acc7
5ee7893
a6663c1

import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
from PIL import Image

# Inference only: switch autograd off globally
torch.set_grad_enabled(False)

# Checkpoint shared by the tokenizer and the model
_CHECKPOINT = 'internlm/internlm-xcomposer2d5-7b'

# Load the tokenizer, then the bfloat16 model, which is moved to the GPU
# and put in eval mode
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT, trust_remote_code=True)
model = AutoModel.from_pretrained(
    _CHECKPOINT,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model = model.cuda().eval()

# Attach the tokenizer to the model object
model.tokenizer = tokenizer

# Gradio callback: answer a text query about one uploaded image
def analyze_image(query, image_path):
    """Run the multimodal model on one image and return its text reply.

    Args:
        query: Prompt text passed to ``model.chat``.
        image_path: Filesystem path of the uploaded image. Gradio passes
            ``None`` here when the image component is empty.

    Returns:
        The first element of the pair returned by ``model.chat``.

    Raises:
        gr.Error: If no image was uploaded, so the UI shows a readable
            message instead of an opaque traceback.
        OSError: If Pillow cannot open or identify the file.
    """
    if not image_path:
        raise gr.Error("Please upload an image before submitting.")

    # Fail fast on unreadable or non-image files before the (expensive)
    # model call. The context manager releases the file handle, which the
    # model does not need because it receives the path itself.
    with Image.open(image_path):
        pass

    with torch.autocast(device_type='cuda', dtype=torch.float16):
        response, _ = model.chat(
            tokenizer,
            query,
            [image_path],
            do_sample=False,
            num_beams=3,
            use_meta=True,
        )

    return response

# Build the Gradio UI: a title followed by one component per row
with gr.Blocks() as demo:
    gr.Markdown("## Image Analysis Tool using Hugging Face's `internlm-xcomposer2d5-7b`")

    # Free-text prompt
    with gr.Row():
        query_input = gr.Textbox(
            label="Enter your query",
            placeholder="Analyze the given image in a detailed manner",
        )

    # Image upload, handed to the callback as a file path
    with gr.Row():
        image_input = gr.Image(
            label="Upload an Image",
            type="filepath",
        )

    # Read-only box that receives the callback's return value
    with gr.Row():
        result_output = gr.Textbox(
            label="Result",
            placeholder="Model response will appear here",
            interactive=False,
        )

    with gr.Row():
        submit_button = gr.Button("Submit")

    # Clicking the button runs analyze_image(query, image) and writes
    # the result to the output box
    submit_button.click(
        fn=analyze_image,
        inputs=[query_input, image_input],
        outputs=result_output,
    )

# Start serving the app
demo.launch()