import gradio as gr
from transformers import AutoTokenizer, AutoProcessor, MllamaForConditionalGeneration
import torch
import gc
import os

# Enable better CPU performance
torch.set_num_threads(4)

device = "cpu"
def load_model():
    model_name = "forestav/unsloth_vision_radiography_finetune"
    base_model_name = "unsloth/Llama-3.2-11B-Vision-Instruct"  # Correct base model

    print("Loading tokenizer and processor...")
    # Load tokenizer from base model
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_name,
        trust_remote_code=True
    )
    # Load processor from base model
    processor = AutoProcessor.from_pretrained(
        base_model_name,
        trust_remote_code=True
    )
print("Loading model...") | |
# Load model with CPU optimizations | |
model = AutoModelForCausalLM.from_pretrained( | |
model_name, | |
device_map="cpu", | |
torch_dtype=torch.float32, | |
low_cpu_mem_usage=True, | |
offload_folder="offload", | |
offload_state_dict=True, | |
trust_remote_code=True | |
) | |
print("Quantizing model...") | |
model = torch.quantization.quantize_dynamic( | |
model, | |
{torch.nn.Linear}, | |
dtype=torch.qint8 | |
) | |
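    # quantize_dynamic swaps the weights of the listed module types (nn.Linear
    # here) for int8 tensors and quantizes activations on the fly, shrinking
    # memory use and speeding up CPU matmuls at a small accuracy cost.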
    return model, tokenizer, processor
# Create offload directory if it doesn't exist
os.makedirs("offload", exist_ok=True)

# Initialize model and tokenizer globally
print("Starting model initialization...")
try:
    model, tokenizer, processor = load_model()
    print("Model loaded and quantized successfully!")
except Exception as e:
    print(f"Error loading model: {str(e)}")
    raise
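
# Sketch of a size guard: gr.Image has no pixel-limit parameter, so oversized
# uploads are downscaled before inference instead. MAX_PIXELS and
# downscale_image are helpers introduced here, mirroring the ~1.5 MP limit
# mentioned in the UI notes below.
MAX_PIXELS = 1_500_000

def downscale_image(image):
    """Resize a PIL image so its total pixel count stays under MAX_PIXELS."""
    if image is not None and image.width * image.height > MAX_PIXELS:
        scale = (MAX_PIXELS / (image.width * image.height)) ** 0.5
        new_size = (max(1, int(image.width * scale)), max(1, int(image.height * scale)))
        image = image.resize(new_size)
    return image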
def analyze_image(image, instruction):
    try:
        # Clear memory
        gc.collect()

        # Downscale oversized uploads (see helper above)
        image = downscale_image(image)

        if instruction.strip() == "":
            instruction = "You are an expert radiographer. Describe accurately what you see in this image."

        # Prepare the messages
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": instruction}
            ]}
        ]

        # Render the chat template via the processor, then process image and
        # text together (the template already inserts the special tokens)
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(
            images=image,
            text=input_text,
            add_special_tokens=False,
            return_tensors="pt"
        )
        # Generate with conservative settings for CPU; do_sample must be
        # enabled for temperature/min_p to take effect
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=128,
                do_sample=True,
                temperature=1.0,
                min_p=0.1,
                use_cache=True,
                pad_token_id=tokenizer.eos_token_id,
                num_beams=1
            )

        # Decode only the newly generated tokens (skip the echoed prompt)
        generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

        # Clean up
        del outputs
        gc.collect()

        return response
    except Exception as e:
        return f"Error processing image: {str(e)}\nPlease try again with a smaller image or different settings."
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("""
    # Medical Image Analysis Assistant
    Upload a medical image and receive a professional description from an AI radiographer.
    """)

    with gr.Row():
        with gr.Column():
            # gr.Image has no size-limit argument; oversized uploads are
            # downscaled in analyze_image instead
            image_input = gr.Image(
                type="pil",
                label="Upload Medical Image"
            )
            instruction_input = gr.Textbox(
                label="Custom Instruction (optional)",
                placeholder="You are an expert radiographer. Describe accurately what you see in this image.",
                lines=2
            )
            submit_btn = gr.Button("Analyze Image")

        with gr.Column():
            output_text = gr.Textbox(label="Analysis Result", lines=10)
    # Handle the submission
    submit_btn.click(
        fn=analyze_image,
        inputs=[image_input, instruction_input],
        outputs=output_text
    )

    gr.Markdown("""
    ### Notes:
    - The model runs on CPU and may take several moments to process each image
    - For best results, upload images smaller than 1.5MP (larger uploads are downscaled automatically)
    - Please be patient during processing
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch()
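
# Assumed requirements.txt for this Space (unpinned; pin versions as needed):
#   gradio
#   torch
#   transformers
#   accelerate   # required for device_map / low_cpu_mem_usage loading
#   Pillow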