Spaces:

Hzqhssn
/

OCR-test

Sleeping

App Files Community

OCR-test / app.py

Hzqhssn

fix

0d32855 3 months ago

raw

history blame contribute delete

2.27 kB

	import gradio as gr
	import requests
	from PIL import Image
	from transformers import AutoProcessor, AutoModelForCausalLM
	from io import BytesIO
	import torch

	# Choose the compute target once: the first CUDA GPU with half precision
	# when CUDA is available, otherwise the CPU with full precision.
	if torch.cuda.is_available():
	    device, torch_dtype = "cuda:0", torch.float16
	else:
	    device, torch_dtype = "cpu", torch.float32

	# Load the Florence-2 model in the chosen dtype and move it to the chosen
	# device, then load the processor from the same checkpoint.
	model = AutoModelForCausalLM.from_pretrained(
	    "microsoft/Florence-2-large",
	    torch_dtype=torch_dtype,
	    trust_remote_code=True,
	)
	model = model.to(device)
	processor = AutoProcessor.from_pretrained(
	    "microsoft/Florence-2-large",
	    trust_remote_code=True,
	)

	# Prediction function
	def predict_from_url(url):
	    """Download the image at ``url`` and extract its text with the "<OCR>" task.

	    Args:
	        url: Address of the image to fetch. Surrounding whitespace is
	            ignored; an empty or missing value is rejected.

	    Returns:
	        A ``(text, image)`` tuple. On success ``text`` is the recognized
	        text with every run of whitespace collapsed to a single space and
	        ``image`` is the downloaded image converted to RGB. On failure
	        ``text`` is a message starting with ``"Error:"``; ``image`` is
	        ``None`` when the download or decoding failed, and the decoded
	        image when only the OCR step failed.
	    """
	    prompt = "<OCR>"

	    # Treat a whitespace-only value like an empty one instead of requesting it.
	    if isinstance(url, str):
	        url = url.strip()
	    if not url:
	        return "Error: Please input a URL", None

	    # NOTE(review): the URL is user-supplied, so this process will fetch any
	    # address it can reach (SSRF risk) - consider restricting target hosts.
	    try:
	        # A timeout keeps a stalled server from blocking the worker forever,
	        # and raise_for_status reports HTTP errors as such instead of letting
	        # an error page reach the image decoder.
	        response = requests.get(url, timeout=30)
	        response.raise_for_status()
	        # Convert to RGB format to handle grayscale or other formats
	        image = Image.open(BytesIO(response.content)).convert("RGB")
	    except Exception as e:
	        return f"Error: Failed to load or process the image: {e}", None

	    # Preprocess and perform inference
	    try:
	        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
	        generated_ids = model.generate(
	            input_ids=inputs["input_ids"],
	            pixel_values=inputs["pixel_values"],
	            max_new_tokens=4096,
	            num_beams=3,
	            do_sample=False,
	        )
	        # Special tokens are kept here; the decoded text is handed to
	        # post_process_generation together with the task prompt.
	        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
	        parsed_answer = processor.post_process_generation(
	            generated_text,
	            task=prompt,
	            image_size=(image.width, image.height),
	        )

	        # Extract OCR text; split() with no arguments already drops newlines
	        # and leading/trailing whitespace, so joining collapses it all.
	        ocr_text = parsed_answer.get(prompt, "")
	        output_text = " ".join(ocr_text.split())
	    except Exception as e:
	        return f"Error: Failed to process the image for OCR: {e}", image

	    return output_text, image

	# Gradio Interface: a single URL textbox as input; the extracted text and
	# the image returned by predict_from_url as outputs.
	url_input = gr.Textbox(label="Enter Image URL")
	result_outputs = [
	    gr.Textbox(label="Extracted Text"),
	    gr.Image(label="Uploaded Image"),
	]
	demo = gr.Interface(
	    fn=predict_from_url,
	    inputs=url_input,
	    outputs=result_outputs,
	    title="OCR Text Extractor",
	    description="Provide an image URL, and this tool will extract text using OCR.",
	    allow_flagging="never",
	)

	# Launch the app
	demo.launch()