Arabic-Small-Nougat

Paused

App Files Files Community

Arabic-Small-Nougat / app.py

MohamedRashad

Add app.py and others

688353f 9 months ago

raw

history blame

2.59 kB

	from transformers import NougatProcessor, VisionEncoderDecoderModel
	import gradio as gr
	from pdf2image import convert_from_path

	# Hub checkpoint shared by the processor and the model; kept in one place so
	# the two can never drift apart.
	MODEL_ID = "MohamedRashad/arabic-small-nougat"

	# Device used for inference. Inputs are moved to this device before
	# generation, so the model must live on the same device.
	device = "cpu"

	# Load the model and processor
	processor = NougatProcessor.from_pretrained(MODEL_ID)
	model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID).to(device)

	# Upper bound on the number of new tokens generated per page
	# (passed as ``max_new_tokens`` to ``model.generate``).
	context_length = 2048

	def extract_text_from_image(image):
	    """
	    Extract text from PIL image

	    The image is converted to pixel values by the module-level ``processor``,
	    a token sequence is generated by the module-level ``model``, and the
	    decoded text is passed through ``processor.post_process_generation``
	    with ``fix_markdown=False``.

	    Args:
	        image (PIL.Image): Input image

	    Returns:
	        str: Extracted text from the image

	    Raises:
	        gr.Error: If ``image`` is None (no image was supplied).
	    """
	    # The UI sends None when Submit is clicked with an empty image widget;
	    # fail with a readable message instead of an opaque processor error.
	    if image is None:
	        raise gr.Error("Please provide an image before submitting.")

	    # prepare the page image for the model
	    pixel_values = processor(image, return_tensors="pt").pixel_values

	    # generate transcription
	    outputs = model.generate(
	        pixel_values.to(device),
	        min_length=1,
	        max_new_tokens=context_length,
	        # never let the model emit the unknown token
	        bad_words_ids=[[processor.tokenizer.unk_token_id]],
	    )

	    # a single image was passed in, so only the first decoded sequence is used
	    page_sequence = processor.batch_decode(outputs, skip_special_tokens=True)[0]
	    page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False)
	    return page_sequence

	def extract_text_from_pdf(pdf_path, progress=gr.Progress()):
	    """
	    Extract text from PDF

	    Every page of the PDF is rendered to an image with ``convert_from_path``
	    and transcribed with ``extract_text_from_image``; the per-page results
	    are joined with a newline.

	    Args:
	        pdf_path (str): Path to the PDF file
	        progress (gr.Progress): Progress bar. The ``gr.Progress()`` default is
	            intentional: Gradio's progress tracking is requested by declaring
	            it as a default argument value.

	    Returns:
	        str: Extracted text from the PDF

	    Raises:
	        gr.Error: If ``pdf_path`` is None or empty (no file was uploaded).
	    """
	    # No upload yields a missing path; report it instead of letting the
	    # PDF conversion fail on a None path.
	    if not pdf_path:
	        raise gr.Error("Please upload a PDF file before submitting.")

	    progress(0, desc="Starting...")
	    pages = convert_from_path(pdf_path)

	    # progress.tqdm wraps the iterable so the progress bar advances per page
	    texts = [extract_text_from_image(page) for page in progress.tqdm(pages)]

	    return "\n".join(texts)

	# Gradio UI: two tabs, one transcribing a single image and one transcribing
	# every page of a PDF.
	# NOTE(review): the nesting of Row/Column below was reconstructed (inputs and
	# button in a column, Markdown output beside it) -- confirm against the
	# intended layout.
	with gr.Blocks(title="Arabic Small Nougat") as demo:
	    gr.HTML("<h1 style='text-align: center'>Arabic End-to-End Structured OCR for textbooks</h1>")

	    # Tab 1: image in, Markdown out
	    with gr.Tab("Extract Text from Image"):
	        with gr.Row():
	            with gr.Column():
	                image = gr.Image(label="Input Image", type="pil")
	                image_submit_button = gr.Button(value="Submit", variant="primary")
	            output = gr.Markdown(label="Output Markdown", rtl=True)
	        image_submit_button.click(extract_text_from_image, inputs=[image], outputs=output)

	    # Tab 2: PDF file path in, Markdown out
	    with gr.Tab("Extract Text from PDF"):
	        with gr.Row():
	            with gr.Column():
	                pdf = gr.File(label="Input PDF", type="filepath")
	                pdf_submit_button = gr.Button(value="Submit", variant="primary")
	            # `output` is rebound here; the image tab's click handler above was
	            # already wired to the first Markdown component before this line.
	            output = gr.Markdown(label="Output Markdown", rtl=True)
	        pdf_submit_button.click(extract_text_from_pdf, inputs=[pdf], outputs=output)

	# Enable the request queue, then start the app without a public share link.
	demo.queue().launch(share=False)