from transformers import NougatProcessor, VisionEncoderDecoderModel import gradio as gr from pdf2image import convert_from_path # Load the model and processor processor = NougatProcessor.from_pretrained("MohamedRashad/arabic-small-nougat") model = VisionEncoderDecoderModel.from_pretrained("MohamedRashad/arabic-small-nougat") device = "cpu" context_length = 2048 def extract_text_from_image(image): """ Extract text from PIL image Args: image (PIL.Image): Input image Returns: str: Extracted text from the image """ # prepare PDF image for the model pixel_values = processor(image, return_tensors="pt").pixel_values # generate transcription outputs = model.generate( pixel_values.to(device), min_length=1, max_new_tokens=context_length, bad_words_ids=[[processor.tokenizer.unk_token_id]], ) page_sequence = processor.batch_decode(outputs, skip_special_tokens=True)[0] page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False) return page_sequence def extract_text_from_pdf(pdf_path, progress=gr.Progress()): """ Extract text from PDF Args: pdf_path (str): Path to the PDF file progress (gr.Progress): Progress bar Returns: str: Extracted text from the PDF """ progress(0, desc="Starting...") images = convert_from_path(pdf_path) texts = [] for image in progress.tqdm(images): extracted_text = extract_text_from_image(image) texts.append(extracted_text) return "\n".join(texts) with gr.Blocks(title="Arabic Small Nougat") as demo: gr.HTML("