---
language:
- en
library_name: transformers
tags:
- donut
- donut-python
---

### Installation

```bash
pip install torch
pip install transformers==4.11.3
pip install opencv-python==4.6.0.66
pip install donut-python
```

### Usage

```python
import torch
from PIL import Image
from donut import DonutModel

en_model_path = "question_generator_by_en_on_pic"
task_prompt = "{user_input}"

en_pretrained_model = DonutModel.from_pretrained(en_model_path)
if torch.cuda.is_available():
    en_pretrained_model.half()
    device = torch.device("cuda")
    en_pretrained_model.to(device)

en_pretrained_model.eval()
print("model loaded!")


def demo_process_vqa(input_img, question):
    # input_img = Image.fromarray(input_img)  # uncomment if passing a NumPy array
    global en_pretrained_model, task_prompt
    user_prompt = task_prompt.replace("{user_input}", question)
    output = en_pretrained_model.inference(input_img, prompt=user_prompt)["predictions"][0]
    # The model generates a question for the given answer, so the two fields
    # are swapped when building the response.
    req = {
        "question": output["answer"],
        "answer": output["question"]
    }
    return req


img_path = "en_img.png"
print(demo_process_vqa(Image.open(img_path), "605-7227"))
'''
{
    "question": "What is the Phone #?",
    "answer": "605-7227"
}
'''
```

### Sample Image
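### Batch Usage (sketch)

The helper above handles a single image and a single answer at a time. The snippet below is a minimal sketch of looping the same `demo_process_vqa` function over a folder of images and collecting the generated questions into a DataFrame; the `en_imgs` folder and the `answers` list are hypothetical placeholders, so replace them with your own data.

```python
import os

import pandas as pd
from PIL import Image
from tqdm import tqdm

img_dir = "en_imgs"      # hypothetical folder of document images
answers = ["605-7227"]   # hypothetical answers to generate questions for

records = []
for name in tqdm(sorted(os.listdir(img_dir))):
    img = Image.open(os.path.join(img_dir, name)).convert("RGB")
    for ans in answers:
        result = demo_process_vqa(img, ans)  # reuses the helper defined above
        records.append({"image": name, **result})

df = pd.DataFrame(records)
print(df)
```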