"""Gradio web demo that serves a fine-tuned Donut checkpoint for one task."""

import argparse

import gradio as gr
import torch
from PIL import Image

from donut import DonutModel


def demo_process(input_img):
    """Run Donut inference on one uploaded image and return its prediction.

    Args:
        input_img: Image data accepted by ``PIL.Image.fromarray``
            (presumably the numpy array Gradio passes for an ``"image"``
            input -- confirm against the installed Gradio version).

    Returns:
        The first entry of the ``"predictions"`` list produced by
        ``model.inference``; it is rendered by the ``"json"`` output widget.
    """
    # `model` and `task_prompt` are module-level names that are only read
    # here, so no `global` declaration is required.
    pil_image = Image.fromarray(input_img)
    result = model.inference(image=pil_image, prompt=task_prompt)
    return result["predictions"][0]


parser = argparse.ArgumentParser()
parser.add_argument("--task", type=str, default="Booking")
parser.add_argument(
    "--pretrained_path",
    type=str,
    default="result/train_booking/20241112_150925",
)
# parse_known_args tolerates extra command-line arguments instead of
# aborting; the unrecognised ones are collected in `left_argv`.
args, left_argv = parser.parse_known_args()

task_name = args.task
# NOTE(review): the prompt is an empty string. Donut checkpoints are usually
# prompted with the task start token they were trained with (something like
# f"<s_{task_name}>") -- confirm against the training config of the checkpoint.
task_prompt = ""

# Honour --pretrained_path instead of a hard-coded directory; the flag's
# default points at the same checkpoint that was previously hard-coded.
model = DonutModel.from_pretrained(args.pretrained_path)

if torch.cuda.is_available():
    # GPU available: cast the whole model to half precision and move it over.
    model.half()
    device = torch.device("cuda")
    model.to(device)
else:
    # CPU only: cast just the encoder to bfloat16.
    model.encoder.to(torch.bfloat16)

model.eval()

demo = gr.Interface(
    fn=demo_process,
    inputs="image",
    outputs="json",
    title=f"Donut 🍩 demonstration for `{task_name}` task",
)
demo.launch(debug=True)