Spaces:

naver-clova-ix
/

donut-base-finetuned-cord-v2

Running

Geewook Kim committed on Jul 20, 2022

Commit

ebf0b03

1 Parent(s): 582d0f3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,14 +25,7 @@ def demo_process_vqa(input_img, question):
 def demo_process(input_img):
     global pretrained_model, task_prompt, task_name
     input_img = Image.fromarray(input_img)
-    pretrained_model = DonutModel.from_pretrained(args.pretrained_path, max_length=512)
-    pretrained_model.encoder.to(torch.bfloat16)
-    pretrained_model.eval()
     output = pretrained_model.inference(image=input_img, prompt=task_prompt)["predictions"][0]
-    del pretrained_model
     return output
@@ -55,6 +48,10 @@ if __name__ == "__main__":
     if args.sample_img_path:
         example_sample.append(args.sample_img_path)
     demo = gr.Interface(
         fn=demo_process_vqa if task_name == "docvqa" else demo_process,
         inputs=["image", "text"] if task_name == "docvqa" else "image",

 def demo_process(input_img):
     """Run the module-level model on one image and return its first prediction.

     Args:
         input_img: Image data in a form accepted by ``Image.fromarray``
             (presumably the array Gradio supplies for an "image" input;
             confirm against the interface definition).

     Returns:
         The first element of the ``"predictions"`` entry returned by
         ``pretrained_model.inference``.
     """
     # These globals are only read here; task_name is declared but not used
     # in this function.
     global pretrained_model, task_prompt, task_name
     input_img = Image.fromarray(input_img)
     # Relies on pretrained_model already being set at module level before the
     # first call; this function no longer loads or deletes the model itself.
     output = pretrained_model.inference(image=input_img, prompt=task_prompt)["predictions"][0]
     return output
     if args.sample_img_path:
         example_sample.append(args.sample_img_path)
+    pretrained_model = DonutModel.from_pretrained(args.pretrained_path, max_length=128)
+    pretrained_model.encoder.to(torch.bfloat16)
+    pretrained_model.eval()
     demo = gr.Interface(
         fn=demo_process_vqa if task_name == "docvqa" else demo_process,
         inputs=["image", "text"] if task_name == "docvqa" else "image",