Spaces:

Saee
/

vQA-exploration

Running

App Files Community

saylee-m committed on Jul 8

Commit

e0a50f9

•

1 Parent(s): b0ab786

added comments

Browse files

Files changed (1) hide show

app.py +9 -6

app.py CHANGED Viewed

@@ -44,13 +44,13 @@ def load_models():
     # load donut
     donut_model, donut_processor = load_donut_model()
     print("donut downloaded")
-    #load paligemma
-    pg_model, pg_processor = load_paligemma_docvqa()
-    print("paligemma downloaded")
     return {"donut":[donut_model, donut_processor],
-            # "idefics": [idf_model, idf_processor],
-            "paligemma": [pg_model, pg_processor]}
 loaded_models = load_models()
@@ -112,7 +112,7 @@ def process_document_pg(image_array, question):
     inputs = processor(images=image_array, text=question, return_tensors="pt").to(device)
     predictions = model.generate(**inputs, max_new_tokens=100)
-    return processor.batch_decode(predictions[0], skip_special_tokens=True)[len(question):].lstrip("\n")
 def process_document_idf(image_array, question):
     model, processor = loaded_models.get("idefics")
@@ -124,6 +124,7 @@ def process_document_idf(image_array, question):
 def generate_answer_donut(image_array, question):
     try:
         answer = process_document_donut(image_array, question)
         print(answer)
         return answer
@@ -134,6 +135,7 @@ def generate_answer_donut(image_array, question):
 def generate_answer_idefics(image_array, question):
     try:
         # answer = process_document_idf(image_array, question)
         answer = inference_calling_idefics(image_array, question)
         print(answer)
@@ -145,6 +147,7 @@ def generate_answer_idefics(image_array, question):
 def generate_answer_paligemma(image_array, question):
     try:
         answer = process_document_pg(image_array, question)
         print(answer)
         return answer

     # load donut
     donut_model, donut_processor = load_donut_model()
     print("donut downloaded")
+    # #load paligemma
+    # pg_model, pg_processor = load_paligemma_docvqa()
+    # print("paligemma downloaded")
     return {"donut":[donut_model, donut_processor],
+            # "paligemma": [pg_model, pg_processor]
+            }
 loaded_models = load_models()
     inputs = processor(images=image_array, text=question, return_tensors="pt").to(device)
     predictions = model.generate(**inputs, max_new_tokens=100)
+    return processor.decode(predictions[0], skip_special_tokens=True)[len(question):].lstrip("\n")
 def process_document_idf(image_array, question):
     model, processor = loaded_models.get("idefics")
 def generate_answer_donut(image_array, question):
     try:
+        print("processing document - donut")
         answer = process_document_donut(image_array, question)
         print(answer)
         return answer
 def generate_answer_idefics(image_array, question):
     try:
+        print("processing document - idf2")
         # answer = process_document_idf(image_array, question)
         answer = inference_calling_idefics(image_array, question)
         print(answer)
 def generate_answer_paligemma(image_array, question):
     try:
+        print("processing document - pg")
         answer = process_document_pg(image_array, question)
         print(answer)
         return answer