Spaces:
Running
Running
saylee-m
committed on
Commit
•
f50a20b
1
Parent(s):
59e7947
added more comments
Browse files
app.py
CHANGED
@@ -26,8 +26,8 @@ def load_donut_model():
|
|
26 |
return model, processor
|
27 |
|
28 |
def load_paligemma_docvqa():
|
29 |
-
model_id = "google/paligemma-3b-ft-docvqa-896"
|
30 |
-
|
31 |
processor = AutoProcessor.from_pretrained(model_id)
|
32 |
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
|
33 |
model.to(device)
|
@@ -53,6 +53,7 @@ def load_models():
|
|
53 |
}
|
54 |
|
55 |
loaded_models = load_models()
|
|
|
56 |
|
57 |
def base64_encoded_image(image_array):
|
58 |
im = Image.fromarray(image_array)
|
@@ -108,10 +109,14 @@ def process_document_donut(image_array, question):
|
|
108 |
return op
|
109 |
|
110 |
def process_document_pg(image_array, question):
    """Answer a question about a document image with the PaliGemma model.

    Fetches the pre-loaded ("paligemma") model/processor pair, runs
    generation, and returns the decoded text with the echoed question
    prompt stripped from the front.
    """
    pg_model, pg_processor = loaded_models.get("paligemma")

    # Encode the image + question and move the tensors to the target device.
    encoded = pg_processor(images=image_array, text=question, return_tensors="pt").to(device)
    generated = pg_model.generate(**encoded, max_new_tokens=100)

    decoded = pg_processor.decode(generated[0], skip_special_tokens=True)
    # The decoded output begins with the prompt; drop it plus leading newlines.
    return decoded[len(question):].lstrip("\n")
|
116 |
|
117 |
def process_document_idf(image_array, question):
|
|
|
26 |
return model, processor
|
27 |
|
28 |
def load_paligemma_docvqa():
|
29 |
+
# model_id = "google/paligemma-3b-ft-docvqa-896"
|
30 |
+
model_id = "google/paligemma-3b-mix-448"
|
31 |
processor = AutoProcessor.from_pretrained(model_id)
|
32 |
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
|
33 |
model.to(device)
|
|
|
53 |
}
|
54 |
|
55 |
loaded_models = load_models()
|
56 |
+
print("models loaded")
|
57 |
|
58 |
def base64_encoded_image(image_array):
|
59 |
im = Image.fromarray(image_array)
|
|
|
109 |
return op
|
110 |
|
111 |
def process_document_pg(image_array, question):
    """Answer *question* about the document image *image_array* using PaliGemma.

    Parameters:
        image_array: image input accepted by the PaliGemma processor
            (presumably a numpy array from the UI — TODO confirm caller).
        question: prompt string; the model echoes it, so it is sliced off
            the decoded output before returning.

    Returns:
        str: the generated answer text.

    Raises:
        KeyError: if the "paligemma" model was never loaded into
            ``loaded_models``.
    """
    # TODO(review): these debug prints should move to the logging module.
    print("called loaded model")
    # Index directly instead of .get(): a missing key now raises a clear
    # KeyError instead of an opaque "cannot unpack NoneType" TypeError.
    model, processor = loaded_models["paligemma"]

    print("converting inputs")
    inputs = processor(images=image_array, text=question, return_tensors="pt").to(device)

    print("get predictions")
    predictions = model.generate(**inputs, max_new_tokens=100)

    print("returning decoding")
    # Decoded text starts with the prompt (assumes the processor prepends the
    # question verbatim — TODO confirm); strip it and any leading newlines.
    return processor.decode(predictions[0], skip_special_tokens=True)[len(question):].lstrip("\n")
|
121 |
|
122 |
def process_document_idf(image_array, question):
|