Spaces:

impira
/

invoices

Build error

Ankur Goyal committed on Aug 26, 2022

Commit

8171e8e

1 Parent(s): 6cc15a7

Properly cache pipeline and display

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,47 +2,46 @@ import os
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 import streamlit as st
 import torch
 from docquery.pipeline import get_pipeline
 from docquery.document import load_bytes
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipeline = get_pipeline(device=device)
-def process_document(file, question):
-    # prepare encoder inputs
-    document = load_document(file.name)
-    return pipeline(question=question, **document.context)
 def ensure_list(x):
     """Return ``x`` itself when it is a list; otherwise wrap it in a one-element list."""
     return x if isinstance(x, list) else [x]
 st.title("DocQuery: Query Documents Using NLP")
 file = st.file_uploader("Upload a PDF or Image document")
 question = st.text_input("QUESTION", "")
-document = None
 if file is not None:
     col1, col2 = st.columns(2)
     document = load_bytes(file, file.name)
     col1.image(document.preview, use_column_width=True)
-if document is not None and question is not None and len(question) > 0:
-    predictions = pipeline(question=question, **document.context)
-    col2.header("Probabilities")
     for p in ensure_list(predictions):
-        col2.subheader(f"{ p['answer'] }: { round(p['score'] * 100, 1)}%")
 "DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."

 os.environ["TOKENIZERS_PARALLELISM"] = "false"
+print("Importing")
 import streamlit as st
 import torch
 from docquery.pipeline import get_pipeline
 from docquery.document import load_bytes
 def ensure_list(x):
     """Return ``x`` itself when it is a list; otherwise wrap it in a one-element list."""
     return x if isinstance(x, list) else [x]
+@st.experimental_singleton  # presumably makes Streamlit build the pipeline once and reuse it across reruns - confirm against Streamlit docs
+def construct_pipeline():  # returns the object produced by get_pipeline() for the selected device
+    device = "cuda" if torch.cuda.is_available() else "cpu"  # use CUDA when torch reports it is available, otherwise CPU
+    ret = get_pipeline(device=device)
+    return ret
+@st.cache  # NOTE(review): st.cache hashes the arguments, so `document` must be hashable by Streamlit - confirm
+def run_pipeline(question, document):  # answers `question` against `document` via the pipeline from construct_pipeline()
+    return construct_pipeline()(question=question, **document.context)  # document.context is unpacked as keyword arguments
 st.title("DocQuery: Query Documents Using NLP")
 file = st.file_uploader("Upload a PDF or Image document")  # compared against None below before it is used
 question = st.text_input("QUESTION", "")  # "" is passed as the second argument (presumably the initial value - confirm)
 if file is not None:
     col1, col2 = st.columns(2)  # col1 shows the document preview; col2 receives the answers below
     document = load_bytes(file, file.name)
     col1.image(document.preview, use_column_width=True)
+if file is not None and question is not None and len(question) > 0:  # repeats the file guard above, so document and col2 are bound here
+    predictions = run_pipeline(question=question, document=document)
+    col2.header("Answers")
     for p in ensure_list(predictions):  # ensure_list wraps a non-list result so the loop always iterates a list
+        col2.subheader(f"{ p['answer'] }: ({round(p['score'] * 100, 1)}%)")  # score is multiplied by 100 and rounded to one decimal place
 "DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."  # bare string expression; presumably rendered by Streamlit "magic" - confirm