Spaces:

MJobe
/

document-vqa-v2

Running

App Files Files Community

MJobe committed on Dec 12, 2023

Commit

a82199b

•

1 Parent(s): 66f4dcc

Update main.py

Browse files

Files changed (1) hide show

main.py +12 -20

main.py CHANGED Viewed

@@ -1,10 +1,11 @@
-from io import BytesIO
-from PIL import Image
 from fastapi import FastAPI, File, UploadFile, Form
 from fastapi.responses import JSONResponse
 from transformers import pipeline
-from pytesseract import pytesseract
-import base64
 app = FastAPI()
@@ -13,7 +14,7 @@ nlp_qa = pipeline("document-question-answering", model="impira/layoutlm-document
 description = """
 ## Image-based Document QA
-This API extracts text from an uploaded image using OCR and performs document question answering using a LayoutLM-based model.
 ### Endpoints:
 - **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
@@ -22,12 +23,8 @@ This API extracts text from an uploaded image using OCR and performs document qu
 app = FastAPI(docs_url="/", description=description)
-def get_image_content(contents):
-    # Convert binary content to image
-    image = Image.open(BytesIO(contents))
-    # Perform OCR to extract text from the image
-    text_content = pytesseract.image_to_string(image)
-    return text_content
 @app.post("/uploadfile/", description=description)
 async def perform_document_qa(
@@ -35,20 +32,15 @@ async def perform_document_qa(
     questions: str = Form(...),
 ):
     try:
-        # Read the uploaded file
         contents = await file.read()
-        text_content = get_image_content(contents)
-        # Split the questions string into a list
-        question_list = [q.strip() for q in questions.split(',')]
         # Perform document question answering for each question using LayoutLM-based model
         answers_dict = {}
-        for question in question_list:
             result = nlp_qa(
-                text_content,
-                question
             )
             answers_dict[question] = result['answer']

+import os
+import shutil
+import requests
+from tempfile import NamedTemporaryFile
 from fastapi import FastAPI, File, UploadFile, Form
 from fastapi.responses import JSONResponse
 from transformers import pipeline
 app = FastAPI()
 description = """
 ## Image-based Document QA
+This API performs document question answering using a LayoutLM-based model.
 ### Endpoints:
 - **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
 app = FastAPI(docs_url="/", description=description)
+# Define a temporary folder to store downloaded files
+TEMP_FOLDER = "/path/to/temp/folder"  # Replace with the actual path
 @app.post("/uploadfile/", description=description)
 async def perform_document_qa(
     questions: str = Form(...),
 ):
     try:
+        # Read the uploaded file as bytes
         contents = await file.read()
         # Perform document question answering for each question using LayoutLM-based model
         answers_dict = {}
+        for question in questions.split(','):
             result = nlp_qa(
+                contents.decode('utf-8'),  # Assuming the content is text, adjust as needed
+                question.strip()
             )
             answers_dict[question] = result['answer']