Spaces:
Running
Running
File size: 3,716 Bytes
8700a34 574f9e3 6bbd3ca c39e604 af17670 836458e 86a0b7a 6bbd3ca 574f9e3 4e3cfd3 574f9e3 6bbd3ca 574f9e3 6bbd3ca 574f9e3 6bbd3ca a82199b 6bbd3ca c39e604 574f9e3 420d3c9 574f9e3 41d335c 574f9e3 41d335c 574f9e3 f198fb3 574f9e3 f8ec4b3 574f9e3 86a0b7a 574f9e3 86a0b7a 574f9e3 86a0b7a 574f9e3 f8ec4b3 574f9e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import fitz
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
from transformers import pipeline
from PIL import Image
from io import BytesIO
from starlette.middleware import Middleware
from starlette.middleware.cors import CORSMiddleware
# Human-readable API description shown in the auto-generated docs page.
description = """
## Image-based Document QA
This API performs document question answering using a LayoutLMv2-based model.
### Endpoints:
- **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
- **POST /pdfQA/:** Provide a PDF file to extract text and answer provided questions.
"""

# Create the application exactly ONCE.
# BUG FIX: the original code built a bare `FastAPI()`, attached the CORS
# middleware to it, and then rebound `app` to a *second* FastAPI instance,
# silently discarding the CORS configuration. Creating a single instance
# and then adding middleware keeps both the docs settings and CORS active.
app = FastAPI(docs_url="/", description=description)

# Set up CORS middleware
origins = ["*"]  # or specify your list of allowed origins
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Module-level model load (kept from the original to preserve startup
# behavior). Model: LayoutLMv2 fine-tuned on the InfoVQA dataset.
nlp_qa = pipeline("document-question-answering", model="tiennvcs/layoutlmv2-base-uncased-finetuned-infovqa")
@app.post("/uploadfile/", description="Upload an image file to extract text and answer provided questions.")
async def perform_document_qa(
    file: UploadFile = File(...),
    questions: str = Form(...),
):
    """Answer comma-separated questions about an uploaded document image.

    Reads the uploaded file as an image and runs the LayoutLMv2 document-QA
    pipeline once per question. Returns a dict mapping each normalized
    question string to the model's top-ranked answer; on any failure returns
    a 500 JSON response with the error message.
    """
    try:
        # Read the uploaded file as bytes and decode it with PIL.
        contents = await file.read()
        image = Image.open(BytesIO(contents))
        answers_dict = {}
        for raw_question in questions.split(','):
            # BUG FIX: normalize once and reuse. The original queried the
            # model with `question.strip()` but keyed the response with
            # `question.strip("[]")`, so keys could retain leading
            # whitespace while brackets were removed — the key and the
            # actual query text disagreed.
            question = raw_question.strip().strip("[]").strip()
            if not question:
                continue  # skip empty segments (e.g. trailing commas)
            result = nlp_qa(image, question)
            # The pipeline returns a ranked list; take the best answer.
            answers_dict[question] = result[0]['answer']
        return answers_dict
    except Exception as e:
        return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
@app.post("/pdfQA/", description="Provide a PDF file to extract text and answer provided questions.")
async def pdf_question_answering(
    file: UploadFile = File(...),
    questions: str = Form(...),
):
    """Extract all text from an uploaded PDF and answer comma-separated questions.

    Concatenates the text of every page, then queries the QA pipeline once
    per question against that text. Returns a dict mapping each question to
    its answer; on any failure returns a 500 JSON response with the error.
    """
    try:
        # Read the uploaded file as bytes.
        contents = await file.read()
        # BUG FIX: PyMuPDF has no `fitz.open_from_bytes`; the supported way
        # to open an in-memory document is the `stream=` keyword.
        pdf_document = fitz.open(stream=contents, filetype="pdf")
        try:
            # Collect the text layer of every page (no OCR is performed —
            # `get_text()` only reads embedded text).
            page_texts = []
            for page_num in range(pdf_document.page_count):
                page = pdf_document.load_page(page_num)
                print(f"Processing page {page_num + 1}...")
                page_texts.append(page.get_text())
            all_text = '\n'.join(page_texts) + '\n'
        finally:
            # Release the document resources even if extraction fails.
            pdf_document.close()
        print(all_text)
        qa_dict = {}
        for raw_question in questions.split(','):
            question = raw_question.strip()
            if not question:
                continue  # skip empty segments (e.g. trailing commas)
            # NOTE(review): `nlp_qa` is a document-question-answering
            # pipeline, which normally takes an image + question; passing a
            # {'question', 'context'} dict follows the original code —
            # confirm the pipeline actually accepts this text-only form.
            result = nlp_qa({
                'question': question,
                'context': all_text
            })
            qa_dict[question] = result['answer']
        return qa_dict
    except Exception as e:
        return JSONResponse(content=f"Error processing PDF file: {str(e)}", status_code=500)
|