Spaces:

MJobe
/

document-vqa-v2

Running

App Files Files Community

document-vqa-v2 / main.py

MJobe

Update main.py

4e3cfd3 12 months ago

raw

history blame

3.72 kB

	import fitz
	from fastapi import FastAPI, File, UploadFile, Form
	from fastapi.responses import JSONResponse
	from transformers import pipeline
	from PIL import Image
	from io import BytesIO
	from starlette.middleware import Middleware
	from starlette.middleware.cors import CORSMiddleware

	# Markdown description passed to FastAPI for the generated API docs.
	description = """
	## Image-based Document QA
	This API performs document question answering using a LayoutLMv2-based model.

	### Endpoints:
	- POST /uploadfile/: Upload an image file to extract text and answer provided questions.
	- POST /pdfQA/: Provide a PDF file to extract text and answer provided questions.
	"""

	# Create the application exactly once. Previously a first FastAPI() instance
	# received the CORS middleware and was then replaced by a second instance,
	# so the routes were registered on an app without any CORS handling.
	app = FastAPI(docs_url="/", description=description)

	# Set up CORS middleware
	# NOTE(review): wildcard origins together with allow_credentials=True is very
	# permissive -- confirm this is intended before exposing the service publicly.
	origins = ["*"]  # or specify your list of allowed origins
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=origins,
	    allow_credentials=True,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Document question-answering pipeline shared by both endpoints; it is built
	# once at import time.
	nlp_qa = pipeline("document-question-answering", model="tiennvcs/layoutlmv2-base-uncased-finetuned-infovqa")

	@app.post("/uploadfile/", description="Upload an image file to extract text and answer provided questions.")
	async def perform_document_qa(
	    file: UploadFile = File(...),
	    questions: str = Form(...),
	):
	    """Answer comma-separated questions about an uploaded image.

	    Args:
	        file: The uploaded image; its bytes are opened with PIL.
	        questions: Comma-separated questions. Surrounding whitespace and
	            enclosing square brackets are removed from each entry, and
	            blank entries are skipped.

	    Returns:
	        A dict mapping each cleaned question to the ``answer`` field of the
	        first result returned by ``nlp_qa``, or a 500 ``JSONResponse``
	        carrying the error message if anything fails.
	    """
	    try:
	        # Read the uploaded file as bytes and open it as an image.
	        contents = await file.read()
	        image = Image.open(BytesIO(contents))

	        answers_dict = {}
	        for raw_question in questions.split(','):
	            # Normalize once so the text sent to the model and the key in
	            # the response are the same string. Previously the model got
	            # the brackets and the key kept the surrounding whitespace.
	            question = raw_question.strip().strip("[]").strip()
	            if not question:
	                # A stray or trailing comma yields an empty entry.
	                continue

	            # NOTE(review): this is a synchronous call inside an async
	            # handler, so other requests wait while it runs.
	            result = nlp_qa(image, question)

	            # Use the 'answer' key of the first item in the result list.
	            answers_dict[question] = result[0]['answer']

	        return answers_dict
	    except Exception as e:
	        return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)

	@app.post("/pdfQA/", description="Provide a PDF file to extract text and answer provided questions.")
	async def pdf_question_answering(
	    file: UploadFile = File(...),
	    questions: str = Form(...),
	):
	    """Answer comma-separated questions about an uploaded PDF.

	    Every page is rendered to an RGB image and passed to ``nlp_qa`` together
	    with the question, mirroring how the image endpoint calls the pipeline.
	    For each question the highest-scoring candidate across all pages wins.

	    Args:
	        file: The uploaded PDF document.
	        questions: Comma-separated questions; surrounding whitespace is
	            removed and blank entries are skipped.

	    Returns:
	        A dict mapping each question to its best answer (an empty string
	        when no page yields a candidate), or a 500 ``JSONResponse``
	        carrying the error message if anything fails.
	    """
	    try:
	        # Read the uploaded file as bytes
	        contents = await file.read()

	        # PyMuPDF opens in-memory documents through the ``stream`` argument;
	        # ``fitz.open_from_bytes`` does not exist, so the previous code
	        # always ended up in the error handler.
	        page_images = []
	        with fitz.open(stream=contents, filetype="pdf") as pdf_document:
	            # Render at twice the default resolution.
	            # NOTE(review): presumably helps the pipeline read small text;
	            # tune the factor if latency or memory becomes a problem.
	            zoom = fitz.Matrix(2, 2)
	            for page in pdf_document:
	                pixmap = page.get_pixmap(matrix=zoom, alpha=False)
	                # Without alpha the default pixmap is 3-channel RGB, which
	                # maps directly onto a PIL "RGB" image.
	                page_images.append(
	                    Image.frombytes("RGB", (pixmap.width, pixmap.height), pixmap.samples)
	                )

	        qa_dict = {}
	        for raw_question in questions.split(','):
	            question = raw_question.strip()
	            if not question:
	                # A stray or trailing comma yields an empty entry.
	                continue

	            candidates = []
	            for image in page_images:
	                # The pipeline needs an image plus a question; the old
	                # {'question', 'context'} dict is the text-QA calling style.
	                result = nlp_qa(image, question)
	                if isinstance(result, dict):
	                    # Tolerate a single-dict result as well as a list.
	                    result = [result]
	                candidates.extend(result)

	            best = max(candidates, key=lambda item: item.get("score", 0), default=None)
	            qa_dict[question] = best["answer"] if best is not None else ""

	        return qa_dict

	    except Exception as e:
	        return JSONResponse(content=f"Error processing PDF file: {str(e)}", status_code=500)