from io import BytesIO from PIL import Image from fastapi import FastAPI, File, UploadFile, Form from fastapi.responses import JSONResponse from transformers import pipeline from pytesseract import pytesseract import base64 app = FastAPI() # Use a pipeline as a high-level helper nlp_qa = pipeline("document-question-answering", model="impira/layoutlm-document-qa") description = """ ## Image-based Document QA This API extracts text from an uploaded image using OCR and performs document question answering using a LayoutLM-based model. ### Endpoints: - **POST /uploadfile/:** Upload an image file to extract text and answer provided questions. - **POST /pdfUpload/:** Provide a file to extract text and answer provided questions. """ app = FastAPI(docs_url="/", description=description) def get_image_content(contents): # Convert binary content to image image = Image.open(BytesIO(contents)) # Perform OCR to extract text from the image text_content = pytesseract.image_to_string(image) return text_content @app.post("/uploadfile/", description=description) async def perform_document_qa( file: UploadFile = File(...), questions: str = Form(...), ): try: # Read the uploaded file contents = await file.read() text_content = get_image_content(contents) # Split the questions string into a list question_list = [q.strip() for q in questions.split(',')] # Perform document question answering for each question using LayoutLM-based model answers_dict = {} for question in question_list: result = nlp_qa( text_content, question ) answers_dict[question] = result['answer'] return answers_dict except Exception as e: return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500) @app.post("/pdfUpload/", description=description) async def load_file( file: UploadFile = File(...), questions: str = Form(...), ): try: # Read the uploaded file as bytes contents = await file.read() # Perform document question answering for each question using LayoutLM-based model answers_dict = {} for question in questions.split(','): result = nlp_qa( contents.decode('utf-8'), # Assuming the content is text, adjust as needed question.strip() ) answers_dict[question] = result['answer'] return answers_dict except Exception as e: return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)