gcp_document_ai / app.py
Omkar008's picture
Update app.py
78babf2 verified
raw
history blame
1.96 kB
from fastapi import FastAPI, HTTPException,Query,Request
import json
from authenticate import get_access_token,get_access_token_v1
import os
import requests
# ASGI application object; the route handler below registers itself on it
# through the ``@app.post`` decorator.
app = FastAPI()
# Upper bound (seconds) for the Document AI HTTP call so a stalled upstream
# cannot hold a request open forever.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def _extract_entities(document):
    """Map each typed entity of a Document AI ``document`` dict to its values.

    Args:
        document: The ``document`` object from the processor response.

    Returns:
        A dict keyed by entity ``type``; each value holds the entity's
        ``mentionText`` (as ``mention_text``) and ``normalizedValue``
        (``None`` when absent). Entities without a ``type`` are skipped.
    """
    extracted = {}
    for ent in document.get('entities') or []:
        ent_type = ent.get('type')
        if ent_type is None:
            continue
        # NOTE(review): entities sharing a type overwrite one another, so only
        # the last one survives. Kept as-is because callers rely on this shape.
        extracted[ent_type] = {
            "mention_text": ent.get('mentionText'),
            "normalizedValue": ent.get('normalizedValue'),
        }
    return extracted


@app.post("/process_document")
async def process_document_base64(request: Request):
    """Send a base64-encoded document to Document AI and return its entities.

    The JSON request body is read for ``base64_content`` (required) and for
    ``message_id`` and ``filename``, which are echoed back unchanged.

    Returns:
        ``{"message_id", "filename", "entities_data"}`` on success, or
        ``{"error": "base64 data not present"}`` when ``base64_content`` is
        missing from the body.

    Raises:
        HTTPException: 400 if the body is not a JSON object; 500 if
            ``PROJECT_ID`` / ``PROCESSOR_ID`` are not set; 502 if the
            Document AI call fails or returns no ``document``.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    try:
        request_data = await request.json()
    except ValueError as err:
        raise HTTPException(
            status_code=400, detail="request body is not valid JSON"
        ) from err
    if not isinstance(request_data, dict):
        raise HTTPException(
            status_code=400, detail="request body must be a JSON object"
        )

    base64_content = request_data.get('base64_content')
    if base64_content is None:
        return {"error":"base64 data not present"}

    message_id = request_data.get('message_id')
    filename = request_data.get('filename')

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        # Without these the URL below would contain the literal text "None".
        raise HTTPException(
            status_code=500,
            detail="PROJECT_ID and PROCESSOR_ID must be configured",
        )

    # NOTE(review): the MIME type is fixed to PDF regardless of ``filename``.
    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": "application/pdf",
            "content": base64_content,
        },
    }

    # The token is a credential: never print or log it.
    access_token = get_access_token_v1()
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json; charset=utf-8',
    }
    url = (
        f'https://us-documentai.googleapis.com/v1/projects/{project_id}'
        f'/locations/us/processors/{processor_id}:process'
    )

    try:
        # ``requests`` is blocking; run it in a worker thread so the event
        # loop keeps serving other requests while waiting on Document AI.
        response = await run_in_threadpool(
            requests.post,
            url,
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
    except requests.RequestException as err:
        raise HTTPException(
            status_code=502, detail="Document AI request failed"
        ) from err

    try:
        response_json = response.json()
    except ValueError as err:
        raise HTTPException(
            status_code=502, detail="Document AI returned a non-JSON response"
        ) from err

    document = (
        response_json.get('document') if isinstance(response_json, dict) else None
    )
    if not response.ok or not isinstance(document, dict):
        # Error responses carry no ``document``; report the upstream failure
        # instead of crashing on a ``None`` lookup.
        raise HTTPException(
            status_code=502,
            detail=f"Document AI request failed with status {response.status_code}",
        )

    return {
        "message_id": message_id,
        "filename": filename,
        "entities_data": _extract_entities(document),
    }