# Spaces: Sleeping -- hosting-page status text captured along with this file; not part of the program.
import json
import logging
import os

import requests
from fastapi import FastAPI, HTTPException, Query, Request

from authenticate import get_access_token, get_access_token_v1
# FastAPI application object for this module.
app = FastAPI()
# Upper bound (seconds) on the Document AI call so a stalled upstream
# cannot hang the request indefinitely.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


async def process_document_base64(request: Request):
    """Send a base64-encoded document to Document AI and return its entities.

    The JSON request body is read for three keys: ``base64_content``
    (required), ``message_id`` and ``filename`` (both echoed back as-is).
    The content is posted to the processor identified by the ``PROJECT_ID``
    and ``PROCESSOR_ID`` environment variables.

    Args:
        request: Incoming request whose body is a JSON object.

    Returns:
        ``{"error": "base64 data not present"}`` when ``base64_content`` is
        missing or null; otherwise a dict with ``message_id``, ``filename``
        and ``entities_data``. ``entities_data`` maps each entity ``type`` to
        ``{"mention_text": ..., "normalizedValue": ...}``.

    Raises:
        HTTPException: 400 when the body is not a JSON object; 500 when the
            processor environment variables are unset; 502 when the
            Document AI call fails, returns an error status, or returns a
            body that is not a JSON object.
    """
    logger = logging.getLogger(__name__)

    try:
        request_data = await request.json()
    except ValueError as exc:  # json.JSONDecodeError is a ValueError
        raise HTTPException(
            status_code=400, detail="request body is not valid JSON"
        ) from exc
    if not isinstance(request_data, dict):
        raise HTTPException(
            status_code=400, detail="request body must be a JSON object"
        )

    base64_content = request_data.get('base64_content')
    if base64_content is None:
        return {"error": "base64 data not present"}

    message_id = request_data.get('message_id')
    filename = request_data.get('filename')

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        # Without these the URL below would literally contain "None".
        raise HTTPException(
            status_code=500,
            detail="PROJECT_ID / PROCESSOR_ID environment variables are not set",
        )

    # The payload is always declared as a PDF; the filename is not consulted.
    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": "application/pdf",
            "content": base64_content,
        },
    }

    # The token is a credential: never print or log it.
    access_token = get_access_token_v1()
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json; charset=utf-8',
    }
    url = (
        'https://us-documentai.googleapis.com/v1/'
        f'projects/{project_id}/locations/us/processors/{processor_id}:process'
    )

    # NOTE(review): requests.post is a blocking call inside an async handler;
    # consider moving it to a worker thread if this endpoint sees concurrency.
    try:
        response = requests.post(
            url,
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        logger.exception("Document AI request failed")
        raise HTTPException(
            status_code=502, detail="Document AI request failed"
        ) from exc

    if not response.ok:
        logger.error("Document AI returned HTTP %s", response.status_code)
        raise HTTPException(
            status_code=502,
            detail=f"Document AI returned HTTP {response.status_code}",
        )

    try:
        response_json = response.json()
    except ValueError as exc:
        raise HTTPException(
            status_code=502, detail="Document AI returned a non-JSON response"
        ) from exc
    if not isinstance(response_json, dict):
        raise HTTPException(
            status_code=502, detail="Document AI returned an unexpected response"
        )

    # A response without 'document' or 'entities' yields an empty result
    # instead of an AttributeError.
    document = response_json.get('document') or {}
    entities = document.get('entities') or []
    logger.debug("Document AI returned %d entities", len(entities))

    # NOTE(review): entities sharing a 'type' overwrite one another here
    # (last one wins); kept as-is so the response shape is unchanged.
    document_entities = {}
    for ent in entities:
        entity_type = ent.get('type')
        if entity_type is None:
            continue
        document_entities[entity_type] = {
            "mention_text": ent.get('mentionText'),
            "normalizedValue": ent.get('normalizedValue'),
        }

    return {
        "message_id": message_id,
        "filename": filename,
        "entities_data": document_entities,
    }