Spaces:
Running
Running
File size: 1,958 Bytes
fd238c0 df6525b f32be4c 9c28f03 06baaec 94dd1d8 df6525b d8cffed fd238c0 2f6db8c 27fddcb d2f7dce ba400dd 2f6db8c d2f7dce 27fddcb df6525b 2f6db8c df6525b 956f6b8 ba400dd bb3be13 df6525b 2f6db8c df6525b 27fddcb 445c199 f65bd6f 27fddcb 78babf2 27fddcb e2387e5 53f56c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
from fastapi import FastAPI, HTTPException,Query,Request
import json
from authenticate import get_access_token,get_access_token_v1
import os
import requests
# Application instance; the route decorator below registers its handler on it.
app = FastAPI()
# Upper bound (seconds) on the upstream call so a stalled connection cannot
# hang the handler indefinitely.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def _collect_entities(entities):
    """Map each typed entity to its mention text and normalized value.

    Entities without a ``type`` are skipped. When several entities share a
    type, the last one wins because the result is keyed by type.
    """
    collected = {}
    for ent in entities or []:
        ent_type = ent.get('type')
        if ent_type is None:
            continue
        collected[ent_type] = {
            "mention_text": ent.get('mentionText'),
            "normalizedValue": ent.get('normalizedValue'),
        }
    return collected


@app.post("/process_document")
async def process_document_base64(request: Request):
    """Send a base64-encoded document to a Document AI processor.

    The JSON request body is read for ``base64_content`` (required),
    ``message_id`` and ``filename`` (both echoed back unchanged). The content
    is posted to the ``:process`` endpoint of the processor identified by the
    ``PROJECT_ID`` and ``PROCESSOR_ID`` environment variables, always with
    MIME type ``application/pdf`` and ``skipHumanReview`` enabled.

    Returns:
        ``{"error": "base64 data not present"}`` when ``base64_content`` is
        missing; otherwise a dict with ``message_id``, ``filename`` and
        ``entities_data`` (entity type -> mention text / normalized value).

    Raises:
        HTTPException: 400 if the body is not a JSON object, 500 if the
            processor environment variables are unset, 502 if the upstream
            call fails, returns a non-success status, or returns a body that
            is not a JSON object.
    """
    try:
        request_data = await request.json()
    except ValueError as err:
        raise HTTPException(
            status_code=400, detail="request body is not valid JSON"
        ) from err
    if not isinstance(request_data, dict):
        raise HTTPException(
            status_code=400, detail="request body must be a JSON object"
        )

    if request_data.get('base64_content') is None:
        return {"error": "base64 data not present"}

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        # Without these the URL would contain the literal text "None".
        raise HTTPException(
            status_code=500,
            detail="PROJECT_ID and PROCESSOR_ID must be configured",
        )

    message_id = request_data.get('message_id')
    filename = request_data.get('filename')

    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": "application/pdf",
            "content": request_data.get('base64_content'),
        },
    }

    # The token is a credential: it must never be printed or logged.
    access_token = get_access_token_v1()
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json; charset=utf-8',
    }

    # NOTE(review): requests.post is a blocking call inside an async handler;
    # consider moving it to a worker thread if this endpoint sees concurrency.
    try:
        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
    except requests.RequestException as err:
        raise HTTPException(
            status_code=502, detail="Document AI request failed"
        ) from err

    if not response.ok:
        raise HTTPException(
            status_code=502,
            detail=f"Document AI returned HTTP {response.status_code}",
        )

    try:
        response_json = response.json()
    except ValueError as err:
        raise HTTPException(
            status_code=502, detail="Document AI returned a non-JSON response"
        ) from err
    if not isinstance(response_json, dict):
        raise HTTPException(
            status_code=502, detail="Document AI returned an unexpected response"
        )

    # 'document' or 'entities' may be absent; treat both as "no entities".
    entities = (response_json.get('document') or {}).get('entities')
    print('Printing entities')
    print(entities)

    return {
        "message_id": message_id,
        "filename": filename,
        "entities_data": _collect_entities(entities),
    }
|