File size: 1,958 Bytes
fd238c0
df6525b
f32be4c
9c28f03
06baaec
 
94dd1d8
df6525b
 
d8cffed
fd238c0
2f6db8c
 
27fddcb
d2f7dce
ba400dd
2f6db8c
 
d2f7dce
 
27fddcb
 
 
df6525b
 
 
 
2f6db8c
df6525b
 
 
956f6b8
ba400dd
bb3be13
df6525b
 
 
 
 
 
 
2f6db8c
df6525b
 
 
27fddcb
445c199
f65bd6f
 
27fddcb
 
 
78babf2
 
 
27fddcb
e2387e5
53f56c0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from fastapi import FastAPI, HTTPException,Query,Request
import json
from authenticate import get_access_token,get_access_token_v1
import os
import requests


# FastAPI application instance; the route handlers in this module are
# registered on it through its decorator methods (e.g. ``@app.post``).
app = FastAPI()

# Upper bound (seconds) on the Document AI call so that a stalled upstream
# cannot hang the request handler indefinitely.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120


def _collect_entities(response_json):
    """Map each typed entity of a Document AI response to its text values.

    Args:
        response_json: Decoded JSON body returned by the ``:process`` call.

    Returns:
        A dict keyed by entity ``type``. Each value is a dict with the keys
        ``mention_text`` and ``normalizedValue`` (either may be ``None``).
        Entities without a ``type`` are skipped; when several entities share
        a type, the last one wins.
    """
    document = response_json.get('document') if isinstance(response_json, dict) else None
    entities = (document or {}).get('entities')
    print('Printing entities')
    print(entities)
    return {
        ent.get('type'): {
            "mention_text": ent.get('mentionText'),
            "normalizedValue": ent.get('normalizedValue'),
        }
        for ent in entities or []
        if ent.get('type') is not None
    }


@app.post("/process_document")
async def process_document_base64(request: Request):
    """Send a base64-encoded document to Google Document AI and return its entities.

    The JSON request body is read for these keys:
        base64_content: Base64 document payload (required).
        message_id: Echoed back unchanged in the response (optional).
        filename: Echoed back unchanged in the response (optional).

    The processor is selected through the ``PROJECT_ID`` and ``PROCESSOR_ID``
    environment variables. The document is always submitted with the
    ``application/pdf`` MIME type.

    Returns:
        ``{"error": "base64 data not present"}`` when ``base64_content`` is
        missing, otherwise a dict with ``message_id``, ``filename`` and
        ``entities_data`` (entity type -> mention text / normalized value).

    Raises:
        HTTPException: 400 if the body is not a JSON object; 500 if the
            processor environment variables are unset; 502 if Document AI is
            unreachable, answers with an error status, or returns non-JSON.
    """
    try:
        request_data = await request.json()
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="request body is not valid JSON") from exc
    if not isinstance(request_data, dict):
        raise HTTPException(status_code=400, detail="request body must be a JSON object")

    if request_data.get('base64_content') is None:
        return {"error":"base64 data not present"}

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')
    if not project_id or not processor_id:
        # Without these the URL below would literally contain "None".
        raise HTTPException(
            status_code=500,
            detail="PROJECT_ID and PROCESSOR_ID must be configured",
        )

    message_id = request_data.get('message_id')
    filename = request_data.get('filename')

    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": "application/pdf",
            "content": request_data.get('base64_content')
        }
    }

    # The bearer token is a credential: it must never be printed or logged.
    access_token = get_access_token_v1()
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json; charset=utf-8'
    }

    # NOTE(review): requests.post is a blocking call inside an async handler;
    # consider moving it to a worker thread if this endpoint sees concurrency.
    try:
        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
            timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        raise HTTPException(status_code=502, detail="could not reach Document AI") from exc

    if not response.ok:
        # Report only the status code; the upstream URL embeds project details.
        raise HTTPException(
            status_code=502,
            detail=f"Document AI returned HTTP {response.status_code}",
        )

    try:
        response_json = response.json()
    except ValueError as exc:
        raise HTTPException(
            status_code=502,
            detail="Document AI returned a non-JSON response",
        ) from exc

    document_entities = _collect_entities(response_json)

    return {"message_id":message_id , "filename":filename , "entities_data":document_entities}