gcp_document_ai / extract_and_store_supabase.py
Omkar008's picture
Update extract_and_store_supabase.py
36a270b verified
raw
history blame
3.36 kB
import mimetypes
import os
from base64 import urlsafe_b64encode

from supabase_models import Supabase_Client
# Document AI entity types that are persisted; every other type is dropped.
_ALLOWED_ENTITY_TYPES = frozenset({
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})


def _collect_allowed_entities(entities):
    """Map each allowed entity type to its mention text and normalized value."""
    collected = {}
    for ent in entities or ():
        entity_type = ent.get('type')
        if entity_type in _ALLOWED_ENTITY_TYPES:
            # A type that occurs more than once (e.g. several line items)
            # keeps only its last occurrence, because the type is the key.
            collected[entity_type] = {
                "mention_text": ent.get('mentionText'),
                "normalizedValue": ent.get('normalizedValue'),
            }
    return collected


def extract_structure_store_message(user_id: str, message_id: str, attachment_id: str,
                                    file_extension: str = "pdf"):
    """Run a stored attachment through Document AI and save the extracted fields.

    Downloads ``<message_id>_<attachment_id>.<file_extension>`` from the
    ``receipt_radar`` storage bucket, posts it to the Document AI processor
    named by the ``PROJECT_ID`` / ``PROCESSOR_ID`` environment variables, and
    inserts the raw text plus the allowed entities into the ``countries``
    table.

    Does nothing when ``message_id`` or ``attachment_id`` is empty. Any failure
    after the storage client is obtained is printed rather than raised.

    Args:
        user_id: Stored alongside the extracted data as ``user_id``.
        message_id: First part of the stored object's name.
        attachment_id: Second part of the stored object's name.
        file_extension: Extension of the stored object, without the dot.
            The previous body read it from an ``attachment`` object that was
            never defined; the ``"pdf"`` default is an assumption -- TODO
            confirm against the uploader's naming scheme.

    Returns:
        None.
    """
    if not (attachment_id and message_id):
        return

    # NOTE(review): `requests` and `get_access_token_v1` are not imported in
    # the visible part of this file -- confirm they are in scope at runtime.
    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')

    file_name = f"{message_id}_{attachment_id}.{file_extension}"
    print(f"file_name: {file_name}")

    # Image extensions need an `image/...` type, so ask the stdlib first and
    # only fall back to the original `application/<ext>` guess.
    mime_type = mimetypes.guess_type(file_name)[0] or f"application/{file_extension}"

    supabase = Supabase_Client().instance
    try:
        file_bytes = supabase.storage.from_("receipt_radar").download(file_name)
        # NOTE(review): the URL-safe alphabet is kept from the original code;
        # confirm the endpoint accepts it for `rawDocument.content`.
        base64_data = urlsafe_b64encode(file_bytes).decode('utf-8')

        payload = {
            "skipHumanReview": True,
            "rawDocument": {
                "mimeType": mime_type,
                "content": base64_data,
            },
        }
        access_token = get_access_token_v1()
        headers = {
            'Authorization': f'Bearer {access_token}',
            'Content-Type': 'application/json; charset=utf-8',
        }
        response = requests.post(
            f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
            headers=headers,
            json=payload,
        )
        # Surface HTTP errors as such instead of failing later on a missing key.
        response.raise_for_status()

        document = response.json().get('document')
        if document is None:
            raise ValueError("Document AI response has no 'document' field")

        entities = document.get('entities')
        document_entities = {
            'user_id': user_id,
            'raw_text': document.get('text'),
        }
        print('Printing entities')
        print(entities)
        document_entities.update(_collect_allowed_entities(entities))
        print(document_entities)

        # NOTE(review): "countries" looks like a placeholder table name --
        # confirm the intended destination table.
        supabase.table("countries").insert(document_entities).execute()
    except Exception as e:
        # Best-effort boundary: the message text is kept as-is, although this
        # handler also covers the Document AI call and the insert.
        print(f"Error downloading or encoding file: {e}")