import base64
import mimetypes
import os
from base64 import urlsafe_b64encode

import requests

from supabase_models import Supabase_Client
from authenticate import get_access_token_v1

# Supabase storage bucket the attachments are downloaded from.
_STORAGE_BUCKET = "receipt_radar"

# Upper bound (seconds) on the Document AI request so a stalled connection
# cannot block the caller forever. The value is a judgment call; tune as needed.
_DOCUMENT_AI_TIMEOUT_SECONDS = 120

# Entity / property types that are persisted; anything else is ignored.
_ALLOWED_ENTITIES = frozenset({
    "due_date",
    "invoice_date",
    "total_amount",
    "total_tax_amount",
    "receiver_name",
    "invoice_id",
    "currency",
    "receiver_address",
    "invoice_type",
    "supplier_name",
    "payment_terms",
    "line_item",
    "line_item/description",
    "line_item/quantity",
    "line_item/amount",
    "line_item/unit_price",
})

# Slash-separated property types are stored under underscore-separated keys.
_LINE_ITEM_KEY_NAMES = {
    "line_item/description": "line_item_description",
    "line_item/quantity": "line_item_quantity",
    "line_item/amount": "line_item_amount",
    "line_item/unit_price": "line_item_unit_price",
}


def _guess_mime_type(attachment_extension):
    """Return the MIME type to declare for a file with this extension."""
    guessed, _ = mimetypes.guess_type(f"attachment.{attachment_extension}")
    # Fall back to the historical "application/<ext>" form for unknown types.
    return guessed or f"application/{attachment_extension}"


def _append_entity(collected, entity):
    """Append one entity or property to ``collected`` if its type is allowed."""
    entity_type = entity.get("type") or ""
    if entity_type not in _ALLOWED_ENTITIES:
        return
    key = _LINE_ITEM_KEY_NAMES.get(entity_type, entity_type)
    collected.setdefault(key, []).append({
        "mention_text": entity.get("mentionText") or "",
        "normalizedValue": entity.get("normalizedValue") or "",
    })


def _collect_entities(entities):
    """Group allowed entities (and line-item properties) into lists by type."""
    collected = {}
    for entity in entities or ():
        _append_entity(collected, entity)
        # Line items carry their details as nested properties.
        if entity.get("type") == "line_item":
            for prop in entity.get("properties") or ():
                _append_entity(collected, prop)
    return collected


def extract_structure_store_message(
    user_id: str,
    message_id: str,
    attachment_id: str,
    attachment_extension: str,
    email: str,
) -> None:
    """Process a stored attachment with Document AI and save the results.

    Downloads ``{message_id}_{attachment_id}.{attachment_extension}`` from the
    ``receipt_radar`` storage bucket, sends it to the Document AI processor
    identified by the ``PROJECT_ID`` and ``PROCESSOR_ID`` environment
    variables, then inserts one row into ``receipt_ocr_data`` (the raw text)
    and one row into ``document_ai_entities`` (the allowed entities, each
    stored as a list of ``mention_text`` / ``normalizedValue`` dicts).

    Does nothing when ``attachment_id`` or ``message_id`` is empty.

    Args:
        user_id: Stored with both inserted rows.
        message_id: First part of the stored file name; stored with both rows.
        attachment_id: Second part of the stored file name.
        attachment_extension: File extension; used for the file name, the
            request MIME type and the ``file_type`` column.
        email: Stored with both inserted rows.

    Raises:
        requests.HTTPError: If Document AI answers with an error status.
        ValueError: If the response body contains no ``document``.
    """
    if not (attachment_id and message_id):
        return

    project_id = os.getenv('PROJECT_ID')
    processor_id = os.getenv('PROCESSOR_ID')

    file_name = f"{message_id}_{attachment_id}.{attachment_extension}"
    print(f"file_name: {file_name}")

    supabase = Supabase_Client().instance
    file_bytes = supabase.storage.from_(_STORAGE_BUCKET).download(file_name)

    payload = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": _guess_mime_type(attachment_extension),
            "content": urlsafe_b64encode(file_bytes).decode('utf-8'),
        },
    }

    # The bearer token is a credential and is intentionally never printed.
    access_token = get_access_token_v1()
    headers = {
        'Authorization': f'Bearer {access_token}',
        'Content-Type': 'application/json; charset=utf-8',
    }

    response = requests.post(
        f'https://us-documentai.googleapis.com/v1/projects/{project_id}/locations/us/processors/{processor_id}:process',
        headers=headers,
        json=payload,
        timeout=_DOCUMENT_AI_TIMEOUT_SECONDS,
    )
    # Fail with the real HTTP error instead of an AttributeError further down.
    response.raise_for_status()

    document = response.json().get('document')
    if document is None:
        raise ValueError("Document AI response did not contain a 'document'")

    raw_text = document.get('text')
    entities = document.get('entities')

    (
        supabase.table("receipt_ocr_data")
        .insert({
            'user_id': user_id,
            'message_id': message_id,
            'receipt_text': raw_text,
            'email': email,
            'file_type': attachment_extension,
        })
        .execute()
    )

    print('Printing entities')
    print(entities)

    document_entities = {'user_id': user_id}
    document_entities.update(_collect_entities(entities))
    document_entities['email'] = email
    document_entities['message_id'] = message_id
    print(document_entities)

    insert_data_response = (
        supabase.table("document_ai_entities")
        .insert(document_entities)
        .execute()
    )
    print(insert_data_response)