import os
import re
import json

import streamlit as st
import pandas as pd

from utils import validate_pdf, displayPDF
from styles import apply_custom_styles
from invoice_extractor.extraction import Auto

# Fall back to the Streamlit secret when the environment does not already
# provide a non-empty GPT_KEY.
if not os.environ.get('GPT_KEY'):
    os.environ['GPT_KEY'] = st.secrets['GPT_KEY']

# Keep a single extractor instance in the session state so it is not rebuilt
# every time the script body is executed for this session.
if 'auto_extractor' not in st.session_state:
    st.session_state.auto_extractor = Auto()


def markdown_table_to_json(markdown):
    """Convert a pipe-delimited markdown table into a list of row dicts.

    The first line supplies the column headers, the second line (the
    ``---`` separator) is ignored, and every following non-blank line
    becomes one dict mapping header -> cell text.

    Args:
        markdown: Text of the markdown table.

    Returns:
        A list of dicts, one per data row. Empty when the table has no
        data rows.

    Note:
        Cells that are empty after stripping are dropped before pairing
        with the headers, so a row with an empty cell has its later
        values shifted left.
    """
    lines = markdown.strip().split("\n")

    # Header cells; the empty fragments produced by the outer pipes are dropped.
    headers = [h.strip() for h in lines[0].split("|") if h.strip()]

    rows = []
    for line in lines[2:]:  # Skip the header and the separator line.
        values = [v.strip() for v in line.split("|") if v.strip()]
        if not values:
            # A blank line is not a data row and must not be counted as one.
            continue
        rows.append(dict(zip(headers, values)))
    return rows


def visualise_pie_chart(analysis):
    """Score the GOOD / AVERAGE / BAD tables of an analysis and draw a gauge.

    Each verdict section is expected as ``<VERDICT> ...table... </VERDICT>``
    inside ``analysis``. Every row contributes 5 (GOOD), 3 (AVERAGE) or
    1 (BAD) to ``score`` and 5 to ``total``.

    Args:
        analysis: Text containing the tagged markdown tables.
    """
    weights = {'GOOD': 5, 'AVERAGE': 3, 'BAD': 1}
    verdicts = {}
    score = 0
    total = 0

    for verdict, weight in weights.items():
        open_tag = f'<{verdict}>'
        close_tag = f'</{verdict}>'
        if open_tag not in analysis:
            # Without the opening tag there is no section to parse; parsing
            # the whole analysis text as a table would invent rows.
            continue

        # Splitting on an empty separator raises ValueError, so the section
        # must be cut at its closing tag.
        section = analysis.split(open_tag)[-1].split(close_tag)[0]
        table = markdown_table_to_json(section)
        if not table:
            continue

        verdicts[verdict] = table
        score += weight * len(table)
        total += 5 * len(table)

    # NOTE(review): `gauge` is not defined or imported in the visible source,
    # and `score` / `verdicts` are computed but never passed on. Confirm where
    # `gauge` comes from and whether gVal is meant to be `score`.
    gauge(
        gVal=total,
        gTitle='',
        gMode='gauge+number',
        grLow=total // 3,
        grMid=2 * (total // 3),
    )


def _render_entity(entity, widget_key):
    """Render one extracted entity according to the type of its value.

    Lists are shown as a table, dicts as one titled table per list-valued
    key, and anything else as a text input.

    Args:
        entity: Mapping with ``entityName`` and ``entityValue`` keys.
        widget_key: Unique key for the text input widget, so that equal
            name/value pairs from different invoices do not collide.
    """
    value = entity['entityValue']

    if isinstance(value, list):
        st.markdown(f'{entity["entityName"]}')
        st.table(pd.DataFrame.from_records(value))
    elif isinstance(value, dict):
        st.markdown(f'{entity["entityName"]}')
        for k, v in value.items():
            st.markdown(f'{k.upper()}')
            if isinstance(v, list):
                st.table(pd.DataFrame.from_records(v))
    else:
        st.text_input(f'{entity["entityName"]}', value, key=widget_key)


def main():
    """Render the page: header, PDF uploader, extraction results and footer."""
    # Apply custom styles
    apply_custom_styles()

    # Header
    st.markdown(
        "\n\nInvoice Extractor\n\nUpload and extract data from invoices\n\n",
        unsafe_allow_html=True,
    )

    # File upload section
    # NOTE(review): the literal passed to the two st.markdown calls around the
    # uploader contains only a newline in the visible source; confirm whether
    # wrapper markup is expected here.
    st.markdown('\n', unsafe_allow_html=True)
    uploaded_files = st.file_uploader(
        "Choose invoice PDF files",
        type="pdf",
        accept_multiple_files=True,
    )
    print(uploaded_files)  # Writes the current upload list to stdout.
    lob = st.selectbox(
        'LOB',
        options=['Health', 'Life', 'Auto'],
        index=2,
    )
    st.markdown('\n', unsafe_allow_html=True)

    if uploaded_files and st.button('Extract'):
        # Process each uploaded file
        for file_index, uploaded_file in enumerate(uploaded_files):
            # Read PDF content
            pdf_bytes = uploaded_file.read()

            # Validate PDF
            if not validate_pdf(pdf_bytes):
                st.error(f"Invalid PDF file: {uploaded_file.name}")
                continue

            # Show loading state
            with st.spinner(f"Extracting {uploaded_file.name}..."):
                try:
                    # Make API call
                    response = st.session_state.auto_extractor(pdf_bytes)
                    post_process = next(
                        (
                            item for item in response
                            if item.get("stage") == "POST_PROCESS"
                        ),
                        None,
                    )
                    if post_process is None:
                        # Report the real cause instead of failing on
                        # subscripting None.
                        raise ValueError(
                            "no POST_PROCESS stage in extractor response"
                        )
                    extraction = post_process['response']

                    with st.expander(f'### Invoice : {uploaded_file.name}'):
                        displayPDF(pdf_bytes)
                        for entity_index, entity in enumerate(extraction):
                            _render_entity(
                                entity,
                                widget_key=(
                                    f'entity-{file_index}-{entity_index}'
                                ),
                            )
                except Exception as e:
                    # Per-file boundary: report the failure and carry on with
                    # the remaining uploads.
                    st.error(
                        f"Error extracting {uploaded_file.name}: {str(e)}"
                    )

    # Footer
    st.markdown(
        "\n\nUpload one or more invoice PDFs to get detailed extraction."
        "\n\nWe support all major formats.\n\n",
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    st.set_page_config(
        page_title="Invoice Extractor",
        page_icon="📋",
        layout="wide"
    )
    main()