import os
import streamlit as st
from doctr.models import ocr_predictor
from doctr.io import DocumentFile
from openai import OpenAI

# Initialize DocTR OCR predictor.
# Streamlit re-executes this script from top to bottom on every user
# interaction, so the predictor is built inside a cached factory: the
# pretrained model is constructed once and reused by later reruns instead
# of being rebuilt each time.
@st.cache_resource
def _load_ocr_model():
    """Build the pretrained docTR OCR predictor (cached across reruns)."""
    return ocr_predictor(pretrained=True)


ocr_model = _load_ocr_model()

# Initialize the OpenAI client; the API key is read from the
# OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def _collect_text(result):
    """Flatten an OCR result into plain text, one output line per OCR line.

    Walks ``result.pages -> blocks -> lines -> words`` and joins the
    ``value`` of each word with single spaces; detected lines are joined
    with newlines. Returns an empty string when no lines were detected.
    """
    return "\n".join(
        " ".join(word.value for word in line.words)
        for page in result.pages
        for block in page.blocks
        for line in block.lines
    )


# Streamlit application
def main():
    """Render the page: upload a PDF, OCR it, and ask the LLM for contact details.

    Flow:
      1. Show the title and a PDF uploader; do nothing more until a file
         is provided.
      2. Parse the uploaded bytes with ``DocumentFile.from_pdf`` and run
         the module-level ``ocr_model`` on the result.
      3. Send the recognised text to ``gpt-3.5-turbo`` through the
         module-level ``client`` and display the model's answer.

    Failures while reading/OCR-ing the PDF or while calling the API are
    reported with ``st.error`` rather than raised. Returns ``None``.
    """
    st.title('EMAIL, Phone, Location Extractor')

    # Upload a PDF file
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
    if uploaded_file is None:
        return

    # Load the PDF with docTR and run OCR. This is the UI boundary, so any
    # failure (corrupt or unsupported file, model error) is shown to the
    # user instead of surfacing as a raw traceback.
    try:
        doc = DocumentFile.from_pdf(uploaded_file.read())
        result = ocr_model(doc)
    except Exception as e:
        st.error(f"Could not read the uploaded PDF: {e}")
        return

    text = _collect_text(result)
    if not text.strip():
        # Nothing was recognised: skip the API call rather than sending
        # an empty document to the model.
        st.warning("No text could be extracted from the uploaded PDF.")
        return

    # Prepare the input for the LLM
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": f"Extract the email, phone number, and location from the following text:\n{text}"}
    ]

    # Use OpenAI's GPT-3.5-turbo to extract the details. Only the API call
    # and response unpacking sit inside the try so that unrelated errors
    # are not mislabelled.
    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model="gpt-3.5-turbo",
        )
        generated_text = chat_completion.choices[0].message.content
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        return

    if not generated_text:
        # The message content can be empty/None; avoid rendering "None".
        st.warning("The model returned an empty response.")
        return

    # Display the extracted information
    st.header('Extracted Information')
    st.write(generated_text)

# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()