File size: 1,783 Bytes
6220a93 3f18c40 6220a93 3f18c40 6220a93 3f18c40 6220a93 3f18c40 6220a93 3f18c40 6220a93 3f18c40 6220a93 3f18c40 6220a93 82580f6 6220a93 82580f6 6220a93 82580f6 6220a93 82580f6 8183a3e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import os
import streamlit as st
from doctr.models import ocr_predictor
from doctr.io import DocumentFile
from openai import OpenAI
@st.cache_resource
def _load_ocr_model():
    """Build the pretrained docTR OCR predictor.

    Wrapped in ``st.cache_resource`` so the predictor object is created once
    and reused, rather than being rebuilt each time Streamlit re-executes
    this script.
    """
    return ocr_predictor(pretrained=True)


# Initialize DocTR OCR predictor (cached across Streamlit reruns)
ocr_model = _load_ocr_model()

# Initialize the OpenAI client; the key is read from the OPENAI_API_KEY
# environment variable.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
def _ocr_result_to_text(result):
    """Flatten an OCR result into plain text.

    Walks ``result.pages`` -> ``blocks`` -> ``lines`` -> ``words`` and joins
    the ``value`` of each word with single spaces, emitting one text line per
    OCR line.
    """
    return "\n".join(
        " ".join(word.value for word in line.words)
        for page in result.pages
        for block in page.blocks
        for line in block.lines
    )


# Streamlit application
def main():
    """Render the page: upload a PDF, OCR it, and ask the LLM for contact details.

    The uploaded PDF is read into memory, passed through the module-level
    ``ocr_model``, and the recognised text is sent to ``gpt-3.5-turbo`` via
    the module-level ``client`` with a prompt asking for the email, phone
    number, and location. The model's reply is written to the page.

    Failures while reading/OCR-ing the PDF or while calling the API are
    reported with ``st.error`` instead of propagating.
    """
    st.title('EMAIL, Phone, Location Extractor')

    # Upload a PDF file
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
    if uploaded_file is None:
        return

    # Load the PDF with docTR and extract the text. A malformed upload can
    # fail here, so report it on the page rather than crashing the script.
    try:
        doc = DocumentFile.from_pdf(uploaded_file.read())
        text = _ocr_result_to_text(ocr_model(doc))
    except Exception as e:  # UI boundary: surface any failure to the user
        st.error(f"An error occurred while reading the PDF: {str(e)}")
        return

    # Nothing recognised: skip the API call, there is nothing to extract from.
    if not text.strip():
        st.warning("No text could be extracted from the uploaded PDF.")
        return

    # Prepare the input for the LLM
    messages = [
        {"role": "system", "content": "You are a helpful AI assistant."},
        {
            "role": "user",
            "content": (
                "Extract the email, phone number, and location "
                f"from the following text:\n{text}"
            ),
        },
    ]

    # Use OpenAI's GPT-3.5-turbo to extract the details
    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model="gpt-3.5-turbo",
        )
        generated_text = chat_completion.choices[0].message.content
    except Exception as e:  # UI boundary: surface any failure to the user
        st.error(f"An error occurred: {str(e)}")
    else:
        # Display the extracted information
        st.header('Extracted Information')
        st.write(generated_text)
# Run the app only when this file is executed as a script.
if __name__ == '__main__':
    main()