Spaces:

Nassiraaa
/

LLM-for-email-phone-gmail

Sleeping

Update app.py

6220a93 verified 3 months ago

1.78 kB

	import os
	import streamlit as st
	from doctr.models import ocr_predictor
	from doctr.io import DocumentFile
	from openai import OpenAI

	# Initialize DocTR OCR predictor
	@st.cache_resource
	def _load_ocr_model():
	    """Build the pretrained DocTR OCR predictor.

	    Streamlit re-executes this script on every rerun; caching the predictor
	    as a resource means it is constructed once and then reused instead of
	    being rebuilt each time.

	    Returns:
	        The object returned by ``ocr_predictor(pretrained=True)``.
	    """
	    return ocr_predictor(pretrained=True)


	ocr_model = _load_ocr_model()

	# Initialize the OpenAI client; the key is read from the OPENAI_API_KEY
	# environment variable (``None`` is passed through when it is unset).
	client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

	def _extract_text(result):
	    """Flatten an OCR result into plain text.

	    Words belonging to the same OCR line are joined with single spaces and
	    each OCR line is placed on its own output line.

	    Args:
	        result: Object returned by ``ocr_model``; it is traversed as
	            ``pages -> blocks -> lines -> words`` and each word's ``value``
	            attribute is read.

	    Returns:
	        The recognized text as one string (empty if no lines were found).
	    """
	    # NOTE(review): the original indentation was lost, so "one newline per
	    # OCR line" is the assumed grouping -- confirm against the intended output.
	    lines = []
	    for page in result.pages:
	        for block in page.blocks:
	            for line in block.lines:
	                lines.append(" ".join(word.value for word in line.words))
	    return "\n".join(lines)


	# Streamlit application
	def main():
	    """Render the app: upload a PDF, OCR it, and display the LLM's answer.

	    The uploaded PDF is run through the module-level ``ocr_model``; the
	    recognized text is sent to the module-level OpenAI ``client`` with a
	    prompt asking for the email, phone number, and location, and the reply
	    is written to the page. API failures are reported with ``st.error``.
	    """
	    st.title('EMAIL, Phone, Location Extractor')

	    # Upload a PDF file
	    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
	    if uploaded_file is None:
	        # Nothing uploaded yet; there is nothing further to render.
	        return

	    # Load the PDF file with Doctr. getvalue() returns the full buffer
	    # regardless of the stream's current read position.
	    pdf_bytes = uploaded_file.getvalue()
	    doc = DocumentFile.from_pdf(pdf_bytes)

	    # Extract the text
	    text = _extract_text(ocr_model(doc))
	    if not text.strip():
	        # Avoid an API call when OCR found nothing to analyse.
	        st.warning("No text could be extracted from the uploaded PDF.")
	        return

	    # Prepare the input for the LLM
	    messages = [
	        {"role": "system", "content": "You are a helpful AI assistant."},
	        {"role": "user", "content": f"Extract the email, phone number, and location from the following text:\n{text}"},
	    ]

	    # Use OpenAI's GPT-3.5-turbo to extract the details
	    try:
	        chat_completion = client.chat.completions.create(
	            messages=messages,
	            model="gpt-3.5-turbo",
	        )
	        generated_text = chat_completion.choices[0].message.content
	    except Exception as e:
	        # Top-level UI boundary: surface the failure to the user.
	        st.error(f"An error occurred: {str(e)}")
	    else:
	        # Display the extracted information
	        st.header('Extracted Information')
	        st.write(generated_text)

	# Run the app only when this file is executed as the entry script.
	if __name__ == '__main__':
	    main()