Spaces:
Sleeping
Sleeping
import streamlit as st | |
import mailparser | |
from email_reply_parser import EmailReplyParser | |
from bs4 import BeautifulSoup | |
def extract_latest_message(raw_email):
    """Return the most recent reply found in a raw MIME email.

    The email is parsed with mail-parser. The first plain-text part is used
    as the body when one exists; otherwise the first HTML part is converted
    to plain text with BeautifulSoup. If neither exists, a fixed placeholder
    sentence is used as the body. The body is then passed to
    ``EmailReplyParser.parse_reply``.

    Intermediate results (the parsed mail JSON, the text/HTML part lists and
    the selected body) are written to the Streamlit page as debugging aids.

    Args:
        raw_email: The raw email source as a string.

    Returns:
        The string returned by ``EmailReplyParser.parse_reply``, or a string
        of the form ``"Error: <message>"`` if any exception is raised along
        the way.
    """
    try:
        parsed = mailparser.parse_from_string(raw_email)

        # Debug output: show the complete parsed message.
        st.write("Parsed Email Object:")
        st.json(parsed.mail_json)

        # Debug output: show which plain-text and HTML parts are available.
        plain_parts, html_parts = parsed.text_plain, parsed.text_html
        st.write("Text Parts:", plain_parts)
        st.write("HTML Parts:", html_parts)

        # Start from the placeholder and override it with real content,
        # preferring plain text over HTML.
        body = "No body content found in email."
        if plain_parts:
            body = plain_parts[0]
            st.write("Extracted plain text body from email.")
        elif html_parts:
            html_body = html_parts[0]
            st.write("Extracted HTML body from email. Converting to plain text...")
            # Strip the markup so the reply parser receives plain text.
            body = BeautifulSoup(html_body, "html.parser").get_text()

        # Debug output: show the body exactly as handed to the reply parser.
        st.write("Cleaned-up email body before parsing:")
        st.text_area("Parsed Body", value=body, height=200)

        # Keep only the latest reply, dropping the quoted thread.
        return EmailReplyParser.parse_reply(body)
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        return f"Error: {exc}"
def main():
    """Render the Streamlit page for the latest-message extractor.

    Shows a title, a short description and a text area for the raw email.
    When the "Extract Latest Message" button is pressed, the pasted text is
    passed to ``extract_latest_message`` and the result is displayed; if the
    text area is empty or whitespace-only, a warning is shown instead.
    """
    st.title("Email Latest Message Extractor")
    st.write("""
    This tool extracts the latest message from a raw MIME email and removes any quoted thread or previous messages.
    Paste the raw email in MIME format in the text area below, and the tool will display the latest message.
    """)

    # Input area for the raw email source.
    raw_email = st.text_area("Paste the raw MIME email content here", height=300)

    # Nothing more to render until the button is pressed.
    clicked = st.button("Extract Latest Message")
    if not clicked:
        return

    # Empty or whitespace-only input: ask the user for content.
    if not raw_email.strip():
        st.warning("Please paste the raw MIME email content.")
        return

    latest_message = extract_latest_message(raw_email)
    st.subheader("Extracted Latest Message:")
    st.text_area("Latest Message", value=latest_message, height=200)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()