Spaces:
Runtime error
Runtime error
updated email extractor
Browse files
app.py
CHANGED
@@ -135,6 +135,31 @@ def email_extractor(email_uploaded):
|
|
135 |
|
136 |
return email_body, character_cnt, url_cnt
|
137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
# extract email body from parse email
|
140 |
def email_body_extractor(email_data):
|
@@ -363,7 +388,7 @@ if st.session_state.get('button') == True:
|
|
363 |
#uploaded_file = FileChooser(uploaded_file)
|
364 |
#bytes_data = uploaded_file.getvalue()
|
365 |
|
366 |
-
email_body, character_cnt, url_cnt =
|
367 |
|
368 |
# Start the prediction
|
369 |
# Need to solve X test issue
|
|
|
135 |
|
136 |
return email_body, character_cnt, url_cnt
|
137 |
|
138 |
+
def email_extractor_general(email_uploaded):
    """Turn an uploaded email into flat text plus two simple counts.

    The value returned by ``parse_email`` is joined into one string
    (presumably an iterable of text fragments -- confirm against
    ``parse_email``), stripped, and cleaned of line breaks, tabs, bold
    tags and non-breaking spaces.

    Returns a 3-tuple ``(email_text, character_cnt, url_cnt)``:

    * ``email_text``    -- the cleaned text; URLs are still present in it.
    * ``character_cnt`` -- number of non-whitespace characters in the text
      after URLs are removed and everything from the first '©' onward is
      dropped.
    * ``url_cnt``       -- number of URLs that ``URLExtract`` finds in the
      cleaned text.
    """
    parsed_parts = parse_email(email_uploaded)
    email_text = ''.join(parsed_parts).strip()

    # Strip non-text elements. The order is kept as-is on purpose: removing
    # the whitespace characters first can bring the pieces of a split tag
    # together before the tag removal runs.
    for unwanted in ('\n', '\t', '\r', '</b>', '<b>', '\xa0'):
        email_text = email_text.replace(unwanted, '')

    # Count URLs on the cleaned text, before they are removed below.
    url_cnt = len(URLExtract().find_urls(email_text))

    # The character count is taken on a URL-free copy, cut at the first
    # copyright sign; the returned email_text itself is not truncated.
    url_free_text = re.sub(
        r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*',
        '',
        email_text,
    )
    counted_text = url_free_text.partition('©')[0]
    character_cnt = sum(1 for symbol in counted_text if not symbol.isspace())

    return email_text, character_cnt, url_cnt
|
162 |
+
|
163 |
|
164 |
# extract email body from parse email
|
165 |
def email_body_extractor(email_data):
|
|
|
388 |
#uploaded_file = FileChooser(uploaded_file)
|
389 |
#bytes_data = uploaded_file.getvalue()
|
390 |
|
391 |
+
email_body, character_cnt, url_cnt = email_extractor_general(uploaded_file)
|
392 |
|
393 |
# Start the prediction
|
394 |
# Need to solve X test issue
|