Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,8 @@ from chromadb.utils import embedding_functions
|
|
7 |
|
8 |
import gradio as gr
|
9 |
|
|
|
|
|
10 |
#######################################################
|
11 |
|
12 |
# Load the email dataset
|
@@ -21,9 +23,9 @@ collection = client.create_collection("enron_emails")
|
|
21 |
|
22 |
# Add documents and IDs to the collection, using ChromaDB's built-in text encoding
|
23 |
collection.add(
|
24 |
-
documents=emails["body"].tolist()[:
|
25 |
-
ids=emails["file"].tolist()[:
|
26 |
-
metadatas=[{"source": "enron_emails"}] * len(emails[:
|
27 |
)
|
28 |
|
29 |
|
@@ -68,6 +70,10 @@ def summarize_documents(text_input):
|
|
68 |
# Generate a summary with the model
|
69 |
summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
|
70 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
71 |
return summary
|
72 |
except Exception as e:
|
73 |
return f"An error occurred while summarizing: {e}"
|
|
|
7 |
|
8 |
import gradio as gr
|
9 |
|
10 |
+
import re
|
11 |
+
|
12 |
#######################################################
|
13 |
|
14 |
# Load the email dataset
|
|
|
23 |
|
24 |
# Add documents and IDs to the collection, using ChromaDB's built-in text encoding
|
25 |
collection.add(
|
26 |
+
documents=emails["body"].tolist()[:10000],
|
27 |
+
ids=emails["file"].tolist()[:10000],
|
28 |
+
metadatas=[{"source": "enron_emails"}] * len(emails[:10000]), # Optional metadata
|
29 |
)
|
30 |
|
31 |
|
|
|
70 |
# Generate a summary with the model
|
71 |
summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
|
72 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
73 |
+
|
74 |
+
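summary = re.sub(r"(\w+)([?!])\s", r"\1\2. ", summary) # Appends ". " after a word ending in "?" or "!" when whitespace follows (e.g. "Why? Because" becomes "Why?. Because"). NOTE(review): this yields doubled punctuation -- confirm that is intended.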
|
75 |
+
summary = re.sub(r"([^.?!])(?=\s+[A-Z]|$)", r"\1.", summary)
|
76 |
+
|
77 |
return summary
|
78 |
except Exception as e:
|
79 |
return f"An error occurred while summarizing: {e}"
|