varl42 committed on
Commit
6557c9b
·
verified ·
1 Parent(s): 07ccc3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -7,6 +7,8 @@ from chromadb.utils import embedding_functions
7
 
8
  import gradio as gr
9
 
 
 
10
  #######################################################
11
 
12
  # Load the email dataset
@@ -21,9 +23,9 @@ collection = client.create_collection("enron_emails")
21
 
22
  # Add documents and IDs to the collection, using ChromaDB's built-in text encoding
23
  collection.add(
24
- documents=emails["body"].tolist()[:1000],
25
- ids=emails["file"].tolist()[:1000],
26
- metadatas=[{"source": "enron_emails"}] * len(emails[:1000]), # Optional metadata
27
  )
28
 
29
 
@@ -68,6 +70,10 @@ def summarize_documents(text_input):
68
  # Generate a summary with the model
69
  summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
70
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
 
 
 
71
  return summary
72
  except Exception as e:
73
  return f"An error occurred while summarizing: {e}"
 
7
 
8
  import gradio as gr
9
 
10
+ import re
11
+
12
  #######################################################
13
 
14
  # Load the email dataset
 
23
 
24
  # Add documents and IDs to the collection, using ChromaDB's built-in text encoding
25
  collection.add(
26
+ documents=emails["body"].tolist()[:10000],
27
+ ids=emails["file"].tolist()[:10000],
28
+ metadatas=[{"source": "enron_emails"}] * len(emails[:10000]), # Optional metadata
29
  )
30
 
31
 
 
70
  # Generate a summary with the model
71
  summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
72
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
73
+
74
+ summary = re.sub(r"(\w+)([?!])\s", r"\1\2. ", summary) # Ensures that sentences ending in ? ! .
75
+ summary = re.sub(r"([^.?!])(?=\s+[A-Z]|$)", r"\1.", summary)
76
+
77
  return summary
78
  except Exception as e:
79
  return f"An error occurred while summarizing: {e}"