Ahmad-Moiz committed on
Commit
c13fd5d
·
1 Parent(s): 8a890e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -17
app.py CHANGED
@@ -1,27 +1,84 @@
1
- import spacy
2
  import streamlit as st
 
 
 
 
 
3
 
4
def article_summarizer(article_text, num_sentences=3):
    """Build an extractive summary from the highest-scoring sentences.

    The text is parsed with spaCy's ``en_core_web_sm`` pipeline. Each
    sentence is scored by summing the ``rank`` attribute of its tokens that
    are not stop words, and the best-scoring sentences are joined with
    single spaces, highest score first.

    Args:
        article_text: The article to summarise.
        num_sentences: Maximum number of sentences in the summary.

    Returns:
        The selected sentences as one space-separated string.
    """
    pipeline = spacy.load("en_core_web_sm")
    parsed = pipeline(article_text)

    scores = {}
    for sent in parsed.sents:
        scores[sent] = sum(tok.rank for tok in sent if not tok.is_stop)

    # Highest score first.
    ranked = sorted(scores, key=scores.get, reverse=True)
    return " ".join(map(str, ranked[:num_sentences]))
15
 
 
16
st.title("Article Summarizer")

article = st.text_area("Enter your article here:")
num_sentences = st.slider("Select the number of sentences for the summary:", 1, 10, 3)

if st.button("Summarize"):
    # Whitespace-only input carries nothing to summarise, so treat it the
    # same as an empty text area and show the warning.
    if article.strip():
        summary = article_summarizer(article, num_sentences)
        st.subheader("Summary:")
        st.write(summary)
    else:
        st.warning("Please enter an article to summarize.")
 
 
1
  import streamlit as st
2
+ import nltk
3
+ from nltk.corpus import stopwords
4
+ from nltk.cluster.util import cosine_distance
5
+ import numpy as np
6
+ import networkx as nx
7
 
8
# Make sure the NLTK data used below is available. Streamlit re-executes this
# script on every interaction, so a resource is only downloaded when it cannot
# be found locally rather than calling the downloader on every run.
for _resource, _data_path in (
    ("punkt", "tokenizers/punkt"),
    ("stopwords", "corpora/stopwords"),
):
    try:
        nltk.data.find(_data_path)
    except LookupError:
        nltk.download(_resource, quiet=True)
11
+
12
+ # Function to read and preprocess the article
13
def read_article(article):
    """Split *article* into sentences and drop the very short ones.

    Args:
        article: Raw article text.

    Returns:
        The sentences produced by ``nltk.sent_tokenize`` that are longer
        than 10 characters, in their original order.
    """
    min_length = 10  # sentences of this many characters or fewer are dropped
    return [
        candidate
        for candidate in nltk.sent_tokenize(article)
        if len(candidate) > min_length
    ]
17
+
18
+ # Function to compute sentence similarity based on cosine similarity
19
def sentence_similarity(sent1, sent2, stopwords):
    """Return the cosine similarity of two sentences' word-count vectors.

    Both sentences are tokenised with ``nltk.word_tokenize``; tokens that are
    not alphanumeric are discarded and the rest are lower-cased. Words found
    in ``stopwords`` are not counted.

    Args:
        sent1: First sentence.
        sent2: Second sentence.
        stopwords: Collection of lower-case words to ignore.

    Returns:
        ``1 - cosine_distance`` of the two count vectors, or ``0.0`` when
        either sentence has no countable words.
    """
    def _normalized_words(sentence):
        return [word.lower() for word in nltk.word_tokenize(sentence) if word.isalnum()]

    words1 = _normalized_words(sent1)
    words2 = _normalized_words(sent2)

    # Map each distinct word to a vector position once, instead of calling
    # list.index() for every word occurrence.
    position = {word: i for i, word in enumerate(set(words1 + words2))}

    vector1 = [0] * len(position)
    vector2 = [0] * len(position)

    for word in words1:
        if word not in stopwords:
            vector1[position[word]] += 1

    for word in words2:
        if word not in stopwords:
            vector2[position[word]] += 1

    # Cosine distance divides by the vector norms, so it is undefined for an
    # all-zero vector (e.g. a sentence made up only of stop words). Treat
    # such a pair as having no similarity.
    if not any(vector1) or not any(vector2):
        return 0.0

    return 1 - cosine_distance(vector1, vector2)
42
+
43
+ # Function to create a similarity matrix of sentences
44
def build_similarity_matrix(sentences, stopwords):
    """Build the pairwise sentence-similarity matrix.

    Args:
        sentences: Sequence of sentence strings.
        stopwords: Collection of words passed through to
            ``sentence_similarity``.

    Returns:
        A ``len(sentences) x len(sentences)`` NumPy array whose entry
        ``[i][j]`` is the similarity of sentence ``i`` to sentence ``j``.
        The diagonal is left at 0.
    """
    count = len(sentences)
    similarity_matrix = np.zeros((count, count))

    for i, first in enumerate(sentences):
        for j, second in enumerate(sentences):
            if i != j:  # a sentence is never compared with itself
                similarity_matrix[i][j] = sentence_similarity(first, second, stopwords)

    return similarity_matrix
54
+
55
+ # Function to generate the article summary
56
def generate_summary(article, top_n=5):
    """Summarise *article* by ranking its sentences with PageRank.

    The sentences returned by ``read_article`` become the nodes of a graph
    whose edge weights are the pairwise similarities. ``nx.pagerank`` scores
    the nodes and the ``top_n`` best-scoring sentences are joined with single
    spaces, highest score first.

    Args:
        article: Raw article text.
        top_n: Maximum number of sentences in the summary.

    Returns:
        The summary string, or ``""`` when the article yields no usable
        sentences.
    """
    sentences = read_article(article)
    if not sentences:
        # Nothing to rank, so skip the graph work entirely.
        return ""

    stop_words = set(stopwords.words('english'))
    similarity = build_similarity_matrix(sentences, stop_words)

    # Cosine similarity is undefined for a sentence with no countable words
    # and can come back as NaN. Zero those weights so they cannot propagate
    # into the PageRank scores.
    similarity = np.nan_to_num(similarity)

    # Create a graph from the similarity matrix and rank its nodes.
    graph = nx.from_numpy_array(similarity)
    scores = nx.pagerank(graph)

    # Sort (score, sentence) pairs so the best-scoring sentences come first.
    ranked_sentences = sorted(
        ((scores[i], sentence) for i, sentence in enumerate(sentences)),
        reverse=True,
    )

    return " ".join(sentence for _, sentence in ranked_sentences[:top_n])
73
 
74
# Streamlit web app
st.title("Article Summarizer")
user_article = st.text_area("Enter your article here:")

if st.button("Summarize"):
    # Whitespace-only input carries nothing to summarise, so treat it the
    # same as an empty text area and show the warning.
    if user_article.strip():
        summary = generate_summary(user_article)
        st.subheader("Summary:")
        st.write(summary)
    else:
        st.warning("Please enter an article to summarize.")