Update app.py
app.py CHANGED
@@ -3,5 +3,87 @@ import gradio as gr
 def greet(name):
     return "Hello " + name + "!!"
 
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+
+# Load cleaned_word_embeddings
+with open("cleaned_word_embeddings.pkl", "rb") as f:
+    cleaned_word_embeddings = pickle.load(f)
+
+def get_clean_sentences(text):
+    sentences = sent_tokenize(text)
+    # Remove punctuation and special characters
+    cleaned_sentences = []
+    for sentence in sentences:
+        cleaned_sentence = re.sub(r"\.|[^'\w ]", " ", sentence)
+        cleaned_sentences.append(cleaned_sentence)
+    return cleaned_sentences
+
+
+def filter_sentences(text):
+    cleaned_sentences = get_clean_sentences(text)
+    # Remove stopwords
+    stop_words = set(stopwords.words("english"))
+    filtered_sentences = []
+    for sentence in cleaned_sentences:
+        words = nltk.word_tokenize(sentence)
+        filtered_sentence = " ".join(
+            [word for word in words if word.lower() not in stop_words]
+        )
+        filtered_sentences.append(filtered_sentence)
+    return filtered_sentences
+
+
+def get_vector_representation(text):
+    filtered_sentences = filter_sentences(text)
+    # Get vector representations for each sentence in the article
+    sentence_vectors = []
+    for sentence in filtered_sentences:
+        words = sentence.split()
+        sentence_vector = np.zeros((25,))
+        if len(words) != 0:
+            for word in words:
+                if word in cleaned_word_embeddings:
+                    sentence_vector += cleaned_word_embeddings[word]
+            sentence_vector /= len(words)
+        sentence_vectors.append(sentence_vector)
+    return sentence_vectors
+
+
+def calculate_cosine_similarity(sentence_vectors):
+    # Stack the 25-dimensional sentence vectors into an (n_sentences, 25) matrix
+    vectors = np.vstack(sentence_vectors)
+    # Calculate pairwise cosine similarity between sentences
+    similarity_matrix = cosine_similarity(vectors)
+    return similarity_matrix
+
+
+def get_scores(similarity_matrix):
+    # Create a graph from the similarity matrix
+    nx_graph = nx.from_numpy_array(similarity_matrix)
+    # Score each sentence (node) with PageRank
+    scores = nx.pagerank(nx_graph)
+    return scores
+
+
+def rank_sentences(text):
+    sentence_vectors = get_vector_representation(text)
+    similarity_matrix = calculate_cosine_similarity(sentence_vectors)
+    scores = get_scores(similarity_matrix)
+    # Pair each original sentence with its PageRank score and sort best-first
+    ranked_sentences = sorted(
+        ((scores[j], sentence) for j, sentence in enumerate(sent_tokenize(text))),
+        reverse=True,
+    )
+    return ranked_sentences
+
+
+def summarize(text):
+    ranked_sentences = rank_sentences(text)
+    summary = ""
+    # Keep the top 10% of ranked sentences, but always at least one
+    for j in range(max(1, len(ranked_sentences) // 10)):
+        summary += ranked_sentences[j][1] + " "
+    return summary
+
+
+demo = gr.Interface(fn=summarize, inputs="text", outputs="text")
 demo.launch()
+
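Note: the added code references pickle, re, numpy (np), nltk (sent_tokenize, word_tokenize, stopwords), networkx (nx), and scikit-learn's cosine_similarity, but only `import gradio as gr` is visible in this hunk's context. A minimal sketch of the import and setup block assumed to sit at the top of app.py; the nltk.download calls are an assumption, added because sent_tokenize and stopwords fail without their data packages:

# Minimal sketch, assuming these imports live above the hunk in app.py:
import pickle
import re

import gradio as gr
import networkx as nx
import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from sklearn.metrics.pairwise import cosine_similarity

# Assumed one-time setup for the NLTK tokenizers and stopword list.
nltk.download("punkt")
nltk.download("stopwords")

# Hypothetical smoke test once cleaned_word_embeddings.pkl is present:
# print(summarize(open("article.txt").read()))

The fixed 25-dimensional sentence vector suggests the pickled file maps words to 25-d embeddings (e.g. GloVe Twitter 25-d), but the commit does not show the file's contents.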