Spaces:

IinjyI
/

Qissar

Runtime error

App Files Files Community

IinjyI committed on May 2

Commit

a2e8dd9

•

1 Parent(s): af38f47

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -1

app.py CHANGED Viewed

@@ -3,5 +3,87 @@ import gradio as gr
 def greet(name):
     return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 demo.launch()

 def greet(name):
     return "Hello " + name + "!!"
+# Load the pickled word-embedding lookup from the working directory when the
+# module is executed. get_vector_representation tests words for membership in
+# it and adds the stored values to a 25-element sentence vector.
+# NOTE(review): pickle.load can execute arbitrary code contained in the file;
+# only ship a cleaned_word_embeddings.pkl that comes from a trusted source.
+with open("cleaned_word_embeddings.pkl", "rb") as f:
+    cleaned_word_embeddings = pickle.load(f)
+def get_clean_sentences(text):
+    """Split ``text`` into sentences and blank out unwanted characters.
+
+    Every sentence produced by ``sent_tokenize`` goes through one regex
+    substitution that replaces each match with a single space. A match is
+    either a backslash together with the (non-newline) character after it,
+    or any character that is not a backslash, an apostrophe, a word
+    character or a space. Digits and underscores are word characters, so
+    they are kept.
+
+    Returns a list holding one cleaned string per tokenized sentence.
+    """
+    return [
+        re.sub(r"\\.|[^\\'\w ]", " ", raw_sentence)
+        for raw_sentence in sent_tokenize(text)
+    ]
+def filter_sentences(text):
+    """Return the cleaned sentences of ``text`` with English stopwords removed.
+
+    Each sentence from ``get_clean_sentences`` is tokenized with
+    ``nltk.word_tokenize``; tokens whose lowercase form is in the English
+    stopword list are dropped and the rest are re-joined with single spaces.
+
+    Returns a list with one filtered string per cleaned sentence.
+    """
+    cleaned = get_clean_sentences(text)
+    ignored = set(stopwords.words("english"))
+
+    def drop_stopwords(sentence):
+        # Compare case-insensitively but keep the token's original casing.
+        tokens = nltk.word_tokenize(sentence)
+        return " ".join([tok for tok in tokens if tok.lower() not in ignored])
+
+    return [drop_stopwords(sentence) for sentence in cleaned]
+def get_vector_representation(text):
+    """Build one 25-element vector for every filtered sentence of ``text``.
+
+    For each sentence the embeddings of the words present in
+    ``cleaned_word_embeddings`` are summed, then the sum is divided by the
+    total number of words in the sentence (words without an embedding still
+    count toward the divisor). A sentence with no words stays all zeros.
+
+    Returns a list of NumPy arrays in sentence order.
+    """
+    vectors = []
+    for filtered in filter_sentences(text):
+        tokens = filtered.split()
+        vector = np.zeros((25,))
+        if tokens:
+            known = [
+                cleaned_word_embeddings[token]
+                for token in tokens
+                if token in cleaned_word_embeddings
+            ]
+            # Accumulate in place, in word order, starting from the zero vector.
+            for embedding in known:
+                vector += embedding
+            vector /= len(tokens)
+        vectors.append(vector)
+    return vectors
+def calculate_cosine_similarity(sentence_vectors):
+    """Return the cosine-similarity matrix for ``sentence_vectors``.
+
+    The sequence of sentence vectors is passed to ``cosine_similarity``
+    as-is, and its result is returned unchanged; ``get_scores`` later feeds
+    that result to ``nx.from_numpy_array``.
+
+    NOTE(review): ``cosine_similarity`` is presumably scikit-learn's
+    pairwise implementation - confirm against the file's imports.
+    """
+    return cosine_similarity(sentence_vectors)
+def get_scores(similarity_matrix):
+    """Score the nodes of the similarity graph with PageRank.
+
+    ``similarity_matrix`` is converted to a graph with
+    ``nx.from_numpy_array`` and the result of ``nx.pagerank`` on that graph
+    is returned unchanged; ``rank_sentences`` indexes it by sentence
+    position.
+    """
+    return nx.pagerank(nx.from_numpy_array(similarity_matrix))
+def rank_sentences(text):
+    """Rank the sentences of ``text`` by graph score, highest first.
+
+    Returns a list of ``(score, sentence)`` tuples sorted in descending
+    order; equal scores fall back to comparing the sentence strings. When
+    ``sent_tokenize`` finds no sentences in ``text`` an empty list is
+    returned.
+    """
+    sentences = sent_tokenize(text)
+    if not sentences:
+        # Nothing to rank: avoid handing an empty vector list to the
+        # similarity and graph steps.
+        return []
+    sentence_vectors = get_vector_representation(text)
+    similarity_matrix = calculate_cosine_similarity(sentence_vectors)
+    scores = get_scores(similarity_matrix)
+    return sorted(
+        ((scores[index], sentence) for index, sentence in enumerate(sentences)),
+        reverse=True,
+    )
+def summarize(text):
+    """Return a summary built from the top-ranked sentences of ``text``.
+
+    One tenth of the ranked sentences (rounded down) is kept, but never
+    fewer than one when ``text`` contains any sentence, so inputs shorter
+    than ten sentences still produce a summary. Sentences appear in ranking
+    order and each is followed by a single space.
+
+    Returns an empty string when there are no ranked sentences.
+    """
+    ranked_sentences = rank_sentences(text)
+    if not ranked_sentences:
+        return ""
+    summary_length = max(1, len(ranked_sentences) // 10)
+    return "".join(
+        sentence + " " for _score, sentence in ranked_sentences[:summary_length]
+    )
+# Serve summarize through a Gradio interface with a text input and a text output.
+demo = gr.Interface(fn=summarize, inputs="text", outputs="text")
 demo.launch()