DexterSptizu
committed on
Commit
•
f31ddfc
1
Parent(s):
51073fb
Update app.py
Browse files
app.py
CHANGED
@@ -36,7 +36,7 @@ def preprocess_text(text):
|
|
36 |
return ' '.join(tokens)
|
37 |
|
38 |
# Initialize text processing components
|
39 |
-
def extract_keywords(text, num_keywords=10, scores=True):
|
40 |
# Preprocess text
|
41 |
processed_text = remove_stopwords(text.lower())
|
42 |
tokens = simple_preprocess(processed_text, deacc=True)
|
@@ -52,17 +52,27 @@ def extract_keywords(text, num_keywords=10, scores=True):
|
|
52 |
# Sort by scores
|
53 |
sorted_keywords = sorted(tfidf_corpus, key=lambda x: x[1], reverse=True)
|
54 |
|
55 |
-
# Get top keywords
|
56 |
results = []
|
57 |
-
for word_id, score in sorted_keywords
|
58 |
word = dictionary[word_id]
|
59 |
-
if
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
63 |
|
64 |
return "\n".join(results) if results else "No keywords found."
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
def load_example(example_name):
|
67 |
return EXAMPLES.get(example_name, "")
|
68 |
|
|
|
36 |
return ' '.join(tokens)
|
37 |
|
38 |
# Initialize text processing components
|
39 |
+
def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
|
40 |
# Preprocess text
|
41 |
processed_text = remove_stopwords(text.lower())
|
42 |
tokens = simple_preprocess(processed_text, deacc=True)
|
|
|
52 |
# Sort by scores
|
53 |
sorted_keywords = sorted(tfidf_corpus, key=lambda x: x[1], reverse=True)
|
54 |
|
55 |
+
# Get top keywords and filter by length
|
56 |
results = []
|
57 |
+
for word_id, score in sorted_keywords:
|
58 |
word = dictionary[word_id]
|
59 |
+
if len(word.split()) >= min_length:
|
60 |
+
if scores:
|
61 |
+
results.append(f"• {word:<30} (score: {score:.4f})")
|
62 |
+
else:
|
63 |
+
results.append(f"• {word}")
|
64 |
+
if len(results) >= num_keywords:
|
65 |
+
break
|
66 |
|
67 |
return "\n".join(results) if results else "No keywords found."
|
68 |
|
69 |
+
# Update the interface click handler to match the function parameters
|
70 |
+
extract_btn.click(
|
71 |
+
extract_keywords,
|
72 |
+
inputs=[input_text, num_keywords, show_scores, min_length],
|
73 |
+
outputs=[output_text]
|
74 |
+
)
|
75 |
+
|
76 |
def load_example(example_name):
|
77 |
return EXAMPLES.get(example_name, "")
|
78 |
|