DexterSptizu
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -3,39 +3,25 @@ from gensim.models import TfidfModel
|
|
3 |
from gensim.corpora import Dictionary
|
4 |
from gensim.utils import simple_preprocess
|
5 |
from gensim.parsing.preprocessing import remove_stopwords
|
6 |
-
import numpy as np
|
7 |
-
import warnings
|
8 |
-
warnings.filterwarnings('ignore')
|
9 |
|
10 |
# Example texts
|
11 |
EXAMPLES = {
|
12 |
"Scientific Abstract": """
|
13 |
Compatibility of systems of linear constraints over the set of natural numbers.
|
14 |
Criteria of compatibility of a system of linear Diophantine equations, strict inequations,
|
15 |
-
and nonstrict inequations are considered.
|
16 |
-
and algorithms of construction of minimal generating sets of solutions for all types of systems are given.
|
17 |
""",
|
18 |
"News Article": """
|
19 |
Machine learning is revolutionizing the way we interact with technology.
|
20 |
Artificial intelligence systems are becoming more sophisticated, enabling automated decision making
|
21 |
-
and pattern recognition at unprecedented scales.
|
22 |
-
making breakthroughs in natural language processing and computer vision.
|
23 |
""",
|
24 |
"Technical Documentation": """
|
25 |
The user interface provides intuitive navigation through contextual menus and adaptive layouts.
|
26 |
-
System responses are optimized for performance while maintaining high reliability standards.
|
27 |
-
Database connections are pooled to minimize resource overhead and maximize throughput.
|
28 |
"""
|
29 |
}
|
30 |
|
31 |
-
def preprocess_text(text):
|
32 |
-
# Remove stopwords
|
33 |
-
text = remove_stopwords(text)
|
34 |
-
# Tokenize and clean text
|
35 |
-
tokens = simple_preprocess(text, deacc=True)
|
36 |
-
return ' '.join(tokens)
|
37 |
-
|
38 |
-
# Initialize text processing components
|
39 |
def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
|
40 |
# Preprocess text
|
41 |
processed_text = remove_stopwords(text.lower())
|
@@ -66,64 +52,26 @@ def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
|
|
66 |
|
67 |
return "\n".join(results) if results else "No keywords found."
|
68 |
|
69 |
-
# Update the interface click handler to match the function parameters
|
70 |
-
extract_btn.click(
|
71 |
-
extract_keywords,
|
72 |
-
inputs=[input_text, num_keywords, show_scores, min_length],
|
73 |
-
outputs=[output_text]
|
74 |
-
)
|
75 |
-
|
76 |
def load_example(example_name):
|
77 |
return EXAMPLES.get(example_name, "")
|
78 |
|
79 |
# Create Gradio interface
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
label="Number of Keywords"
|
98 |
-
)
|
99 |
-
|
100 |
-
show_scores = gr.Checkbox(
|
101 |
-
label="Show Scores",
|
102 |
-
value=True
|
103 |
-
)
|
104 |
-
|
105 |
-
min_length = gr.Slider(
|
106 |
-
minimum=1,
|
107 |
-
maximum=5,
|
108 |
-
value=1,
|
109 |
-
step=1,
|
110 |
-
label="Minimum Words per Keyword"
|
111 |
-
)
|
112 |
-
|
113 |
-
# Define the button here, before using it
|
114 |
-
extract_btn = gr.Button("Extract Keywords", variant="primary")
|
115 |
-
|
116 |
-
output_text = gr.Textbox(
|
117 |
-
label="Extracted Keywords",
|
118 |
-
lines=10,
|
119 |
-
interactive=False
|
120 |
-
)
|
121 |
-
|
122 |
-
# Add the click event after the button is defined
|
123 |
-
extract_btn.click(
|
124 |
-
fn=extract_keywords,
|
125 |
-
inputs=[input_text, num_keywords, show_scores, min_length],
|
126 |
-
outputs=output_text
|
127 |
-
)
|
128 |
|
129 |
demo.launch(share=True)
|
|
|
3 |
from gensim.corpora import Dictionary
|
4 |
from gensim.utils import simple_preprocess
|
5 |
from gensim.parsing.preprocessing import remove_stopwords
|
|
|
|
|
|
|
6 |
|
7 |
# Example texts
|
8 |
EXAMPLES = {
|
9 |
"Scientific Abstract": """
|
10 |
Compatibility of systems of linear constraints over the set of natural numbers.
|
11 |
Criteria of compatibility of a system of linear Diophantine equations, strict inequations,
|
12 |
+
and nonstrict inequations are considered.
|
|
|
13 |
""",
|
14 |
"News Article": """
|
15 |
Machine learning is revolutionizing the way we interact with technology.
|
16 |
Artificial intelligence systems are becoming more sophisticated, enabling automated decision making
|
17 |
+
and pattern recognition at unprecedented scales.
|
|
|
18 |
""",
|
19 |
"Technical Documentation": """
|
20 |
The user interface provides intuitive navigation through contextual menus and adaptive layouts.
|
21 |
+
System responses are optimized for performance while maintaining high reliability standards.
|
|
|
22 |
"""
|
23 |
}
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
|
26 |
# Preprocess text
|
27 |
processed_text = remove_stopwords(text.lower())
|
|
|
52 |
|
53 |
return "\n".join(results) if results else "No keywords found."
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def load_example(example_name):
|
56 |
return EXAMPLES.get(example_name, "")
|
57 |
|
58 |
# Create Gradio interface
|
59 |
+
demo = gr.Interface(
|
60 |
+
fn=extract_keywords,
|
61 |
+
inputs=[
|
62 |
+
gr.Textbox(lines=8, label="Input Text", placeholder="Enter your text here..."),
|
63 |
+
gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Number of Keywords"),
|
64 |
+
gr.Checkbox(label="Show Scores", value=True),
|
65 |
+
gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Minimum Words per Keyword")
|
66 |
+
],
|
67 |
+
outputs=gr.Textbox(label="Extracted Keywords", lines=10),
|
68 |
+
title="π Keyword Extraction",
|
69 |
+
description="Extract keywords using TF-IDF scoring",
|
70 |
+
examples=[
|
71 |
+
[EXAMPLES["Scientific Abstract"], 10, True, 1],
|
72 |
+
[EXAMPLES["News Article"], 5, True, 1],
|
73 |
+
[EXAMPLES["Technical Documentation"], 8, False, 1]
|
74 |
+
]
|
75 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
demo.launch(share=True)
|