DexterSptizu committed on
Commit
a0d2064
1 Parent(s): 986a219

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -0
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spacy
3
+ from collections import Counter
4
+ from string import punctuation
5
+
6
# Shared spaCy pipeline (small English model), created once at module import
# and reused by every extraction call.
nlp = spacy.load(name="en_core_web_sm")
8
+
9
# Sample inputs offered in the "Load Example Text" dropdown, keyed by the label
# shown to the user. Each value starts with a newline and ends with one.
# NOTE(review): the view this was reconstructed from had leading whitespace
# stripped, so any indentation that originally sat inside these literals is
# not represented here - confirm against the checked-in file.
EXAMPLES = {
    "Scientific Abstract": (
        "\n"
        "Compatibility of systems of linear constraints over the set of natural numbers.\n"
        "Criteria of compatibility of a system of linear Diophantine equations, strict inequations,\n"
        "and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions\n"
        "and algorithms of construction of minimal generating sets of solutions for all types of systems are given.\n"
    ),
    "News Article": (
        "\n"
        "Machine learning is revolutionizing the way we interact with technology.\n"
        "Artificial intelligence systems are becoming more sophisticated, enabling automated decision making\n"
        "and pattern recognition at unprecedented scales. Deep learning algorithms continue to improve,\n"
        "making breakthroughs in natural language processing and computer vision.\n"
    ),
    "Technical Documentation": (
        "\n"
        "The user interface provides intuitive navigation through contextual menus and adaptive layouts.\n"
        "System responses are optimized for performance while maintaining high reliability standards.\n"
        "Database connections are pooled to minimize resource overhead and maximize throughput.\n"
    ),
}
29
+
30
def extract_keywords(text, num_keywords, extraction_type, include_phrases):
    """Rank the most frequent keywords in ``text`` and format them as a list.

    Args:
        text: Raw input text. ``None``, empty, or whitespace-only input
            short-circuits to the "No keywords found." message without
            running the spaCy pipeline.
        num_keywords: Maximum number of entries to report. Coerced with
            ``int()`` so a float such as ``10.0`` is accepted; values below
            1 produce no entries.
        extraction_type: ``"Nouns"`` (non-stopword tokens tagged ``NOUN``),
            ``"Named Entities"`` (``doc.ents``) or ``"All Words"`` (every
            non-stopword, non-punctuation, non-blank token). Any other value
            contributes no single-token keywords.
        include_phrases: When truthy, noun chunks of two or more words are
            counted together with the keywords.

    Returns:
        One line per keyword in the form
        ``"<rank>. <keyword> (frequency: <count>)"``, joined with newlines
        and ordered by descending frequency, or ``"No keywords found."``
        when nothing qualifies.
    """
    if not text or not text.strip():
        return "No keywords found."

    # The limit is used for ranking, so it must be a non-negative integer;
    # a raw negative slice bound would silently drop trailing entries instead.
    limit = max(int(num_keywords), 0)

    doc = nlp(text)

    # Build only the candidate list the selected method actually needs.
    if extraction_type == "Nouns":
        candidates = [
            token.text.lower()
            for token in doc
            if token.pos_ == "NOUN" and not token.is_stop
        ]
    elif extraction_type == "Named Entities":
        candidates = [_span_key(ent) for ent in doc.ents]
    elif extraction_type == "All Words":
        candidates = [
            token.text.lower()
            for token in doc
            if not token.is_stop and not token.is_punct and token.text.strip()
        ]
    else:
        candidates = []

    if include_phrases:
        # After whitespace normalisation a key contains a space exactly when
        # the chunk has more than one word.
        phrase_keys = (_span_key(chunk) for chunk in doc.noun_chunks)
        candidates.extend(key for key in phrase_keys if " " in key)

    # most_common(n) matches sorted(items, key=count, reverse=True)[:n],
    # including first-seen order among equal counts.
    ranked = Counter(candidates).most_common(limit)

    lines = [
        f"{rank}. {keyword} (frequency: {count})"
        for rank, (keyword, count) in enumerate(ranked, 1)
    ]
    return "\n".join(lines) if lines else "No keywords found."


def _span_key(span):
    """Return the span's text lower-cased with whitespace runs collapsed.

    Multi-token spans taken from hard-wrapped input can contain line breaks;
    collapsing them keeps each keyword on one output line and lets the same
    phrase count as one key regardless of where the text was wrapped.
    """
    return " ".join(span.text.lower().split())
68
+
69
def load_example(example_name):
    """Return the sample text registered under ``example_name``.

    Falls back to an empty string when the name is not a key of ``EXAMPLES``.
    """
    if example_name in EXAMPLES:
        return EXAMPLES[example_name]
    return ""
71
+
72
# Assemble the Gradio UI: a heading, a row holding the text input and the
# extraction options, then the action button and the read-only result box.
# NOTE(review): the nesting below is reconstructed from a view whose
# indentation was stripped - confirm that the button and the output box sit
# below the row rather than inside the second column.
with gr.Blocks(title="Keyword Extraction Tool") as demo:
    gr.Markdown("# 🔍 Advanced NLP Keyword Extraction")
    gr.Markdown("Extract keywords using spaCy's natural language processing")

    with gr.Row():
        # Wider column: the text to analyse and the example picker.
        with gr.Column(scale=2):
            input_text = gr.Textbox(label="Input Text", placeholder="Enter your text here...", lines=8)
            example_dropdown = gr.Dropdown(choices=list(EXAMPLES), label="Load Example Text")

        # Narrower column: extraction settings.
        with gr.Column(scale=1):
            extraction_type = gr.Radio(
                choices=["Nouns", "Named Entities", "All Words"],
                value="Nouns",
                label="Extraction Method",
            )
            include_phrases = gr.Checkbox(label="Include Noun Phrases", value=True)
            num_keywords = gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Number of Keywords")

    extract_btn = gr.Button("Extract Keywords", variant="primary")
    output_text = gr.Textbox(label="Extracted Keywords", lines=10, interactive=False)

    # Choosing an example copies its text into the input box.
    example_dropdown.change(fn=load_example, inputs=[example_dropdown], outputs=[input_text])

    # The button runs the extraction; the inputs are listed in the same order
    # as extract_keywords' parameters.
    extract_btn.click(
        fn=extract_keywords,
        inputs=[input_text, num_keywords, extraction_type, include_phrases],
        outputs=[output_text],
    )

# Start serving the interface.
demo.launch()