Spaces:

DexterSptizu
/

spacy-keyword-extraction

Sleeping

App Files Community

DexterSptizu committed 23 days ago

Commit

a0d2064

•

1 Parent(s): 986a219

Create app.py

Browse files

Files changed (1) hide show

app.py +136 -0

app.py ADDED Viewed

	@@ -0,0 +1,136 @@

+import gradio as gr
+import spacy
+from collections import Counter
+from string import punctuation
+# Load spaCy's small English pipeline once at import time; extract_keywords() reuses this shared `nlp` object on every call
+nlp = spacy.load("en_core_web_sm")
+# Sample inputs keyed by display label; the keys populate the "Load Example Text" dropdown and load_example() looks entries up by key
+EXAMPLES = {
+    "Scientific Abstract": """
+    Compatibility of systems of linear constraints over the set of natural numbers.
+    Criteria of compatibility of a system of linear Diophantine equations, strict inequations,
+    and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions
+    and algorithms of construction of minimal generating sets of solutions for all types of systems are given.
+    """,
+    "News Article": """
+    Machine learning is revolutionizing the way we interact with technology.
+    Artificial intelligence systems are becoming more sophisticated, enabling automated decision making
+    and pattern recognition at unprecedented scales. Deep learning algorithms continue to improve,
+    making breakthroughs in natural language processing and computer vision.
+    """,
+    "Technical Documentation": """
+    The user interface provides intuitive navigation through contextual menus and adaptive layouts.
+    System responses are optimized for performance while maintaining high reliability standards.
+    Database connections are pooled to minimize resource overhead and maximize throughput.
+    """
+}
+def extract_keywords(text, num_keywords, extraction_type, include_phrases):
+    doc = nlp(text)
+    # Remove stopwords and punctuation
+    words = [token.text.lower() for token in doc
+             if not token.is_stop and not token.is_punct and token.text.strip()]
+    # Extract noun phrases if requested
+    phrases = []
+    if include_phrases:
+        phrases = [chunk.text.lower() for chunk in doc.noun_chunks
+                  if len(chunk.text.split()) > 1]
+    # Extract keywords based on selected method
+    keywords = []
+    if extraction_type == "Nouns":
+        keywords = [token.text.lower() for token in doc
+                   if token.pos_ == "NOUN" and not token.is_stop]
+    elif extraction_type == "Named Entities":
+        keywords = [ent.text.lower() for ent in doc.ents]
+    elif extraction_type == "All Words":
+        keywords = words
+    # Combine keywords and phrases
+    all_keywords = keywords + phrases
+    # Count frequencies
+    keyword_freq = Counter(all_keywords)
+    # Sort by frequency and get top keywords
+    top_keywords = sorted(keyword_freq.items(), key=lambda x: x[1], reverse=True)[:num_keywords]
+    # Format output
+    result = []
+    for idx, (keyword, freq) in enumerate(top_keywords, 1):
+        result.append(f"{idx}. {keyword} (frequency: {freq})")
+    return "\n".join(result) if result else "No keywords found."
+def load_example(example_name):
+    return EXAMPLES.get(example_name, "")
+# Build the Gradio Blocks UI: a row with an input column and an options column, followed by a read-only results textbox
+with gr.Blocks(title="Keyword Extraction Tool") as demo:
+    gr.Markdown("# 🔍 Advanced NLP Keyword Extraction")
+    gr.Markdown("Extract keywords using spaCy's natural language processing")
+    with gr.Row():
+        with gr.Column(scale=2):  # first column: text to analyse plus the example picker
+            input_text = gr.Textbox(
+                label="Input Text",
+                placeholder="Enter your text here...",
+                lines=8
+            )
+            example_dropdown = gr.Dropdown(
+                choices=list(EXAMPLES.keys()),  # one choice per EXAMPLES key
+                label="Load Example Text"
+            )
+        with gr.Column(scale=1):  # second column: extraction options and the trigger button
+            extraction_type = gr.Radio(
+                choices=["Nouns", "Named Entities", "All Words"],  # must match the strings compared in extract_keywords
+                value="Nouns",
+                label="Extraction Method"
+            )
+            include_phrases = gr.Checkbox(
+                label="Include Noun Phrases",
+                value=True
+            )
+            num_keywords = gr.Slider(
+                minimum=1,
+                maximum=20,
+                value=10,
+                step=1,
+                label="Number of Keywords"
+            )
+            extract_btn = gr.Button("Extract Keywords", variant="primary")
+    output_text = gr.Textbox(  # declared outside the Row; shows the text returned by extract_keywords
+        label="Extracted Keywords",
+        lines=10,
+        interactive=False
+    )
+    # Wire events: changing the dropdown writes load_example's result into the input box; clicking the button runs extract_keywords
+    example_dropdown.change(
+        load_example,
+        inputs=[example_dropdown],
+        outputs=[input_text]
+    )
+    extract_btn.click(
+        extract_keywords,
+        inputs=[  # listed in the same order as extract_keywords' parameters
+            input_text,
+            num_keywords,
+            extraction_type,
+            include_phrases
+        ],
+        outputs=[output_text]
+    )
+# Start the app; this runs whenever the module is executed or imported (there is no __main__ guard)
+demo.launch()