# DexterSptizu's picture
# Update app.py
# 255333a verified
import gradio as gr
from gensim.models import TfidfModel
from gensim.corpora import Dictionary
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import remove_stopwords
# Sample input texts, keyed by a human-readable category name.
# Each value starts and ends with a newline, and every sentence line is
# newline-terminated.
EXAMPLES = {
    "Scientific Abstract": (
        "\n"
        "Compatibility of systems of linear constraints over the set of natural numbers.\n"
        "Criteria of compatibility of a system of linear Diophantine equations, strict inequations,\n"
        "and nonstrict inequations are considered.\n"
    ),
    "News Article": (
        "\n"
        "Machine learning is revolutionizing the way we interact with technology.\n"
        "Artificial intelligence systems are becoming more sophisticated, enabling automated decision making\n"
        "and pattern recognition at unprecedented scales.\n"
    ),
    "Technical Documentation": (
        "\n"
        "The user interface provides intuitive navigation through contextual menus and adaptive layouts.\n"
        "System responses are optimized for performance while maintaining high reliability standards.\n"
    ),
}
def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
    """Return the highest-weighted terms of *text* as a bulleted string.

    The text is lower-cased, stripped of stop words and tokenized with
    gensim, then weighted by a TF-IDF model fitted on that single document.

    Args:
        text: Raw input text. Empty or whitespace-only input yields the
            "No keywords found." message.
        num_keywords: Maximum number of keywords to return.
        scores: When true, each line also shows the term's weight.
        min_length: Minimum number of whitespace-separated words a keyword
            must contain.
            NOTE(review): candidates are single tokens, so any value above 1
            filters out every candidate; supporting multi-word keywords would
            need n-gram candidates. Confirm the intended behaviour.

    Returns:
        One "• keyword" line per keyword joined by newlines, or
        "No keywords found." when nothing qualifies.
    """
    import math  # local import: only needed for the smoothed IDF below

    if not text or not text.strip():
        return "No keywords found."

    # Preprocess text
    processed_text = remove_stopwords(text.lower())
    tokens = simple_preprocess(processed_text, deacc=True)
    if not tokens:
        return "No keywords found."

    # Create dictionary and the single-document bag-of-words corpus
    dictionary = Dictionary([tokens])
    bow = dictionary.doc2bow(tokens)

    def smoothed_idf(docfreq, totaldocs):
        # gensim's default IDF is log2(totaldocs / docfreq), which is 0 for
        # every term when the corpus holds one document; zero-weight terms
        # are dropped, leaving no keywords at all. Add-one smoothing keeps
        # every weight strictly positive.
        return 1.0 + math.log((1.0 + totaldocs) / (1.0 + docfreq))

    # Create TF-IDF model and weight the document
    tfidf = TfidfModel([bow], wglobal=smoothed_idf)
    weighted_terms = tfidf[bow]

    # Sort by scores, highest first
    sorted_keywords = sorted(weighted_terms, key=lambda x: x[1], reverse=True)

    # Get top keywords and filter by length
    results = []
    for word_id, score in sorted_keywords:
        word = dictionary[word_id]
        if len(word.split()) >= min_length:
            if scores:
                results.append(f"• {word:<30} (score: {score:.4f})")
            else:
                results.append(f"• {word}")
        if len(results) >= num_keywords:
            break

    return "\n".join(results) if results else "No keywords found."
def load_example(example_name):
    """Look up an example text by name; unknown names give an empty string."""
    if example_name in EXAMPLES:
        return EXAMPLES[example_name]
    return ""
# Create Gradio interface.
# The four inputs map positionally onto extract_keywords(text, num_keywords,
# scores, min_length); each `examples` row supplies one value per input.
demo = gr.Interface(
    fn=extract_keywords,
    inputs=[
        gr.Textbox(lines=8, label="Input Text", placeholder="Enter your text here..."),
        gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Number of Keywords"),
        gr.Checkbox(label="Show Scores", value=True),
        gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Minimum Words per Keyword"),
    ],
    outputs=gr.Textbox(label="Extracted Keywords", lines=10),
    # The emoji was stored as mis-decoded UTF-8 bytes; restored to the
    # intended character.
    title="📑 Keyword Extraction",
    description="Extract keywords using TF-IDF scoring",
    examples=[
        [EXAMPLES["Scientific Abstract"], 10, True, 1],
        [EXAMPLES["News Article"], 5, True, 1],
        [EXAMPLES["Technical Documentation"], 8, False, 1],
    ],
)

# Start the web server; share=True asks Gradio for a public share link.
demo.launch(share=True)