|
import math

import gradio as gr
from gensim.corpora import Dictionary
from gensim.models import TfidfModel
from gensim.parsing.preprocessing import remove_stopwords
from gensim.utils import simple_preprocess
|
|
|
|
|
# Sample input texts for the demo, keyed by a human-readable name.
# Each text keeps a leading and trailing newline from its triple-quoted form.
EXAMPLES = {
    "Scientific Abstract": """
Compatibility of systems of linear constraints over the set of natural numbers.
Criteria of compatibility of a system of linear Diophantine equations, strict inequations,
and nonstrict inequations are considered.
""",
    "News Article": """
Machine learning is revolutionizing the way we interact with technology.
Artificial intelligence systems are becoming more sophisticated, enabling automated decision making
and pattern recognition at unprecedented scales.
""",
    "Technical Documentation": """
The user interface provides intuitive navigation through contextual menus and adaptive layouts.
System responses are optimized for performance while maintaining high reliability standards.
""",
}
|
|
|
def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
    """Return the highest-weighted terms of *text* as a bulleted string.

    The text is tokenized with ``simple_preprocess`` (with ``deacc=True``),
    stop words are removed, and the remaining tokens are weighted with a
    gensim ``TfidfModel`` fitted on the text itself.

    Args:
        text: Input text. ``None``, empty, or whitespace-only text yields
            the "no keywords" message.
        num_keywords: Maximum number of keywords to return; coerced to
            ``int`` so a float coming from a UI slider is accepted.
        scores: When true, append each keyword's weight to its line.
        min_length: Minimum number of whitespace-separated words a keyword
            must contain. Candidates are single tokens, so any value above
            1 filters every candidate out.

    Returns:
        One keyword per line, each prefixed with a bullet, ordered by
        descending weight (ties broken alphabetically), or
        ``"No keywords found."`` when nothing qualifies.
    """
    no_result = "No keywords found."
    bullet = "\u2022"

    if not text or not text.strip():
        return no_result

    # Tokenize before filtering stop words: the tokenizer strips punctuation,
    # so a stop word written as "the," is recognised and removed as well.
    tokens = remove_stopwords(" ".join(simple_preprocess(text, deacc=True))).split()
    if not tokens:
        return no_result

    dictionary = Dictionary([tokens])
    bow = dictionary.doc2bow(tokens)

    def smoothed_idf(docfreq, totaldocs):
        # gensim's default IDF, log2(totaldocs / docfreq), is 0 for every term
        # of a one-document corpus, and TfidfModel discards zero-weight terms,
        # which would leave no keywords at all. Adding 1 keeps each term; for a
        # single document the weights reduce to normalized term frequency.
        return 1.0 + math.log2(totaldocs / docfreq)

    tfidf = TfidfModel([bow], wglobal=smoothed_idf)

    # Highest weight first; alphabetical order makes ties deterministic.
    ranked = sorted(
        ((dictionary[term_id], weight) for term_id, weight in tfidf[bow]),
        key=lambda pair: (-pair[1], pair[0]),
    )

    limit = int(num_keywords)
    lines = []
    for word, weight in ranked:
        if len(lines) >= limit:
            break
        if len(word.split()) < min_length:
            continue
        if scores:
            lines.append(f"{bullet} {word:<30} (score: {weight:.4f})")
        else:
            lines.append(f"{bullet} {word}")

    return "\n".join(lines) if lines else no_result
|
|
|
def load_example(example_name):
    """Look up the sample text stored in ``EXAMPLES`` under *example_name*.

    Returns the empty string when no example is registered under that name.
    """
    try:
        return EXAMPLES[example_name]
    except KeyError:
        return ""
|
|
|
|
|
# Gradio UI. The four input components are passed positionally to
# extract_keywords as (text, num_keywords, scores, min_length).
demo = gr.Interface(
    fn=extract_keywords,
    inputs=[
        gr.Textbox(lines=8, label="Input Text", placeholder="Enter your text here..."),
        gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Number of Keywords"),
        gr.Checkbox(label="Show Scores", value=True),
        gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Minimum Words per Keyword"),
    ],
    outputs=gr.Textbox(label="Extracted Keywords", lines=10),
    # Plain-text title: the leading emoji of the original title was
    # mis-encoded and could not be recovered.
    title="Keyword Extraction",
    description="Extract keywords using TF-IDF scoring",
    # Each row supplies one value per input component, in the same order.
    examples=[
        [EXAMPLES["Scientific Abstract"], 10, True, 1],
        [EXAMPLES["News Article"], 5, True, 1],
        [EXAMPLES["Technical Documentation"], 8, False, 1],
    ],
)

# share=True additionally requests a publicly shareable link from Gradio.
demo.launch(share=True)