File size: 2,873 Bytes
5afcfd6 1ec2d49 5afcfd6 255333a 5afcfd6 255333a 5afcfd6 255333a 5afcfd6 f31ddfc 1ec2d49 f31ddfc 1ec2d49 f31ddfc 1ec2d49 f31ddfc 1ec2d49 5afcfd6 255333a 5afcfd6 0c62899 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import math

import gradio as gr
from gensim.corpora import Dictionary
from gensim.models import TfidfModel
from gensim.parsing.preprocessing import remove_stopwords
from gensim.utils import simple_preprocess
# Sample inputs for the demo, keyed by a human-readable label.
# Each text starts and ends with a newline and has one sentence fragment per line.
EXAMPLES = {
    "Scientific Abstract": (
        "\n"
        "Compatibility of systems of linear constraints over the set of natural numbers.\n"
        "Criteria of compatibility of a system of linear Diophantine equations, strict inequations,\n"
        "and nonstrict inequations are considered.\n"
    ),
    "News Article": (
        "\n"
        "Machine learning is revolutionizing the way we interact with technology.\n"
        "Artificial intelligence systems are becoming more sophisticated, enabling automated decision making\n"
        "and pattern recognition at unprecedented scales.\n"
    ),
    "Technical Documentation": (
        "\n"
        "The user interface provides intuitive navigation through contextual menus and adaptive layouts.\n"
        "System responses are optimized for performance while maintaining high reliability standards.\n"
    ),
}
def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
    """Rank the words of *text* and return the best ones as a bulleted list.

    The text is lowercased, stripped of stopwords and tokenized with gensim,
    then weighted with a TF-IDF model built from that single document.

    Args:
        text: Raw input text. ``None`` or blank input yields the
            "No keywords found." message.
        num_keywords: Maximum number of keywords to return.
        scores: When true, append each keyword's weight to its line.
        min_length: Minimum number of whitespace-separated words a keyword
            must contain. Every candidate here is a single token, so any
            value above 1 filters out all candidates.

    Returns:
        One keyword per line, each prefixed with a bullet, or
        "No keywords found." when nothing qualifies.
    """
    # Bail out before touching gensim when there is nothing to analyse.
    if not text or not text.strip():
        return "No keywords found."

    # Preprocess: lowercase, drop stopwords, tokenize (deacc=True strips accents).
    tokens = simple_preprocess(remove_stopwords(text.lower()), deacc=True)
    if not tokens:
        return "No keywords found."

    dictionary = Dictionary([tokens])
    bow = dictionary.doc2bow(tokens)

    # The corpus always holds exactly one document, so gensim's default IDF,
    # log2(total_docs / doc_freq), is log2(1 / 1) == 0 for every term and the
    # model discards all of them. Adding 1 keeps the weights non-zero; with a
    # single document the ranking is then driven by term frequency.
    tfidf = TfidfModel(
        [bow],
        wglobal=lambda doc_freq, total_docs: 1.0 + math.log2(total_docs / doc_freq),
    )
    ranked = sorted(tfidf[bow], key=lambda pair: pair[1], reverse=True)

    results = []
    for word_id, score in ranked:
        # Check the limit first so a non-positive num_keywords returns nothing.
        if len(results) >= num_keywords:
            break
        word = dictionary[word_id]
        if len(word.split()) < min_length:
            continue
        # "\u2022" is the bullet character, escaped so that it survives
        # re-encoding of the source file.
        if scores:
            results.append(f"\u2022 {word:<30} (score: {score:.4f})")
        else:
            results.append(f"\u2022 {word}")

    return "\n".join(results) if results else "No keywords found."
def load_example(example_name):
    """Return the sample text stored under *example_name*, or "" if there is none."""
    try:
        return EXAMPLES[example_name]
    except KeyError:
        return ""
# Build the Gradio UI: one text box plus three controls that map, in order,
# onto extract_keywords(text, num_keywords, scores, min_length).
demo = gr.Interface(
    fn=extract_keywords,
    inputs=[
        gr.Textbox(lines=8, label="Input Text", placeholder="Enter your text here..."),
        gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Number of Keywords"),
        gr.Checkbox(label="Show Scores", value=True),
        gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Minimum Words per Keyword"),
    ],
    outputs=gr.Textbox(label="Extracted Keywords", lines=10),
    # NOTE(review): the leading character of the title looks like a
    # mis-decoded emoji; confirm the intended glyph before changing it.
    title="π Keyword Extraction",
    description="Extract keywords using TF-IDF scoring",
    # Each row supplies one value per input component, in the same order.
    examples=[
        [EXAMPLES["Scientific Abstract"], 10, True, 1],
        [EXAMPLES["News Article"], 5, True, 1],
        [EXAMPLES["Technical Documentation"], 8, False, 1],
    ],
)

# Start the server only when run as a script, so importing this module
# (for tests or reuse of `demo`) has no side effects.
if __name__ == "__main__":
    demo.launch(share=True)