import math

import gradio as gr
from gensim.models import TfidfModel
from gensim.corpora import Dictionary
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import remove_stopwords

# Example texts
EXAMPLES = {
    "Scientific Abstract": (
        " Compatibility of systems of linear constraints over the set of natural"
        " numbers. Criteria of compatibility of a system of linear Diophantine"
        " equations, strict inequations, and nonstrict inequations are considered. "
    ),
    "News Article": (
        " Machine learning is revolutionizing the way we interact with technology."
        " Artificial intelligence systems are becoming more sophisticated, enabling"
        " automated decision making and pattern recognition at unprecedented scales. "
    ),
    "Technical Documentation": (
        " The user interface provides intuitive navigation through contextual menus"
        " and adaptive layouts. System responses are optimized for performance while"
        " maintaining high reliability standards. "
    ),
}

NO_KEYWORDS_MESSAGE = "No keywords found."


def _smoothed_idf(docfreq, totaldocs):
    """Return a strictly positive inverse document frequency.

    The model below is trained on a single document, so every term has
    ``docfreq == totaldocs == 1``. A plain ``log2(totaldocs / docfreq)`` is
    then 0 for every term, and zero-weight terms are dropped from the
    transformed vector. Adding 1 keeps every weight positive.
    """
    return 1.0 + math.log2(totaldocs / docfreq)


def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
    """Rank the words of *text* by TF-IDF weight and format them as a bullet list.

    Args:
        text: Raw input text. ``None``, empty, or whitespace-only input yields
            the "No keywords found." message.
        num_keywords: Maximum number of keywords to return. Converted with
            ``int()`` so a float coming from a UI slider is accepted.
        scores: When true, each line also shows the weight with four decimals.
        min_length: Minimum number of whitespace-separated words a keyword
            must contain. Candidates here are single tokens, so any value
            above 1 filters out every keyword.

    Returns:
        One "• keyword" line per result joined with newlines, or
        "No keywords found." when nothing qualifies.
    """
    if not text or not text.strip():
        return NO_KEYWORDS_MESSAGE

    # Preprocess text: lowercase, drop stopwords, tokenize and strip accents.
    tokens = simple_preprocess(remove_stopwords(text.lower()), deacc=True)
    if not tokens:
        return NO_KEYWORDS_MESSAGE

    # Create dictionary and the bag-of-words vector for the single document.
    dictionary = Dictionary([tokens])
    bow = dictionary.doc2bow(tokens)

    # Create TF-IDF model. The smoothed IDF is required because the corpus
    # holds exactly one document (see _smoothed_idf); with it the weights
    # reduce to normalized term frequencies.
    tfidf = TfidfModel([bow], wglobal=_smoothed_idf)

    # Sort by scores, highest first. sorted() is stable, so equal scores keep
    # the order in which the model returned them.
    ranked = sorted(tfidf[bow], key=lambda pair: pair[1], reverse=True)

    # Get top keywords and filter by length.
    limit = max(int(num_keywords), 0)
    results = []
    for word_id, score in ranked:
        if len(results) >= limit:
            break
        word = dictionary[word_id]
        if len(word.split()) < min_length:
            continue
        if scores:
            results.append(f"• {word:<30} (score: {score:.4f})")
        else:
            results.append(f"• {word}")

    return "\n".join(results) if results else NO_KEYWORDS_MESSAGE
def load_example(example_name):
    """Return the example text stored under *example_name*.

    Looks the name up in the module-level ``EXAMPLES`` mapping and returns an
    empty string when the name is not present.
    """
    return EXAMPLES.get(example_name, "")


# Create Gradio interface
# The four inputs are passed positionally to extract_keywords in this order:
# text, num_keywords, scores, min_length.
demo = gr.Interface(
    fn=extract_keywords,
    inputs=[
        gr.Textbox(lines=8, label="Input Text", placeholder="Enter your text here..."),
        gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Number of Keywords"),
        gr.Checkbox(label="Show Scores", value=True),
        gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Minimum Words per Keyword"),
    ],
    outputs=gr.Textbox(label="Extracted Keywords", lines=10),
    title="📑 Keyword Extraction",
    description="Extract keywords using TF-IDF scoring",
    # Each example row supplies one value per input component, in input order.
    examples=[
        [EXAMPLES["Scientific Abstract"], 10, True, 1],
        [EXAMPLES["News Article"], 5, True, 1],
        [EXAMPLES["Technical Documentation"], 8, False, 1],
    ],
)

# NOTE(review): share=True requests a publicly reachable share link from
# Gradio; confirm that public exposure is intended for this deployment.
demo.launch(share=True)