File size: 4,795 Bytes
5afcfd6
1ec2d49
 
5afcfd6
 
1ec2d49
51073fb
5afcfd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ec2d49
f31ddfc
1ec2d49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f31ddfc
1ec2d49
f31ddfc
1ec2d49
f31ddfc
 
 
 
 
 
 
1ec2d49
 
5afcfd6
f31ddfc
 
 
 
 
 
 
5afcfd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import gradio as gr
from gensim.models import TfidfModel
from gensim.corpora import Dictionary
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import remove_stopwords
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Sample input texts, keyed by the display name shown in the
# "Load Example Text" dropdown; `load_example` returns the value for the
# selected key. The triple-quoted values keep their leading/trailing newline
# and per-line indentation exactly as written here.
EXAMPLES = {
    "Scientific Abstract": """
    Compatibility of systems of linear constraints over the set of natural numbers. 
    Criteria of compatibility of a system of linear Diophantine equations, strict inequations, 
    and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions 
    and algorithms of construction of minimal generating sets of solutions for all types of systems are given.
    """,
    "News Article": """
    Machine learning is revolutionizing the way we interact with technology. 
    Artificial intelligence systems are becoming more sophisticated, enabling automated decision making 
    and pattern recognition at unprecedented scales. Deep learning algorithms continue to improve, 
    making breakthroughs in natural language processing and computer vision.
    """,
    "Technical Documentation": """
    The user interface provides intuitive navigation through contextual menus and adaptive layouts. 
    System responses are optimized for performance while maintaining high reliability standards. 
    Database connections are pooled to minimize resource overhead and maximize throughput.
    """
}

def preprocess_text(text):
    """Return *text* as a single space-joined string of cleaned tokens.

    The text is lowercased, stripped of gensim's stopwords, and tokenized
    with ``simple_preprocess`` (accents removed via ``deacc=True``).

    Args:
        text: Raw input string. ``None`` or an empty string yields ``""``.

    Returns:
        The surviving tokens joined by single spaces.
    """
    if not text:
        return ""
    # remove_stopwords compares whitespace-separated words verbatim against
    # gensim's lowercase stopword list, so capitalised stopwords ("The",
    # "Are") would slip through unless the text is lowercased first.
    without_stopwords = remove_stopwords(text.lower())
    tokens = simple_preprocess(without_stopwords, deacc=True)
    return ' '.join(tokens)

def extract_keywords(text, num_keywords=10, scores=True, min_length=1):
    """Rank the terms of *text* and return the best ones as a bulleted list.

    The text is lowercased, stripped of stopwords, tokenized with
    ``simple_preprocess`` and weighted with a gensim ``TfidfModel`` built
    from that single document.

    Args:
        text: Raw input text. Empty, whitespace-only or ``None`` input yields
            the "No keywords found." message.
        num_keywords: Maximum number of keywords to return. Values that are
            zero or negative return no keywords.
        scores: When true, each line also shows the term's weight.
        min_length: Minimum number of whitespace-separated words a keyword
            must contain. Terms produced here are single tokens, so any value
            above 1 filters out every candidate.

    Returns:
        One keyword per line, highest weight first, or
        ``"No keywords found."`` when nothing qualifies.
    """
    import math  # local import: only needed for the idf smoothing below

    if not text or not text.strip():
        return "No keywords found."

    # Lowercase before stopword removal: the stopword match is case-sensitive.
    processed_text = remove_stopwords(text.lower())
    tokens = simple_preprocess(processed_text, deacc=True)
    if not tokens:
        return "No keywords found."

    # Create dictionary and bag-of-words for the single document
    dictionary = Dictionary([tokens])
    corpus = [dictionary.doc2bow(tokens)]

    # With one document the default idf, log2(N / df), is log2(1) == 0 for
    # every term, so all weights vanish and gensim drops them, leaving an
    # empty result. Adding 1 keeps every term; for a single document the
    # score then reduces to the L2-normalised term frequency.
    tfidf = TfidfModel(
        corpus,
        wglobal=lambda docfreq, totaldocs: 1.0 + math.log2(totaldocs / docfreq),
    )
    ranked = sorted(tfidf[corpus[0]], key=lambda pair: pair[1], reverse=True)

    # UI sliders may deliver floats; clamp so a non-positive limit yields nothing.
    limit = max(int(num_keywords), 0)

    results = []
    for word_id, score in ranked:
        # Check the limit before appending so that limit == 0 returns nothing.
        if len(results) >= limit:
            break
        word = dictionary[word_id]
        if len(word.split()) < min_length:
            continue
        if scores:
            results.append(f"• {word:<30} (score: {score:.4f})")
        else:
            results.append(f"• {word}")

    return "\n".join(results) if results else "No keywords found."

# NOTE: the "Extract Keywords" click handler is registered inside the
# `gr.Blocks` context below. Wiring it here would raise NameError at import
# time, because `extract_btn` and the input components do not exist until
# that context has been built.

def load_example(example_name):
    """Look up the sample text registered under *example_name*.

    Returns the matching entry of ``EXAMPLES``, or an empty string when the
    name is not a known example.
    """
    if example_name in EXAMPLES:
        return EXAMPLES[example_name]
    return ""

# Build the Gradio interface: text input and example picker on the left,
# extraction options on the right, results underneath.
with gr.Blocks(title="Gensim Keyword Extraction") as demo:
    gr.Markdown("# 📑 Gensim Keyword Extraction")
    gr.Markdown("Extract keywords using Gensim's text processing capabilities")

    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter your text here...",
                lines=8,
            )
            example_dropdown = gr.Dropdown(
                choices=list(EXAMPLES.keys()),
                label="Load Example Text",
            )

        with gr.Column(scale=1):
            # This value is passed to extract_keywords as `num_keywords`,
            # i.e. an absolute count, so it is labelled as a count rather
            # than as a percentage.
            num_keywords = gr.Slider(
                minimum=1,
                maximum=100,
                value=20,
                step=1,
                label="Number of Keywords",
            )

            min_length = gr.Slider(
                minimum=1,
                maximum=5,
                value=1,
                step=1,
                label="Minimum Words per Keyword",
            )

            show_scores = gr.Checkbox(
                label="Show Relevance Scores",
                value=True,
            )

            extract_btn = gr.Button(
                "Extract Keywords",
                variant="primary",
            )

    output_text = gr.Textbox(
        label="Extracted Keywords",
        lines=10,
        interactive=False,
    )

    # Selecting an example replaces the contents of the input box.
    example_dropdown.change(
        load_example,
        inputs=[example_dropdown],
        outputs=[input_text],
    )

    # Input order must match extract_keywords(text, num_keywords, scores, min_length).
    extract_btn.click(
        extract_keywords,
        inputs=[
            input_text,
            num_keywords,
            show_scores,
            min_length,
        ],
        outputs=[output_text],
    )

# Only start the server when run as a script, so importing this module
# (e.g. for testing) has no side effects.
if __name__ == "__main__":
    demo.launch()