Spaces:

DexterSptizu
/

spacy-keyword-extraction

Sleeping

App Files Files Community

spacy-keyword-extraction / app.py

DexterSptizu

Create app.py

a0d2064 verified 24 days ago

raw

history blame

4.64 kB

	import gradio as gr
	import spacy
	from collections import Counter
	from string import punctuation

	# Load spaCy's small English pipeline once at import time; extract_keywords
	# calls this shared `nlp` object for every request.
	nlp = spacy.load("en_core_web_sm")

	# Sample inputs keyed by display name. The keys populate the example
	# dropdown and load_example looks texts up by these keys.
	# Each literal opens and closes on its own line, so every text starts and
	# ends with a newline character.
	EXAMPLES = {
	"Scientific Abstract": """
	Compatibility of systems of linear constraints over the set of natural numbers.
	Criteria of compatibility of a system of linear Diophantine equations, strict inequations,
	and nonstrict inequations are considered. Upper bounds for components of a minimal set of solutions
	and algorithms of construction of minimal generating sets of solutions for all types of systems are given.
	""",
	"News Article": """
	Machine learning is revolutionizing the way we interact with technology.
	Artificial intelligence systems are becoming more sophisticated, enabling automated decision making
	and pattern recognition at unprecedented scales. Deep learning algorithms continue to improve,
	making breakthroughs in natural language processing and computer vision.
	""",
	"Technical Documentation": """
	The user interface provides intuitive navigation through contextual menus and adaptive layouts.
	System responses are optimized for performance while maintaining high reliability standards.
	Database connections are pooled to minimize resource overhead and maximize throughput.
	"""
	}

	def extract_keywords(text, num_keywords, extraction_type, include_phrases):
	    """Rank the most frequent keywords (and optionally noun phrases) in *text*.

	    Args:
	        text: Raw input text, parsed with the module-level spaCy pipeline
	            ``nlp``. ``None``, empty, or whitespace-only input short-circuits
	            to the "no keywords" message without running the pipeline.
	        num_keywords: Maximum number of entries to return. Coerced to ``int``
	            because a UI slider may deliver a float; values below 1 produce
	            no entries.
	        extraction_type: ``"Nouns"``, ``"Named Entities"`` or ``"All Words"``.
	            Any other value contributes no single-token keywords (noun
	            phrases are still counted when requested).
	        include_phrases: When truthy, multi-word noun chunks are counted
	            alongside the keywords.

	    Returns:
	        A newline-separated, numbered list with one entry per line in the
	        form ``"<rank>. <keyword> (frequency: <count>)"``, ordered by
	        descending count (ties keep first-seen order), or
	        ``"No keywords found."`` when nothing was extracted.
	    """
	    if not text or not text.strip():
	        return "No keywords found."

	    # A float limit would raise TypeError when used as a slice/heap bound.
	    limit = max(int(num_keywords), 0)
	    doc = nlp(text)

	    if extraction_type == "Nouns":
	        keywords = [
	            token.text.lower()
	            for token in doc
	            if token.pos_ == "NOUN" and not token.is_stop
	        ]
	    elif extraction_type == "Named Entities":
	        cleaned_entities = (_collapse_whitespace(ent.text) for ent in doc.ents)
	        keywords = [entity for entity in cleaned_entities if entity]
	    elif extraction_type == "All Words":
	        # Drop stopwords, punctuation and whitespace-only tokens.
	        keywords = [
	            token.text.lower()
	            for token in doc
	            if not token.is_stop and not token.is_punct and token.text.strip()
	        ]
	    else:
	        keywords = []

	    phrases = []
	    if include_phrases:
	        # Only multi-word chunks count as phrases; single words are already
	        # covered by the keyword list.
	        phrases = [
	            _collapse_whitespace(chunk.text)
	            for chunk in doc.noun_chunks
	            if len(chunk.text.split()) > 1
	        ]

	    keyword_freq = Counter(keywords + phrases)
	    top_keywords = keyword_freq.most_common(limit)

	    lines = [
	        f"{rank}. {keyword} (frequency: {freq})"
	        for rank, (keyword, freq) in enumerate(top_keywords, 1)
	    ]
	    return "\n".join(lines) if lines else "No keywords found."


	def _collapse_whitespace(span_text):
	    """Lower-case *span_text* and squeeze every whitespace run to one space.

	    Multi-token spans taken from multi-line input can contain line breaks,
	    which would otherwise split one output entry across several lines and
	    make the same phrase count as different keys.
	    """
	    return " ".join(span_text.lower().split())

	def load_example(example_name):
	    """Return the sample text stored under *example_name* in ``EXAMPLES``.

	    Yields an empty string when the name is not one of the ``EXAMPLES`` keys.
	    """
	    if example_name in EXAMPLES:
	        return EXAMPLES[example_name]
	    return ""

	# Build the Gradio UI.
	# NOTE(review): the nesting below was reconstructed from a copy of the file
	# whose indentation was lost; in particular, placing the button and output
	# box below the two-column row is an assumption — confirm against the
	# deployed layout.
	with gr.Blocks(title="Keyword Extraction Tool") as demo:
	    gr.Markdown("# 🔍 Advanced NLP Keyword Extraction")
	    gr.Markdown("Extract keywords using spaCy's natural language processing")

	    with gr.Row():
	        # Left column: the text to analyse and a picker for sample texts.
	        with gr.Column(scale=2):
	            input_text = gr.Textbox(
	                lines=8,
	                label="Input Text",
	                placeholder="Enter your text here...",
	            )
	            example_dropdown = gr.Dropdown(
	                label="Load Example Text",
	                choices=list(EXAMPLES),
	            )

	        # Right column: extraction settings.
	        with gr.Column(scale=1):
	            extraction_type = gr.Radio(
	                label="Extraction Method",
	                choices=["Nouns", "Named Entities", "All Words"],
	                value="Nouns",
	            )

	            include_phrases = gr.Checkbox(label="Include Noun Phrases", value=True)

	            num_keywords = gr.Slider(
	                label="Number of Keywords",
	                minimum=1,
	                maximum=20,
	                step=1,
	                value=10,
	            )

	    extract_btn = gr.Button("Extract Keywords", variant="primary")

	    output_text = gr.Textbox(
	        label="Extracted Keywords",
	        interactive=False,
	        lines=10,
	    )

	    # Choosing an example copies its text into the input box.
	    example_dropdown.change(
	        fn=load_example,
	        inputs=[example_dropdown],
	        outputs=[input_text],
	    )

	    # The button runs the extraction; input order matches the parameter
	    # order of extract_keywords.
	    extract_btn.click(
	        fn=extract_keywords,
	        inputs=[input_text, num_keywords, extraction_type, include_phrases],
	        outputs=[output_text],
	    )

	# Start the web server for the app.
	demo.launch()