import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Load the CyNER 2.0 checkpoint and build a token-classification (NER) pipeline.
path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'
tokenizer = AutoTokenizer.from_pretrained(path_to_checkpoint, use_fast=True, max_length=768)
model = AutoModelForTokenClassification.from_pretrained(path_to_checkpoint)
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)
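
# Note: without an aggregation_strategy, the pipeline emits one prediction per sub-token,
# each a dict with 'entity', 'score', 'index', 'word', 'start' and 'end' keys, e.g.
# (illustrative values only) {'entity': 'B-Malware', 'score': 0.99, 'index': 3,
#  'word': '▁Emotet', 'start': 10, 'end': 16}. perform_ner() below groups these by label
# and tag_sentence() merges adjacent sub-tokens back into readable spans.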


def tag_sentence(sentence, entities_dict):
    """
    Add HTML tags to entities for visualization.
    """
    # Flatten the per-label entity lists into one list sorted by start offset.
    all_entities = sorted(
        [(e['start'], e['end'], e['entity'], e['word']) for ents in entities_dict.values() for e in ents],
        key=lambda x: x[0]
    )

    # Merge consecutive sub-token predictions of the same type into single spans,
    # replacing the SentencePiece word-boundary marker ('▁') with a space.
    merged_entities = []
    current_entity = None

    for start, end, entity_type, word in all_entities:
        if current_entity is None:
            current_entity = [start, end, entity_type, word]
        else:
            if start == current_entity[1] and entity_type == current_entity[2] and entity_type.startswith('I-'):
                current_entity[1] = end
                current_entity[3] += word.replace('▁', ' ')
            else:
                merged_entities.append(tuple(current_entity))
                current_entity = [start, end, entity_type, word]

    if current_entity:
        merged_entities.append(tuple(current_entity))

    # Rebuild the sentence, wrapping each merged span in coloured entity tags.
    tagged_sentence = ""
    last_idx = 0
    for start, end, entity_type, _ in merged_entities:
        tagged_sentence += sentence[last_idx:start]
        entity_tag = entity_type.replace('I-', 'B-')
        # Escape the angle brackets so the label is rendered as visible text
        # instead of being parsed as an (unknown) HTML element.
        tagged_sentence += (
            f"<span style='color:blue'>&lt;{entity_tag}&gt;</span>"
            f"{sentence[start:end]}"
            f"<span style='color:blue'>&lt;/{entity_tag}&gt;</span>"
        )
        last_idx = end

    tagged_sentence += sentence[last_idx:]
    return tagged_sentence
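
# Illustrative result (entity labels assumed, not taken from the model card): for input
# text containing "Emotet" predicted as B-Malware, the rendered sentence shows
# "<B-Malware>Emotet</B-Malware>" with the bracketed labels coloured blue.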


def perform_ner(text):
    """
    Run NER pipeline and prepare results for display.
    """
    entities = ner_pipeline(text)

    # Group the raw token-level predictions by their entity label.
    entities_dict = {}
    for entity in entities:
        entity_type = entity['entity']
        if entity_type not in entities_dict:
            entities_dict[entity_type] = []
        entities_dict[entity_type].append({
            "entity": entity['entity'],
            "score": float(entity['score']),  # cast numpy float so it serialises cleanly for st.json
            "index": entity['index'],
            "word": entity['word'],
            "start": entity['start'],
            "end": entity['end']
        })

    tagged_sentence = tag_sentence(text, entities_dict)
    return entities_dict, tagged_sentence
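
# Illustrative shape of the first return value (values assumed, not real model output):
#   {'B-Malware': [{'entity': 'B-Malware', 'score': 0.99, 'index': 3, 'word': '▁Emotet',
#                   'start': 10, 'end': 16}], ...}
# It is displayed verbatim via st.json below, while tagged_sentence is rendered with
# st.markdown(..., unsafe_allow_html=True).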


# Streamlit UI
st.title("CyNER 2.0 - Named Entity Recognition")
st.write("Enter text to get named entity recognition results.")

input_text = st.text_area("Input Text", placeholder="Type your text here...")

if st.button("Analyze"):
    if input_text.strip():
        entities_dict, tagged_sentence = perform_ner(input_text)

        st.subheader("Tagged Entities")
        st.markdown(tagged_sentence, unsafe_allow_html=True)

        st.subheader("Entities and Details")
        st.json(entities_dict)
    else:
        st.warning("Please enter some text for analysis.")