import html

import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
# Load model and tokenizer
path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'


@st.cache_resource
def _load_ner_components(checkpoint):
    """Load the tokenizer, model and NER pipeline for ``checkpoint`` once.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded objects as a shared resource avoids
    re-instantiating the checkpoint on each rerun.

    Args:
        checkpoint: Model identifier or path passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model, ner_pipeline)`` tuple.
    """
    # NOTE(review): max_length is forwarded unchanged from the original
    # call; confirm it has the intended effect on the tokenizer.
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True, max_length=768)
    loaded_model = AutoModelForTokenClassification.from_pretrained(checkpoint)
    loaded_pipeline = pipeline("ner", model=loaded_model, tokenizer=loaded_tokenizer)
    return loaded_tokenizer, loaded_model, loaded_pipeline


# Keep the original module-level names so the rest of the file is unaffected.
tokenizer, model, ner_pipeline = _load_ner_components(path_to_checkpoint)
def tag_sentence(sentence, entities_dict):
    """Wrap every detected entity span of ``sentence`` in an HTML-style tag.

    Args:
        sentence: The original text the entity offsets refer to.
        entities_dict: Mapping of label -> list of prediction dicts. Each
            prediction must provide ``'start'`` and ``'end'`` character
            offsets into ``sentence`` and an ``'entity'`` label.

    Returns:
        ``sentence`` with each (merged) span rendered as
        ``<TAG>text</TAG>``. A leading ``I-`` on the label is rewritten to
        ``B-`` for the tag name. Text taken from ``sentence`` is
        HTML-escaped (``&``, ``<``, ``>``) so that user input cannot inject
        markup when the result is rendered as HTML.
    """
    # Flatten all predictions and order them by start offset. The sort is
    # stable, so predictions sharing a start keep their dict order.
    spans = sorted(
        (
            (e['start'], e['end'], e['entity'])
            for ents in entities_dict.values()
            for e in ents
        ),
        key=lambda span: span[0],
    )

    # Merge a token into the previous span only when it starts exactly where
    # the previous one ended, carries the same label, and that label is an
    # "I-" (inside) label. A "B-" token followed by an "I-" token is
    # therefore kept as two separate spans.
    merged_entities = []
    for start, end, entity_type in spans:
        previous = merged_entities[-1] if merged_entities else None
        if (
            previous is not None
            and start == previous[1]
            and entity_type == previous[2]
            and entity_type.startswith('I-')
        ):
            previous[1] = end
        else:
            merged_entities.append([start, end, entity_type])

    pieces = []
    last_idx = 0
    for start, end, entity_type in merged_entities:
        # Untagged text between the previous span and this one.
        pieces.append(html.escape(sentence[last_idx:start], quote=False))
        # Only rewrite the prefix; a blanket str.replace would also corrupt
        # labels that merely contain "I-" further along.
        if entity_type.startswith('I-'):
            entity_tag = 'B-' + entity_type[2:]
        else:
            entity_tag = entity_type
        span_text = html.escape(sentence[start:end], quote=False)
        pieces.append(f"<{entity_tag}>{span_text}</{entity_tag}>")
        last_idx = end
    # Trailing text after the last span (or the whole sentence if none).
    pieces.append(html.escape(sentence[last_idx:], quote=False))
    return "".join(pieces)
def perform_ner(text):
    """Run the NER pipeline on ``text`` and group the predictions by label.

    Args:
        text: Raw input string to analyse.

    Returns:
        A ``(entities_dict, tagged_sentence)`` tuple. ``entities_dict`` maps
        each predicted label to the list of predictions carrying that label
        (keys ``entity``, ``score``, ``index``, ``word``, ``start``,
        ``end``), in pipeline output order. ``tagged_sentence`` is the
        result of ``tag_sentence(text, entities_dict)``.
    """
    entities_dict = {}
    for entity in ner_pipeline(text):
        entities_dict.setdefault(entity['entity'], []).append({
            "entity": entity['entity'],
            # The pipeline may hand back a NumPy scalar here; coerce it to a
            # plain Python float so the dict is JSON-serializable as a number.
            "score": float(entity['score']),
            "index": entity['index'],
            "word": entity['word'],
            "start": entity['start'],
            "end": entity['end'],
        })
    return entities_dict, tag_sentence(text, entities_dict)
# Streamlit UI
st.title("CyNER 2.0 - Named Entity Recognition")
st.write("Enter text to get named entity recognition results.")
input_text = st.text_area("Input Text", "Type your text here...")

analyze_clicked = st.button("Analyze")
if analyze_clicked and not input_text.strip():
    # Nothing but whitespace was submitted.
    st.warning("Please enter some text for analysis.")
elif analyze_clicked:
    entities_dict, tagged_sentence = perform_ner(input_text)
    # Display results: the tagged text first, then the raw predictions.
    st.subheader("Tagged Entities")
    st.markdown(tagged_sentence, unsafe_allow_html=True)
    st.subheader("Entities and Details")
    st.json(entities_dict)