import html

import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Load model and tokenizer
path_to_checkpoint = 'PranavaKailash/CyNER-2.0-DeBERTa-v3-base'


@st.cache_resource
def _load_ner_components(checkpoint):
    """Load the tokenizer, model and NER pipeline for *checkpoint*.

    Streamlit re-executes the whole script on every widget interaction;
    ``st.cache_resource`` keeps the loaded objects alive between reruns so
    the checkpoint is not reloaded each time the user clicks a button.

    Args:
        checkpoint: Model identifier or path passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model, ner_pipeline)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(
        checkpoint, use_fast=True, max_length=768
    )
    loaded_model = AutoModelForTokenClassification.from_pretrained(checkpoint)
    loaded_pipeline = pipeline("ner", model=loaded_model, tokenizer=loaded_tokenizer)
    return loaded_tokenizer, loaded_model, loaded_pipeline


tokenizer, model, ner_pipeline = _load_ner_components(path_to_checkpoint)


def tag_sentence(sentence, entities_dict):
    """Wrap each detected entity span of *sentence* in an HTML-style tag.

    Args:
        sentence: The analysed text; entity ``start``/``end`` offsets are
            used as slice indices into it.
        entities_dict: Mapping of label -> list of entity dicts. Each dict
            must provide the keys ``start``, ``end``, ``entity`` and ``word``.

    Returns:
        The sentence with every (merged) entity span rendered as
        ``<LABEL>text</LABEL>``. A leading ``I-`` in the label is written
        as ``B-`` in the tag name. All text taken from *sentence* has
        ``&``, ``<`` and ``>`` HTML-escaped, because the result is meant to
        be rendered as raw HTML.
    """
    # Flatten every label's entities into one list ordered by start offset.
    all_entities = sorted(
        (
            (e['start'], e['end'], e['entity'], e['word'])
            for ents in entities_dict.values()
            for e in ents
        ),
        key=lambda x: x[0],
    )

    # Merge runs of directly adjacent tokens that carry the same "I-" label.
    # NOTE(review): a "B-X" token followed by "I-X" tokens is NOT merged here
    # because the labels differ - confirm whether that is intended.
    merged_entities = []
    current_entity = None
    for start, end, entity_type, word in all_entities:
        if current_entity is None:
            current_entity = [start, end, entity_type, word]
            continue
        continues_current = (
            start == current_entity[1]
            and entity_type == current_entity[2]
            and entity_type.startswith('I-')
        )
        if continues_current:
            current_entity[1] = end
            current_entity[3] += word.replace('▁', ' ')
        else:
            merged_entities.append(tuple(current_entity))
            current_entity = [start, end, entity_type, word]
    if current_entity:
        merged_entities.append(tuple(current_entity))

    # Rebuild the sentence piece by piece; joined once at the end.
    pieces = []
    last_idx = 0
    for start, end, entity_type, _ in merged_entities:
        # Untagged text between the previous entity and this one.
        pieces.append(html.escape(sentence[last_idx:start], quote=False))
        # Only rewrite the label *prefix*; a plain str.replace would also
        # touch any "I-" occurring later inside the label name.
        if entity_type.startswith('I-'):
            entity_tag = 'B-' + entity_type[2:]
        else:
            entity_tag = entity_type
        entity_text = html.escape(sentence[start:end], quote=False)
        pieces.append(f"<{entity_tag}>{entity_text}</{entity_tag}>")
        last_idx = end
    pieces.append(html.escape(sentence[last_idx:], quote=False))
    return "".join(pieces)


def perform_ner(text):
    """Run the NER pipeline on *text* and prepare results for display.

    Args:
        text: The input text to analyse.

    Returns:
        A ``(entities_dict, tagged_sentence)`` tuple. ``entities_dict`` maps
        each predicted label to a list of dicts with the keys ``entity``,
        ``score``, ``index``, ``word``, ``start`` and ``end``;
        ``tagged_sentence`` is the output of ``tag_sentence`` for the same
        text and entities.
    """
    entities_dict = {}
    for entity in ner_pipeline(text):
        entities_dict.setdefault(entity['entity'], []).append({
            "entity": entity['entity'],
            # Cast to a built-in float so the value is plain-JSON friendly
            # whatever numeric type the pipeline returns (presumably a NumPy
            # scalar - confirm against the installed transformers version).
            "score": float(entity['score']),
            "index": entity['index'],
            "word": entity['word'],
            "start": entity['start'],
            "end": entity['end'],
        })

    tagged_sentence = tag_sentence(text, entities_dict)
    return entities_dict, tagged_sentence


# Streamlit UI
st.title("CyNER 2.0 - Named Entity Recognition")
st.write("Enter text to get named entity recognition results.")

input_text = st.text_area("Input Text", "Type your text here...")

if st.button("Analyze"):
    if input_text.strip():
        entities_dict, tagged_sentence = perform_ner(input_text)

        # Display results
        st.subheader("Tagged Entities")
        # tag_sentence escapes the user's text, so only the entity tags it
        # generates are interpreted as HTML here.
        st.markdown(tagged_sentence, unsafe_allow_html=True)

        st.subheader("Entities and Details")
        st.json(entities_dict)
    else:
        st.warning("Please enter some text for analysis.")