"""Gradio demo comparing NER output from spaCy, a RoBERTa pipeline and AWS Comprehend."""

import boto3
import gradio as gr
import spacy
from botocore.exceptions import ClientError
from transformers import pipeline

# Both local models are loaded once at import time and reused for every request.
nlp = spacy.load("en_core_web_sm")
# aggregation_strategy="simple" already merges word pieces into whole entities;
# the deprecated grouped_entities=True flag was redundant and only produced a
# deprecation warning, so it is no longer passed.
ner_pipeline = pipeline(
    "ner",
    model="Jean-Baptiste/roberta-large-ner-english",
    aggregation_strategy="simple",
)

# Language code used when Comprehend cannot tell us the dominant language.
_FALLBACK_LANGUAGE = "en"


def _spacy_entities(text):
    """Return one (token_text, entity_label_or_None) pair per spaCy token."""
    # ent_type_ is "" for tokens outside any entity; map that to None.
    return [(token.text, token.ent_type_ or None) for token in nlp(text)]


def _roberta_entities(text):
    """Run the RoBERTa NER pipeline and return a text/entities dict."""
    output = ner_pipeline(text)
    print(output)
    return {
        "text": text,
        "entities": [
            {
                "word": entity["word"],
                "entity": entity["entity_group"],
                "start": entity["start"],
                "end": entity["end"],
            }
            for entity in output
        ],
    }


def _detect_language(client, text):
    """Return the dominant language code for *text*, or 'en' if unavailable."""
    try:
        response = client.detect_dominant_language(Text=text)
    except ClientError:
        print("Couldn't detect languages.")
        return _FALLBACK_LANGUAGE
    languages = response["Languages"]
    print(f"Detected {len(languages)} languages.")
    if not languages:
        # Nothing detected: avoid an IndexError and use the fallback instead.
        return _FALLBACK_LANGUAGE
    # The first entry is taken as the dominant language.
    return languages[0]["LanguageCode"]


def _comprehend_entities(text):
    """Detect entities with AWS Comprehend and return a text/entities dict."""
    if not text:
        # Comprehend rejects empty input before the request is even sent, and
        # that validation error is not a ClientError; skip the call entirely.
        return {"text": text, "entities": []}
    client = boto3.client("comprehend")
    language = _detect_language(client, text)
    # NOTE(review): detect_entities accepts only a limited set of language
    # codes -- confirm whether an unsupported dominant language should also
    # fall back to 'en' instead of surfacing the service error.
    response = client.detect_entities(Text=text, LanguageCode=language)
    print(response)
    return {
        "text": text,
        "entities": [
            {
                "word": entity["Text"],
                "entity": entity["Type"],
                "start": entity["BeginOffset"],
                "end": entity["EndOffset"],
            }
            for entity in response["Entities"]
        ],
    }


def greet(model_type, text):
    """Run the selected NER backend on *text*.

    Args:
        model_type: One of "Spacy", "Roberta" or "AWS Comprehend".
        text: The text to analyse.

    Returns:
        For "Spacy", a list of (token_text, label_or_None) tuples. For
        "Roberta" and "AWS Comprehend", a dict with the original "text" and a
        list of "entities", each holding "word", "entity", "start" and "end".
        None when *model_type* is not one of the three known values.
    """
    if model_type == "Spacy":
        return _spacy_entities(text)
    if model_type == "Roberta":
        return _roberta_entities(text)
    if model_type == "AWS Comprehend":
        return _comprehend_entities(text)
    # No (or an unknown) model selected: nothing to highlight.
    return None


# Runtime strings below are assembled from adjacent literals purely to keep
# the source lines short; the resulting text is unchanged.
description = (
    "Compare the NER outputs of Spacy, HuggingFace Roberta and AWS Comprehend. "
    "These models are off the shelf models, which have not been finetuned. "
    "This is just to show a baseline, before we start finetuning the models. "
    "All of them can be finetuned (including AWS Comprehend). \n"
    "AWS Comprehend can be finetuned using Entity lists, "
    "without having to annotate full documents by hand."
)

_EXAMPLE_TEXT = (
    "We hereby issue in your favour this documentary credit which is "
    "available by negotiation of your drafts at sight drawn on L/C Openers "
    "Bank at Chennai on account of M/s.TANGEDCO Limited bearing the number, "
    "date of the documentary credit and the name of the issuing bank of this "
    "credit for 100% invoice value accompanied by the following documents."
)

demo = gr.Interface(
    fn=greet,
    inputs=[gr.Radio(["Spacy", "Roberta", "AWS Comprehend"]), "text"],
    outputs="highlight",
    title="Comparison of NER Options",
    description=description,
    examples=[["AWS Comprehend", _EXAMPLE_TEXT]],
)
# Launched at module level, exactly as before, so running or importing this
# file starts the app.
demo.launch()