|
import gradio |
|
from transformers import pipeline |
|
|
|
|
|
def merge_split_tokens(tokens):
    """Join sub-word continuation pieces back onto the token they continue.

    A token whose ``"word"`` starts with ``'##'`` is treated as the
    continuation of the token before it: the marker is stripped and the rest
    is appended to the previous token's ``"word"``. All other keys of the
    merged entry keep the values of the first piece.

    Args:
        tokens: Iterable of dicts, each with at least a ``"word"`` string.

    Returns:
        A new list of new dicts; the input dicts are left unmodified.
    """
    merged_tokens = []
    for token in tokens:
        word = token["word"]
        is_continuation = word.startswith('##')

        if is_continuation and merged_tokens:
            merged_tokens[-1]["word"] += word[2:]
            continue

        # Copy so that later merges never write into the caller's dicts.
        entry = dict(token)
        if is_continuation:
            # A continuation piece with nothing before it (the preceding
            # piece was not part of the input) cannot be merged; keep it as
            # its own entry, without the marker, instead of raising
            # IndexError on merged_tokens[-1].
            entry["word"] = word[2:]
        merged_tokens.append(entry)
    return merged_tokens
|
|
|
_NER_MODEL_NAME = 'KBLab/bert-base-swedish-cased-ner'

# Built lazily on the first request and then reused, so the model and
# tokenizer are not reloaded for every call.
_ner_pipeline = None


def _get_ner_pipeline():
    """Return the shared NER pipeline, creating it on first use."""
    global _ner_pipeline
    if _ner_pipeline is None:
        _ner_pipeline = pipeline('ner', model=_NER_MODEL_NAME, tokenizer=_NER_MODEL_NAME)
    return _ner_pipeline


def process_swedish_text(text):
    """Run named-entity recognition on ``text`` and return the entities.

    The raw pipeline results are passed through ``merge_split_tokens`` and
    each entity's ``'score'`` is converted to a built-in ``float``
    (presumably so the result can be rendered by the "json" output
    component -- confirm against the pipeline's score type).

    Args:
        text: The input text to analyse.

    Returns:
        A dict ``{'entities': [...]}`` holding one dict per merged entity.
    """
    nlp = _get_ner_pipeline()

    nlp_results = nlp(text)
    print('nlp_results:', nlp_results)

    nlp_results_merged = merge_split_tokens(nlp_results)

    # Build a real list (not a lazy map) so the debug print below shows the
    # entities themselves rather than "<map object at ...>".
    nlp_results_adjusted = [
        dict(entity, score=float(entity['score']))
        for entity in nlp_results_merged
    ]
    print('nlp_results_adjusted:', nlp_results_adjusted)

    return {'entities': nlp_results_adjusted}
|
|
|
# Sample inputs offered in the UI; each inner list is one example row.
_example_inputs = [
    ["Jag heter Tom och bor i Stockholm."],
    ["Groens malmgård är en av Stockholms malmgårdar, belägen vid Malmgårdsvägen 53 på Södermalm i Stockholm."],
]

# Text in, JSON out: the handler's {'entities': [...]} dict is shown as-is.
gradio_interface = gradio.Interface(
    title="Swedish Entity Recognition",
    description="Recognizing Swedish tokens e.g. locations and person names.",
    article="© Tom Söderlund 2022",
    fn=process_swedish_text,
    inputs="text",
    outputs="json",
    examples=_example_inputs,
)

# Start serving the interface.
gradio_interface.launch()
|
|