한국어 이름 추출 NER 작업 수행에 특화된 모델입니다.
๊ธฐ์กด์ ๋ง๋ค์๋ final_crf ์ ํ ๋จ๊ณ ๋ fine-tuning ์ํจ ๋ชจ๋ธ์ ๋๋ค. ์ข ๋ ์งง์ ์ด๋ฆ ๋ฐํ์๋ ์ด๋ฆ์ ์ ์ก์๋ด๋๋ก fine-tuning ์์ผฐ์ต๋๋ค ๊ธฐ์กด final_crf๋ ์๋ ๋งํฌ์์ ํ์ธํ ์ ์์ต๋๋ค https://huggingface.co/jinwoowef/final_crf
cuda 11.4 / python 3.8.19 에서 작성하였습니다.
다음의 코드로 모델과 토크나이저를 불러올 수 있습니다.
from transformers import AutoModelForTokenClassification, AutoTokenizer
# Hugging Face Hub repository id of the fine-tuned Korean name-NER checkpoint.
model_name = "jinwoowef/ner_crf_plus"

# The tokenizer and the token-classification model are fetched from the same
# repository; the two loads are independent of each other.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
)
model = AutoModelForTokenClassification.from_pretrained(
    model_name,
)
한국어 이름 개체명 추출 예시
from transformers import AutoTokenizer, BertForTokenClassification, logging, pipeline
import torch
import pandas as pd
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the NER pipeline. `model` and `tokenizer` are the objects loaded by the
# snippet earlier in this document. With aggregation_strategy="simple" the
# pipeline groups tokens into entities, which is why the results below are
# read through the `entity_group` key.
ner = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
    device=device,
)

# Input sentences. The original sample was withheld because it contained
# personal data; replace this fictional placeholder with your own list of
# Korean strings.
sample_data = [
    "홍길동 씨는 어제 서울에서 김영희 씨를 만났습니다.",
]

# Run NER on every sentence; each element of the list holds the entities found
# in the sentence at the same position in `sample_data`.
ner_results_list = [ner(example_text) for example_text in sample_data]

# Flatten the per-sentence results into one record per detected entity.
formatted_results = []
for entry in ner_results_list:
    for entity in entry:
        formatted_results.append({
            'NE_form': entity['word'],
            'NE_label': entity['entity_group'],
            'Score': entity['score'],
            'NE_begin': entity['start'],
            'NE_end': entity['end'],
        })

# Store the records in a DataFrame. The columns are passed explicitly so the
# frame keeps the same schema even when no entity was found at all.
ner_crf = pd.DataFrame(
    formatted_results,
    columns=['NE_form', 'NE_label', 'Score', 'NE_begin', 'NE_end'],
)

# Print the entities found in the first five sentences.
for result in ner_results_list[:5]:
    for entity in result:
        print(f"NE_form: {entity['word']}, NE_label: {entity['entity_group']}, Score: {entity['score']:.4f}")
출력물 예시
NE_form: ๊น์์, NE_label: PS_NAME, Score: 0.9945
NE_form: ํ๊ฒฝ, NE_label: PS_NAME, Score: 0.7682
NE_form: ๊น๋ฏผ์ , NE_label: PS_NAME, Score: 0.9740
NE_form: ๊น์์ , NE_label: PS_NAME, Score: 0.9997
NE_form: ๊นํฌ๊ฒฝ, NE_label: PS_NAME, Score: 0.8500
NE_form: ๊น๋ฏธ๊ฒฝ, NE_label: PS_NAME, Score: 0.9741
NE_form: ์ค, NE_label: PS_NAME, Score: 0.6256
NE_form: ์ดํํ, NE_label: PS_NAME, Score: 0.9996
- Downloads last month
- 6