Spaces:
Sleeping
Sleeping
joyinning
committed on
Commit
·
7769f4e
1
Parent(s):
3d1e4e5
Fix all files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .DS_Store +0 -0
- app.py +1 -9
- model_utils.py +37 -37
- models/bert_ner/checkpoint-2398/config.json +64 -0
- models/bert_ner/checkpoint-2398/optimizer.pt +3 -0
- models/bert_ner/checkpoint-2398/rng_state.pth +3 -0
- models/bert_ner/checkpoint-2398/scheduler.pt +3 -0
- models/bert_ner/checkpoint-2398/special_tokens_map.json +7 -0
- models/bert_ner/checkpoint-2398/tokenizer.json +0 -0
- models/bert_ner/checkpoint-2398/tokenizer_config.json +55 -0
- models/bert_ner/checkpoint-2398/trainer_state.json +73 -0
- models/bert_ner/checkpoint-2398/training_args.bin +3 -0
- models/bert_ner/checkpoint-2398/vocab.txt +0 -0
- models/bert_ner/checkpoint-4796/config.json +64 -0
- models/bert_ner/checkpoint-4796/rng_state.pth +3 -0
- models/bert_ner/checkpoint-4796/scheduler.pt +3 -0
- models/bert_ner/checkpoint-4796/special_tokens_map.json +7 -0
- models/bert_ner/checkpoint-4796/tokenizer.json +0 -0
- models/bert_ner/checkpoint-4796/tokenizer_config.json +55 -0
- models/bert_ner/checkpoint-4796/trainer_state.json +120 -0
- models/bert_ner/checkpoint-4796/training_args.bin +3 -0
- models/bert_ner/checkpoint-4796/vocab.txt +0 -0
- models/bert_ner/checkpoint-7194/config.json +64 -0
- models/bert_ner/checkpoint-7194/optimizer.pt +3 -0
- models/bert_ner/checkpoint-7194/rng_state.pth +3 -0
- models/bert_ner/checkpoint-7194/scheduler.pt +3 -0
- models/bert_ner/checkpoint-7194/special_tokens_map.json +7 -0
- models/bert_ner/checkpoint-7194/tokenizer.json +0 -0
- models/bert_ner/checkpoint-7194/tokenizer_config.json +55 -0
- models/bert_ner/checkpoint-7194/trainer_state.json +167 -0
- models/bert_ner/checkpoint-7194/training_args.bin +3 -0
- models/bert_ner/checkpoint-7194/vocab.txt +0 -0
- models/bert_ner/config.json +64 -0
- models/bert_ner/runs/Jul06_21-47-25_04df247716ce/events.out.tfevents.1720302447.04df247716ce.1751.0 +3 -0
- models/bert_ner/runs/Jul07_02-48-39_3cedd31b78f5/events.out.tfevents.1720320522.3cedd31b78f5.1699.0 +3 -0
- models/bert_ner/special_tokens_map.json +7 -0
- models/bert_ner/tokenizer.json +0 -0
- models/bert_ner/tokenizer_config.json +55 -0
- models/bert_ner/training_args.bin +3 -0
- models/bert_ner/vocab.txt +0 -0
- models/bilstm_ner/checkpoint-11990/rng_state.pth +3 -0
- models/bilstm_ner/checkpoint-11990/scheduler.pt +3 -0
- models/bilstm_ner/checkpoint-11990/special_tokens_map.json +7 -0
- models/bilstm_ner/checkpoint-11990/tokenizer.json +0 -0
- models/bilstm_ner/checkpoint-11990/tokenizer_config.json +55 -0
- models/bilstm_ner/checkpoint-11990/trainer_state.json +254 -0
- models/bilstm_ner/checkpoint-11990/training_args.bin +3 -0
- models/bilstm_ner/checkpoint-11990/vocab.txt +0 -0
- models/bilstm_ner/checkpoint-2398/rng_state.pth +3 -0
- models/bilstm_ner/checkpoint-2398/scheduler.pt +3 -0
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
app.py
CHANGED
@@ -1,11 +1,3 @@
|
|
1 |
-
# -*- coding: utf-8 -*-
|
2 |
-
"""app
|
3 |
-
|
4 |
-
Automatically generated by Colab.
|
5 |
-
|
6 |
-
Original file is located at
|
7 |
-
https://colab.research.google.com/drive/1Glbl7TT2ZahRqXHGYp9J3zH5U4ZB0Dsd
|
8 |
-
"""
|
9 |
|
10 |
import gradio as gr
|
11 |
from model_utils import load_models, extract_information, predict_tags, extract_4w_qa, generate_why_or_how_question_and_answer
|
@@ -35,4 +27,4 @@ iface = gr.Interface(
|
|
35 |
outputs="text",
|
36 |
title="Information Extraction Chatbot"
|
37 |
)
|
38 |
-
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
import gradio as gr
|
3 |
from model_utils import load_models, extract_information, predict_tags, extract_4w_qa, generate_why_or_how_question_and_answer
|
|
|
27 |
outputs="text",
|
28 |
title="Information Extraction Chatbot"
|
29 |
)
|
30 |
+
iface.launch()
|
model_utils.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import torch
|
2 |
-
from transformers import BertTokenizer,
|
3 |
import pickle # for saving and loading Python objects
|
4 |
from openai import OpenAI
|
5 |
import tiktoken
|
@@ -65,36 +65,61 @@ class BiLSTMForTokenClassification(nn.Module):
|
|
65 |
|
66 |
# Load custom BiLSTM and pre-trained BERT
|
67 |
def load_models():
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
bert_model.eval()
|
70 |
|
71 |
-
with open('
|
72 |
bilstm_model = pickle.load(f)
|
|
|
73 |
|
74 |
return bert_model, bilstm_model
|
75 |
|
76 |
def load_custom_model(model_dir, tokenizer_dir, id2label):
|
77 |
-
config = AutoConfig.from_pretrained(model_dir
|
78 |
config.id2label = id2label
|
79 |
config.num_labels = len(id2label)
|
80 |
|
81 |
model = BiLSTMForTokenClassification(model_name=config._name_or_path, num_labels=config.num_labels)
|
82 |
model.config.id2label = id2label
|
83 |
model.load_state_dict(torch.load(os.path.join(model_dir, 'pytorch_model.bin'), map_location=torch.device('cpu')))
|
84 |
-
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir
|
85 |
|
86 |
return model, tokenizer
|
87 |
|
|
|
|
|
|
|
|
|
|
|
88 |
ner_model_dir = "joyinning/chatbot-info-extraction/models/bilstm_ner"
|
89 |
tokenizer_dir = "joyinning/chatbot-info-extraction/models/tokenizer"
|
90 |
-
id2label_ner = {0: 'O', 1: 'I-art', 2: 'B-org', 3: 'B-geo', 4: 'I-per', 5: 'B-eve', 6: 'I-geo', 7: 'B-per', 8: 'I-nat', 9: 'B-art', 10: 'B-tim', 11: 'I-gpe', 12: 'I-tim', 13: 'B-nat', 14: 'B-gpe', 15: 'I-org', 16: 'I-eve'}
|
91 |
ner_model, ner_tokenizer = load_custom_model(ner_model_dir, tokenizer_dir, id2label_ner)
|
92 |
|
|
|
93 |
# QA model
|
94 |
qa_model = pipeline('question-answering', model='deepset/bert-base-cased-squad2')
|
95 |
|
96 |
# Function to extract information
|
97 |
def extract_information(text, bert_model, bilstm_model, ner_tokenizer, id2label_ner):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
extracted_info = {}
|
99 |
|
100 |
ner_tags = predict_tags(text, bilstm_model, ner_tokenizer, id2label_ner)
|
@@ -115,13 +140,13 @@ def predict_tags(sentence, model, tokenizer, label_map):
|
|
115 |
Predicts NER tags for a given sentence using the specified model and tokenizer.
|
116 |
|
117 |
Args:
|
118 |
-
sentence: The input sentence
|
119 |
-
model: The
|
120 |
-
tokenizer: The tokenizer used for
|
121 |
-
label_map: A dictionary mapping numerical label indices to their corresponding tags.
|
122 |
|
123 |
Returns:
|
124 |
-
A list of predicted tags for each token in the sentence.
|
125 |
"""
|
126 |
tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(sentence)))
|
127 |
inputs = tokenizer.encode(sentence, return_tensors='pt')
|
@@ -182,7 +207,7 @@ def count_tokens(text):
|
|
182 |
Returns:
|
183 |
The number of tokens in the text.
|
184 |
"""
|
185 |
-
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo
|
186 |
return len(encoding.encode(text))
|
187 |
|
188 |
def generate_why_or_how_question_and_answer(extracted_info, sentence):
|
@@ -229,28 +254,3 @@ def generate_why_or_how_question_and_answer(extracted_info, sentence):
|
|
229 |
else:
|
230 |
return None
|
231 |
|
232 |
-
def get_why_or_how_answer(question, context):
|
233 |
-
"""
|
234 |
-
Queries OpenAI's GPT-3.5 model to generate an answer for a given question based on the provided context.
|
235 |
-
|
236 |
-
Args:
|
237 |
-
question (str): The question to be answered.
|
238 |
-
context (str): The text context from which the answer should be extracted.
|
239 |
-
|
240 |
-
Returns:
|
241 |
-
str: The generated answer from GPT-3.5.
|
242 |
-
"""
|
243 |
-
prompt = f"Question: {question}\nContext: {context}\nAnswer:"
|
244 |
-
|
245 |
-
response = client.chat.completions.create(
|
246 |
-
model="gpt-3.5-turbo",
|
247 |
-
messages=[
|
248 |
-
{"role": "system", "content": "You are a helpful assistant."},
|
249 |
-
{"role": "user", "content": prompt},
|
250 |
-
],
|
251 |
-
max_tokens=150,
|
252 |
-
stop=None,
|
253 |
-
temperature=0.5,
|
254 |
-
)
|
255 |
-
|
256 |
-
return response.choices[0].text.strip()
|
|
|
1 |
import torch
|
2 |
+
from transformers import BertTokenizer, AutoModelForTokenClassification, pipeline
|
3 |
import pickle # for saving and loading Python objects
|
4 |
from openai import OpenAI
|
5 |
import tiktoken
|
|
|
65 |
|
66 |
# Load custom BiLSTM and pre-trained BERT
|
67 |
def load_models():
|
68 |
+
"""
|
69 |
+
Loads the pre-trained BERT model from Hugging Face Hub.
|
70 |
+
|
71 |
+
Returns:
|
72 |
+
bert_model: The loaded BERT model.
|
73 |
+
"""
|
74 |
+
bert_model = AutoModelForTokenClassification.from_pretrained("joyinning/chatbot-info-extraction/bert-model")
|
75 |
bert_model.eval()
|
76 |
|
77 |
+
with open('models/bilstm-model.pkl', 'rb') as f:
|
78 |
bilstm_model = pickle.load(f)
|
79 |
+
bilstm_model.eval()
|
80 |
|
81 |
return bert_model, bilstm_model
|
82 |
|
83 |
def load_custom_model(model_dir, tokenizer_dir, id2label):
|
84 |
+
config = AutoConfig.from_pretrained(model_dir)
|
85 |
config.id2label = id2label
|
86 |
config.num_labels = len(id2label)
|
87 |
|
88 |
model = BiLSTMForTokenClassification(model_name=config._name_or_path, num_labels=config.num_labels)
|
89 |
model.config.id2label = id2label
|
90 |
model.load_state_dict(torch.load(os.path.join(model_dir, 'pytorch_model.bin'), map_location=torch.device('cpu')))
|
91 |
+
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)
|
92 |
|
93 |
return model, tokenizer
|
94 |
|
95 |
+
|
96 |
+
# Load NER model and tokenizer
|
97 |
+
with open('models/id2label.pkl', 'rb') as f:
|
98 |
+
id2label_ner = pickle.load(f)
|
99 |
+
|
100 |
ner_model_dir = "joyinning/chatbot-info-extraction/models/bilstm_ner"
|
101 |
tokenizer_dir = "joyinning/chatbot-info-extraction/models/tokenizer"
|
|
|
102 |
ner_model, ner_tokenizer = load_custom_model(ner_model_dir, tokenizer_dir, id2label_ner)
|
103 |
|
104 |
+
|
105 |
# QA model
|
106 |
qa_model = pipeline('question-answering', model='deepset/bert-base-cased-squad2')
|
107 |
|
108 |
# Function to extract information
|
109 |
def extract_information(text, bert_model, bilstm_model, ner_tokenizer, id2label_ner):
|
110 |
+
"""
|
111 |
+
Extracts information from the given text using NER tags and generates 'Why' or 'How' questions with answers.
|
112 |
+
|
113 |
+
Args:
|
114 |
+
text: The input text string.
|
115 |
+
bert_model: The pre-trained BERT model for token classification.
|
116 |
+
bilstm_model: The BiLSTM model for NER tag prediction.
|
117 |
+
ner_tokenizer: The tokenizer for the BiLSTM model.
|
118 |
+
id2label_ner: A dictionary mapping numerical label indices to NER tags.
|
119 |
+
|
120 |
+
Returns:
|
121 |
+
A dictionary containing extracted 4W information, generated question, and answer.
|
122 |
+
"""
|
123 |
extracted_info = {}
|
124 |
|
125 |
ner_tags = predict_tags(text, bilstm_model, ner_tokenizer, id2label_ner)
|
|
|
140 |
Predicts NER tags for a given sentence using the specified model and tokenizer.
|
141 |
|
142 |
Args:
|
143 |
+
sentence (str): The input sentence.
|
144 |
+
model (nn.Module): The NER model.
|
145 |
+
tokenizer: The tokenizer used for the model.
|
146 |
+
label_map (dict): A dictionary mapping numerical label indices to their corresponding tags.
|
147 |
|
148 |
Returns:
|
149 |
+
list: A list of predicted tags for each token in the sentence.
|
150 |
"""
|
151 |
tokens = tokenizer.tokenize(tokenizer.decode(tokenizer.encode(sentence)))
|
152 |
inputs = tokenizer.encode(sentence, return_tensors='pt')
|
|
|
207 |
Returns:
|
208 |
The number of tokens in the text.
|
209 |
"""
|
210 |
+
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
|
211 |
return len(encoding.encode(text))
|
212 |
|
213 |
def generate_why_or_how_question_and_answer(extracted_info, sentence):
|
|
|
254 |
else:
|
255 |
return None
|
256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/bert_ner/checkpoint-2398/config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "bert-base-cased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "O",
|
14 |
+
"1": "I-art",
|
15 |
+
"2": "B-org",
|
16 |
+
"3": "B-geo",
|
17 |
+
"4": "I-per",
|
18 |
+
"5": "B-eve",
|
19 |
+
"6": "I-geo",
|
20 |
+
"7": "B-per",
|
21 |
+
"8": "I-nat",
|
22 |
+
"9": "B-art",
|
23 |
+
"10": "B-tim",
|
24 |
+
"11": "I-gpe",
|
25 |
+
"12": "I-tim",
|
26 |
+
"13": "B-nat",
|
27 |
+
"14": "B-gpe",
|
28 |
+
"15": "I-org",
|
29 |
+
"16": "I-eve"
|
30 |
+
},
|
31 |
+
"initializer_range": 0.02,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"label2id": {
|
34 |
+
"B-art": 9,
|
35 |
+
"B-eve": 5,
|
36 |
+
"B-geo": 3,
|
37 |
+
"B-gpe": 14,
|
38 |
+
"B-nat": 13,
|
39 |
+
"B-org": 2,
|
40 |
+
"B-per": 7,
|
41 |
+
"B-tim": 10,
|
42 |
+
"I-art": 1,
|
43 |
+
"I-eve": 16,
|
44 |
+
"I-geo": 6,
|
45 |
+
"I-gpe": 11,
|
46 |
+
"I-nat": 8,
|
47 |
+
"I-org": 15,
|
48 |
+
"I-per": 4,
|
49 |
+
"I-tim": 12,
|
50 |
+
"O": 0
|
51 |
+
},
|
52 |
+
"layer_norm_eps": 1e-12,
|
53 |
+
"max_position_embeddings": 512,
|
54 |
+
"model_type": "bert",
|
55 |
+
"num_attention_heads": 12,
|
56 |
+
"num_hidden_layers": 12,
|
57 |
+
"pad_token_id": 0,
|
58 |
+
"position_embedding_type": "absolute",
|
59 |
+
"torch_dtype": "float32",
|
60 |
+
"transformers_version": "4.41.2",
|
61 |
+
"type_vocab_size": 2,
|
62 |
+
"use_cache": true,
|
63 |
+
"vocab_size": 28996
|
64 |
+
}
|
models/bert_ner/checkpoint-2398/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5950d47edadb7380fe3af406651f8a9b5f39aca803a2ff5432d3f1712f06ca5
|
3 |
+
size 862028474
|
models/bert_ner/checkpoint-2398/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2f98313637d9931afab0eea749a2a46ce725c551814b476cad0a8a35f80d8eb
|
3 |
+
size 14244
|
models/bert_ner/checkpoint-2398/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c14b88bb3063f6f00c6d5e076f2a66fd269463117579a024570393667ffec534
|
3 |
+
size 1064
|
models/bert_ner/checkpoint-2398/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
models/bert_ner/checkpoint-2398/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bert_ner/checkpoint-2398/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": false,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "BertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
models/bert_ner/checkpoint-2398/trainer_state.json
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.09369628131389618,
|
3 |
+
"best_model_checkpoint": "/content/drive/MyDrive/chatbot_info/models/bert_ner/checkpoint-2398",
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 2398,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.2085070892410342,
|
13 |
+
"grad_norm": 1.3864336013793945,
|
14 |
+
"learning_rate": 1.8609952738393106e-05,
|
15 |
+
"loss": 0.217,
|
16 |
+
"step": 500
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.4170141784820684,
|
20 |
+
"grad_norm": 0.8502457737922668,
|
21 |
+
"learning_rate": 1.721990547678621e-05,
|
22 |
+
"loss": 0.1147,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.6255212677231026,
|
27 |
+
"grad_norm": 1.1136561632156372,
|
28 |
+
"learning_rate": 1.5829858215179316e-05,
|
29 |
+
"loss": 0.1029,
|
30 |
+
"step": 1500
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.8340283569641368,
|
34 |
+
"grad_norm": 0.9568729400634766,
|
35 |
+
"learning_rate": 1.4439810953572422e-05,
|
36 |
+
"loss": 0.1039,
|
37 |
+
"step": 2000
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 1.0,
|
41 |
+
"eval_accuracy": 0.9708774583963692,
|
42 |
+
"eval_f1": 0.8271461197438539,
|
43 |
+
"eval_loss": 0.09369628131389618,
|
44 |
+
"eval_precision": 0.8277762839472976,
|
45 |
+
"eval_recall": 0.8265169142652586,
|
46 |
+
"eval_runtime": 32.2988,
|
47 |
+
"eval_samples_per_second": 296.915,
|
48 |
+
"eval_steps_per_second": 18.577,
|
49 |
+
"step": 2398
|
50 |
+
}
|
51 |
+
],
|
52 |
+
"logging_steps": 500,
|
53 |
+
"max_steps": 7194,
|
54 |
+
"num_input_tokens_seen": 0,
|
55 |
+
"num_train_epochs": 3,
|
56 |
+
"save_steps": 500,
|
57 |
+
"stateful_callbacks": {
|
58 |
+
"TrainerControl": {
|
59 |
+
"args": {
|
60 |
+
"should_epoch_stop": false,
|
61 |
+
"should_evaluate": false,
|
62 |
+
"should_log": false,
|
63 |
+
"should_save": true,
|
64 |
+
"should_training_stop": false
|
65 |
+
},
|
66 |
+
"attributes": {}
|
67 |
+
}
|
68 |
+
},
|
69 |
+
"total_flos": 897629122271064.0,
|
70 |
+
"train_batch_size": 16,
|
71 |
+
"trial_name": null,
|
72 |
+
"trial_params": null
|
73 |
+
}
|
models/bert_ner/checkpoint-2398/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf5db474328d3cedd3f379ef40c2ebb8edc829ff8069efb550f8b499c28ec8ba
|
3 |
+
size 5176
|
models/bert_ner/checkpoint-2398/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bert_ner/checkpoint-4796/config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "bert-base-cased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "O",
|
14 |
+
"1": "I-art",
|
15 |
+
"2": "B-org",
|
16 |
+
"3": "B-geo",
|
17 |
+
"4": "I-per",
|
18 |
+
"5": "B-eve",
|
19 |
+
"6": "I-geo",
|
20 |
+
"7": "B-per",
|
21 |
+
"8": "I-nat",
|
22 |
+
"9": "B-art",
|
23 |
+
"10": "B-tim",
|
24 |
+
"11": "I-gpe",
|
25 |
+
"12": "I-tim",
|
26 |
+
"13": "B-nat",
|
27 |
+
"14": "B-gpe",
|
28 |
+
"15": "I-org",
|
29 |
+
"16": "I-eve"
|
30 |
+
},
|
31 |
+
"initializer_range": 0.02,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"label2id": {
|
34 |
+
"B-art": 9,
|
35 |
+
"B-eve": 5,
|
36 |
+
"B-geo": 3,
|
37 |
+
"B-gpe": 14,
|
38 |
+
"B-nat": 13,
|
39 |
+
"B-org": 2,
|
40 |
+
"B-per": 7,
|
41 |
+
"B-tim": 10,
|
42 |
+
"I-art": 1,
|
43 |
+
"I-eve": 16,
|
44 |
+
"I-geo": 6,
|
45 |
+
"I-gpe": 11,
|
46 |
+
"I-nat": 8,
|
47 |
+
"I-org": 15,
|
48 |
+
"I-per": 4,
|
49 |
+
"I-tim": 12,
|
50 |
+
"O": 0
|
51 |
+
},
|
52 |
+
"layer_norm_eps": 1e-12,
|
53 |
+
"max_position_embeddings": 512,
|
54 |
+
"model_type": "bert",
|
55 |
+
"num_attention_heads": 12,
|
56 |
+
"num_hidden_layers": 12,
|
57 |
+
"pad_token_id": 0,
|
58 |
+
"position_embedding_type": "absolute",
|
59 |
+
"torch_dtype": "float32",
|
60 |
+
"transformers_version": "4.41.2",
|
61 |
+
"type_vocab_size": 2,
|
62 |
+
"use_cache": true,
|
63 |
+
"vocab_size": 28996
|
64 |
+
}
|
models/bert_ner/checkpoint-4796/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b38c96385df4e3b885915a8bd628a32636b59599af4b891f9f462ba90da839b0
|
3 |
+
size 14244
|
models/bert_ner/checkpoint-4796/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:360f5ac849ec370d5845770daa6d36fecb59b359b5d9c4d86206a09fba16abdc
|
3 |
+
size 1064
|
models/bert_ner/checkpoint-4796/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
models/bert_ner/checkpoint-4796/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bert_ner/checkpoint-4796/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": false,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "BertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
models/bert_ner/checkpoint-4796/trainer_state.json
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.08877604454755783,
|
3 |
+
"best_model_checkpoint": "/content/drive/MyDrive/chatbot_info/models/bert_ner/checkpoint-4796",
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 4796,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.2085070892410342,
|
13 |
+
"grad_norm": 1.3864336013793945,
|
14 |
+
"learning_rate": 1.8609952738393106e-05,
|
15 |
+
"loss": 0.217,
|
16 |
+
"step": 500
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.4170141784820684,
|
20 |
+
"grad_norm": 0.8502457737922668,
|
21 |
+
"learning_rate": 1.721990547678621e-05,
|
22 |
+
"loss": 0.1147,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.6255212677231026,
|
27 |
+
"grad_norm": 1.1136561632156372,
|
28 |
+
"learning_rate": 1.5829858215179316e-05,
|
29 |
+
"loss": 0.1029,
|
30 |
+
"step": 1500
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.8340283569641368,
|
34 |
+
"grad_norm": 0.9568729400634766,
|
35 |
+
"learning_rate": 1.4439810953572422e-05,
|
36 |
+
"loss": 0.1039,
|
37 |
+
"step": 2000
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 1.0,
|
41 |
+
"eval_accuracy": 0.9708774583963692,
|
42 |
+
"eval_f1": 0.8271461197438539,
|
43 |
+
"eval_loss": 0.09369628131389618,
|
44 |
+
"eval_precision": 0.8277762839472976,
|
45 |
+
"eval_recall": 0.8265169142652586,
|
46 |
+
"eval_runtime": 32.2988,
|
47 |
+
"eval_samples_per_second": 296.915,
|
48 |
+
"eval_steps_per_second": 18.577,
|
49 |
+
"step": 2398
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"epoch": 1.042535446205171,
|
53 |
+
"grad_norm": 1.206861972808838,
|
54 |
+
"learning_rate": 1.3049763691965527e-05,
|
55 |
+
"loss": 0.093,
|
56 |
+
"step": 2500
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 1.2510425354462051,
|
60 |
+
"grad_norm": 0.7828037142753601,
|
61 |
+
"learning_rate": 1.1659716430358635e-05,
|
62 |
+
"loss": 0.0807,
|
63 |
+
"step": 3000
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 1.4595496246872393,
|
67 |
+
"grad_norm": 0.5838351845741272,
|
68 |
+
"learning_rate": 1.026966916875174e-05,
|
69 |
+
"loss": 0.0777,
|
70 |
+
"step": 3500
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"epoch": 1.6680567139282736,
|
74 |
+
"grad_norm": 0.8385056853294373,
|
75 |
+
"learning_rate": 8.879621907144844e-06,
|
76 |
+
"loss": 0.0758,
|
77 |
+
"step": 4000
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 1.8765638031693077,
|
81 |
+
"grad_norm": 0.9494819045066833,
|
82 |
+
"learning_rate": 7.4895746455379494e-06,
|
83 |
+
"loss": 0.076,
|
84 |
+
"step": 4500
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"epoch": 2.0,
|
88 |
+
"eval_accuracy": 0.9721030659338555,
|
89 |
+
"eval_f1": 0.83379981340797,
|
90 |
+
"eval_loss": 0.08877604454755783,
|
91 |
+
"eval_precision": 0.8278782531980591,
|
92 |
+
"eval_recall": 0.8398066941113299,
|
93 |
+
"eval_runtime": 31.2589,
|
94 |
+
"eval_samples_per_second": 306.793,
|
95 |
+
"eval_steps_per_second": 19.195,
|
96 |
+
"step": 4796
|
97 |
+
}
|
98 |
+
],
|
99 |
+
"logging_steps": 500,
|
100 |
+
"max_steps": 7194,
|
101 |
+
"num_input_tokens_seen": 0,
|
102 |
+
"num_train_epochs": 3,
|
103 |
+
"save_steps": 500,
|
104 |
+
"stateful_callbacks": {
|
105 |
+
"TrainerControl": {
|
106 |
+
"args": {
|
107 |
+
"should_epoch_stop": false,
|
108 |
+
"should_evaluate": false,
|
109 |
+
"should_log": false,
|
110 |
+
"should_save": true,
|
111 |
+
"should_training_stop": false
|
112 |
+
},
|
113 |
+
"attributes": {}
|
114 |
+
}
|
115 |
+
},
|
116 |
+
"total_flos": 1797436693363512.0,
|
117 |
+
"train_batch_size": 16,
|
118 |
+
"trial_name": null,
|
119 |
+
"trial_params": null
|
120 |
+
}
|
models/bert_ner/checkpoint-4796/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf5db474328d3cedd3f379ef40c2ebb8edc829ff8069efb550f8b499c28ec8ba
|
3 |
+
size 5176
|
models/bert_ner/checkpoint-4796/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bert_ner/checkpoint-7194/config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "bert-base-cased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "O",
|
14 |
+
"1": "I-art",
|
15 |
+
"2": "B-org",
|
16 |
+
"3": "B-geo",
|
17 |
+
"4": "I-per",
|
18 |
+
"5": "B-eve",
|
19 |
+
"6": "I-geo",
|
20 |
+
"7": "B-per",
|
21 |
+
"8": "I-nat",
|
22 |
+
"9": "B-art",
|
23 |
+
"10": "B-tim",
|
24 |
+
"11": "I-gpe",
|
25 |
+
"12": "I-tim",
|
26 |
+
"13": "B-nat",
|
27 |
+
"14": "B-gpe",
|
28 |
+
"15": "I-org",
|
29 |
+
"16": "I-eve"
|
30 |
+
},
|
31 |
+
"initializer_range": 0.02,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"label2id": {
|
34 |
+
"B-art": 9,
|
35 |
+
"B-eve": 5,
|
36 |
+
"B-geo": 3,
|
37 |
+
"B-gpe": 14,
|
38 |
+
"B-nat": 13,
|
39 |
+
"B-org": 2,
|
40 |
+
"B-per": 7,
|
41 |
+
"B-tim": 10,
|
42 |
+
"I-art": 1,
|
43 |
+
"I-eve": 16,
|
44 |
+
"I-geo": 6,
|
45 |
+
"I-gpe": 11,
|
46 |
+
"I-nat": 8,
|
47 |
+
"I-org": 15,
|
48 |
+
"I-per": 4,
|
49 |
+
"I-tim": 12,
|
50 |
+
"O": 0
|
51 |
+
},
|
52 |
+
"layer_norm_eps": 1e-12,
|
53 |
+
"max_position_embeddings": 512,
|
54 |
+
"model_type": "bert",
|
55 |
+
"num_attention_heads": 12,
|
56 |
+
"num_hidden_layers": 12,
|
57 |
+
"pad_token_id": 0,
|
58 |
+
"position_embedding_type": "absolute",
|
59 |
+
"torch_dtype": "float32",
|
60 |
+
"transformers_version": "4.41.2",
|
61 |
+
"type_vocab_size": 2,
|
62 |
+
"use_cache": true,
|
63 |
+
"vocab_size": 28996
|
64 |
+
}
|
models/bert_ner/checkpoint-7194/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5332017e3269dc125b8bb6e7eed0bee419824364cd59fabe94efbd3cd50c9176
|
3 |
+
size 862028474
|
models/bert_ner/checkpoint-7194/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d320c3e3276e4985c96bcae477ba07a12a9e5546aae7dcb0eee2f0ea7260862
|
3 |
+
size 14244
|
models/bert_ner/checkpoint-7194/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4af7cfc18ae1e69cb2c7f43e16f571eff81ca506ca712b084a93e7fe36a8ecb2
|
3 |
+
size 1064
|
models/bert_ner/checkpoint-7194/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
models/bert_ner/checkpoint-7194/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bert_ner/checkpoint-7194/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": false,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "BertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
models/bert_ner/checkpoint-7194/trainer_state.json
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.08877604454755783,
|
3 |
+
"best_model_checkpoint": "/content/drive/MyDrive/chatbot_info/models/bert_ner/checkpoint-4796",
|
4 |
+
"epoch": 3.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 7194,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.2085070892410342,
|
13 |
+
"grad_norm": 1.3864336013793945,
|
14 |
+
"learning_rate": 1.8609952738393106e-05,
|
15 |
+
"loss": 0.217,
|
16 |
+
"step": 500
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.4170141784820684,
|
20 |
+
"grad_norm": 0.8502457737922668,
|
21 |
+
"learning_rate": 1.721990547678621e-05,
|
22 |
+
"loss": 0.1147,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.6255212677231026,
|
27 |
+
"grad_norm": 1.1136561632156372,
|
28 |
+
"learning_rate": 1.5829858215179316e-05,
|
29 |
+
"loss": 0.1029,
|
30 |
+
"step": 1500
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.8340283569641368,
|
34 |
+
"grad_norm": 0.9568729400634766,
|
35 |
+
"learning_rate": 1.4439810953572422e-05,
|
36 |
+
"loss": 0.1039,
|
37 |
+
"step": 2000
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 1.0,
|
41 |
+
"eval_accuracy": 0.9708774583963692,
|
42 |
+
"eval_f1": 0.8271461197438539,
|
43 |
+
"eval_loss": 0.09369628131389618,
|
44 |
+
"eval_precision": 0.8277762839472976,
|
45 |
+
"eval_recall": 0.8265169142652586,
|
46 |
+
"eval_runtime": 32.2988,
|
47 |
+
"eval_samples_per_second": 296.915,
|
48 |
+
"eval_steps_per_second": 18.577,
|
49 |
+
"step": 2398
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"epoch": 1.042535446205171,
|
53 |
+
"grad_norm": 1.206861972808838,
|
54 |
+
"learning_rate": 1.3049763691965527e-05,
|
55 |
+
"loss": 0.093,
|
56 |
+
"step": 2500
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 1.2510425354462051,
|
60 |
+
"grad_norm": 0.7828037142753601,
|
61 |
+
"learning_rate": 1.1659716430358635e-05,
|
62 |
+
"loss": 0.0807,
|
63 |
+
"step": 3000
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 1.4595496246872393,
|
67 |
+
"grad_norm": 0.5838351845741272,
|
68 |
+
"learning_rate": 1.026966916875174e-05,
|
69 |
+
"loss": 0.0777,
|
70 |
+
"step": 3500
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"epoch": 1.6680567139282736,
|
74 |
+
"grad_norm": 0.8385056853294373,
|
75 |
+
"learning_rate": 8.879621907144844e-06,
|
76 |
+
"loss": 0.0758,
|
77 |
+
"step": 4000
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 1.8765638031693077,
|
81 |
+
"grad_norm": 0.9494819045066833,
|
82 |
+
"learning_rate": 7.4895746455379494e-06,
|
83 |
+
"loss": 0.076,
|
84 |
+
"step": 4500
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"epoch": 2.0,
|
88 |
+
"eval_accuracy": 0.9721030659338555,
|
89 |
+
"eval_f1": 0.83379981340797,
|
90 |
+
"eval_loss": 0.08877604454755783,
|
91 |
+
"eval_precision": 0.8278782531980591,
|
92 |
+
"eval_recall": 0.8398066941113299,
|
93 |
+
"eval_runtime": 31.2589,
|
94 |
+
"eval_samples_per_second": 306.793,
|
95 |
+
"eval_steps_per_second": 19.195,
|
96 |
+
"step": 4796
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"epoch": 2.085070892410342,
|
100 |
+
"grad_norm": 0.8878109455108643,
|
101 |
+
"learning_rate": 6.099527383931054e-06,
|
102 |
+
"loss": 0.0715,
|
103 |
+
"step": 5000
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"epoch": 2.293577981651376,
|
107 |
+
"grad_norm": 1.7333062887191772,
|
108 |
+
"learning_rate": 4.70948012232416e-06,
|
109 |
+
"loss": 0.0607,
|
110 |
+
"step": 5500
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 2.5020850708924103,
|
114 |
+
"grad_norm": 0.9545063376426697,
|
115 |
+
"learning_rate": 3.319432860717265e-06,
|
116 |
+
"loss": 0.0615,
|
117 |
+
"step": 6000
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"epoch": 2.7105921601334444,
|
121 |
+
"grad_norm": 1.2574018239974976,
|
122 |
+
"learning_rate": 1.9293855991103697e-06,
|
123 |
+
"loss": 0.0616,
|
124 |
+
"step": 6500
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"epoch": 2.9190992493744785,
|
128 |
+
"grad_norm": 0.9615433812141418,
|
129 |
+
"learning_rate": 5.393383375034752e-07,
|
130 |
+
"loss": 0.0588,
|
131 |
+
"step": 7000
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"epoch": 3.0,
|
135 |
+
"eval_accuracy": 0.9728451329975679,
|
136 |
+
"eval_f1": 0.838597894175663,
|
137 |
+
"eval_loss": 0.08889511227607727,
|
138 |
+
"eval_precision": 0.8326422584913983,
|
139 |
+
"eval_recall": 0.844639341328083,
|
140 |
+
"eval_runtime": 30.9059,
|
141 |
+
"eval_samples_per_second": 310.297,
|
142 |
+
"eval_steps_per_second": 19.414,
|
143 |
+
"step": 7194
|
144 |
+
}
|
145 |
+
],
|
146 |
+
"logging_steps": 500,
|
147 |
+
"max_steps": 7194,
|
148 |
+
"num_input_tokens_seen": 0,
|
149 |
+
"num_train_epochs": 3,
|
150 |
+
"save_steps": 500,
|
151 |
+
"stateful_callbacks": {
|
152 |
+
"TrainerControl": {
|
153 |
+
"args": {
|
154 |
+
"should_epoch_stop": false,
|
155 |
+
"should_evaluate": false,
|
156 |
+
"should_log": false,
|
157 |
+
"should_save": true,
|
158 |
+
"should_training_stop": true
|
159 |
+
},
|
160 |
+
"attributes": {}
|
161 |
+
}
|
162 |
+
},
|
163 |
+
"total_flos": 2693332448203128.0,
|
164 |
+
"train_batch_size": 16,
|
165 |
+
"trial_name": null,
|
166 |
+
"trial_params": null
|
167 |
+
}
|
models/bert_ner/checkpoint-7194/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf5db474328d3cedd3f379ef40c2ebb8edc829ff8069efb550f8b499c28ec8ba
|
3 |
+
size 5176
|
models/bert_ner/checkpoint-7194/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bert_ner/config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "bert-base-cased",
|
3 |
+
"architectures": [
|
4 |
+
"BertForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "O",
|
14 |
+
"1": "I-org",
|
15 |
+
"2": "I-per",
|
16 |
+
"3": "B-geo",
|
17 |
+
"4": "B-tim",
|
18 |
+
"5": "I-art",
|
19 |
+
"6": "B-per",
|
20 |
+
"7": "B-gpe",
|
21 |
+
"8": "B-org",
|
22 |
+
"9": "I-tim",
|
23 |
+
"10": "B-eve",
|
24 |
+
"11": "B-art",
|
25 |
+
"12": "I-nat",
|
26 |
+
"13": "B-nat",
|
27 |
+
"14": "I-gpe",
|
28 |
+
"15": "I-eve",
|
29 |
+
"16": "I-geo"
|
30 |
+
},
|
31 |
+
"initializer_range": 0.02,
|
32 |
+
"intermediate_size": 3072,
|
33 |
+
"label2id": {
|
34 |
+
"B-art": 11,
|
35 |
+
"B-eve": 10,
|
36 |
+
"B-geo": 3,
|
37 |
+
"B-gpe": 7,
|
38 |
+
"B-nat": 13,
|
39 |
+
"B-org": 8,
|
40 |
+
"B-per": 6,
|
41 |
+
"B-tim": 4,
|
42 |
+
"I-art": 5,
|
43 |
+
"I-eve": 15,
|
44 |
+
"I-geo": 16,
|
45 |
+
"I-gpe": 14,
|
46 |
+
"I-nat": 12,
|
47 |
+
"I-org": 1,
|
48 |
+
"I-per": 2,
|
49 |
+
"I-tim": 9,
|
50 |
+
"O": 0
|
51 |
+
},
|
52 |
+
"layer_norm_eps": 1e-12,
|
53 |
+
"max_position_embeddings": 512,
|
54 |
+
"model_type": "bert",
|
55 |
+
"num_attention_heads": 12,
|
56 |
+
"num_hidden_layers": 12,
|
57 |
+
"pad_token_id": 0,
|
58 |
+
"position_embedding_type": "absolute",
|
59 |
+
"torch_dtype": "float32",
|
60 |
+
"transformers_version": "4.41.2",
|
61 |
+
"type_vocab_size": 2,
|
62 |
+
"use_cache": true,
|
63 |
+
"vocab_size": 28996
|
64 |
+
}
|
models/bert_ner/runs/Jul06_21-47-25_04df247716ce/events.out.tfevents.1720302447.04df247716ce.1751.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e90aa08f9b37741cb5e450f80657cf1766abe7ec6a8b65f9841e96ab326c3a3e
|
3 |
+
size 10255
|
models/bert_ner/runs/Jul07_02-48-39_3cedd31b78f5/events.out.tfevents.1720320522.3cedd31b78f5.1699.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5341fa0c174ff3925dc2c323ac28e1fc000e64d58e72ba58d80888e465ec3e2f
|
3 |
+
size 10255
|
models/bert_ner/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
models/bert_ner/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bert_ner/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": false,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "BertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
models/bert_ner/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ae34742087a4014c8fd9ba1b02b3aac236943c8376ad44fe74bf1db71ba9cbe
|
3 |
+
size 5176
|
models/bert_ner/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bilstm_ner/checkpoint-11990/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1120cd6bc3f391f251ab690fbc8b73d6d6ed110a0f1cbad1bcebc7851bc89d0e
|
3 |
+
size 14244
|
models/bilstm_ner/checkpoint-11990/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:372e9f9c1f96c16718abe3028904d44d39283f17ede1f127a4aa086234e16a5e
|
3 |
+
size 1064
|
models/bilstm_ner/checkpoint-11990/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
models/bilstm_ner/checkpoint-11990/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bilstm_ner/checkpoint-11990/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": false,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "BertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
models/bilstm_ner/checkpoint-11990/trainer_state.json
ADDED
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.09195627272129059,
|
3 |
+
"best_model_checkpoint": "/content/drive/MyDrive/chatbot_info/models/bilstm_ner/checkpoint-7194",
|
4 |
+
"epoch": 5.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 11990,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.2085070892410342,
|
13 |
+
"grad_norm": 1.1974992752075195,
|
14 |
+
"learning_rate": 1.9165971643035866e-05,
|
15 |
+
"loss": 0.3538,
|
16 |
+
"step": 500
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.4170141784820684,
|
20 |
+
"grad_norm": 0.7257668972015381,
|
21 |
+
"learning_rate": 1.8331943286071728e-05,
|
22 |
+
"loss": 0.1415,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.6255212677231026,
|
27 |
+
"grad_norm": 0.9772794842720032,
|
28 |
+
"learning_rate": 1.7497914929107593e-05,
|
29 |
+
"loss": 0.121,
|
30 |
+
"step": 1500
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.8340283569641368,
|
34 |
+
"grad_norm": 0.7119922637939453,
|
35 |
+
"learning_rate": 1.6663886572143454e-05,
|
36 |
+
"loss": 0.1167,
|
37 |
+
"step": 2000
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 1.0,
|
41 |
+
"eval_accuracy": 0.9700635783910071,
|
42 |
+
"eval_f1": 0.8221168895049858,
|
43 |
+
"eval_loss": 0.1039084866642952,
|
44 |
+
"eval_precision": 0.8215658235767271,
|
45 |
+
"eval_recall": 0.8226686951852514,
|
46 |
+
"eval_runtime": 33.3276,
|
47 |
+
"eval_samples_per_second": 287.75,
|
48 |
+
"eval_steps_per_second": 18.003,
|
49 |
+
"step": 2398
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"epoch": 1.042535446205171,
|
53 |
+
"grad_norm": 0.8703278303146362,
|
54 |
+
"learning_rate": 1.5829858215179316e-05,
|
55 |
+
"loss": 0.1042,
|
56 |
+
"step": 2500
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"epoch": 1.2510425354462051,
|
60 |
+
"grad_norm": 0.5966465473175049,
|
61 |
+
"learning_rate": 1.499582985821518e-05,
|
62 |
+
"loss": 0.0928,
|
63 |
+
"step": 3000
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"epoch": 1.4595496246872393,
|
67 |
+
"grad_norm": 0.6188527345657349,
|
68 |
+
"learning_rate": 1.4161801501251045e-05,
|
69 |
+
"loss": 0.0894,
|
70 |
+
"step": 3500
|
71 |
+
},
|
72 |
+
{
|
73 |
+
"epoch": 1.6680567139282736,
|
74 |
+
"grad_norm": 0.8101356625556946,
|
75 |
+
"learning_rate": 1.3327773144286907e-05,
|
76 |
+
"loss": 0.087,
|
77 |
+
"step": 4000
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"epoch": 1.8765638031693077,
|
81 |
+
"grad_norm": 0.9334137439727783,
|
82 |
+
"learning_rate": 1.249374478732277e-05,
|
83 |
+
"loss": 0.0865,
|
84 |
+
"step": 4500
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"epoch": 2.0,
|
88 |
+
"eval_accuracy": 0.9718397518144737,
|
89 |
+
"eval_f1": 0.8323142155768801,
|
90 |
+
"eval_loss": 0.09386646002531052,
|
91 |
+
"eval_precision": 0.8302021191345383,
|
92 |
+
"eval_recall": 0.8344370860927153,
|
93 |
+
"eval_runtime": 41.7135,
|
94 |
+
"eval_samples_per_second": 229.901,
|
95 |
+
"eval_steps_per_second": 14.384,
|
96 |
+
"step": 4796
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"epoch": 2.085070892410342,
|
100 |
+
"grad_norm": 0.7590944766998291,
|
101 |
+
"learning_rate": 1.1659716430358635e-05,
|
102 |
+
"loss": 0.0819,
|
103 |
+
"step": 5000
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"epoch": 2.293577981651376,
|
107 |
+
"grad_norm": 1.2667491436004639,
|
108 |
+
"learning_rate": 1.0825688073394496e-05,
|
109 |
+
"loss": 0.0709,
|
110 |
+
"step": 5500
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"epoch": 2.5020850708924103,
|
114 |
+
"grad_norm": 0.7721763849258423,
|
115 |
+
"learning_rate": 9.99165971643036e-06,
|
116 |
+
"loss": 0.0715,
|
117 |
+
"step": 6000
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"epoch": 2.7105921601334444,
|
121 |
+
"grad_norm": 0.8234202861785889,
|
122 |
+
"learning_rate": 9.157631359466222e-06,
|
123 |
+
"loss": 0.071,
|
124 |
+
"step": 6500
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"epoch": 2.9190992493744785,
|
128 |
+
"grad_norm": 0.7711541056632996,
|
129 |
+
"learning_rate": 8.323603002502085e-06,
|
130 |
+
"loss": 0.0686,
|
131 |
+
"step": 7000
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"epoch": 3.0,
|
135 |
+
"eval_accuracy": 0.9727254447614853,
|
136 |
+
"eval_f1": 0.8379830958274792,
|
137 |
+
"eval_loss": 0.09195627272129059,
|
138 |
+
"eval_precision": 0.8352820877606366,
|
139 |
+
"eval_recall": 0.840701628781099,
|
140 |
+
"eval_runtime": 39.324,
|
141 |
+
"eval_samples_per_second": 243.871,
|
142 |
+
"eval_steps_per_second": 15.258,
|
143 |
+
"step": 7194
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"epoch": 3.127606338615513,
|
147 |
+
"grad_norm": 0.6545294523239136,
|
148 |
+
"learning_rate": 7.4895746455379494e-06,
|
149 |
+
"loss": 0.062,
|
150 |
+
"step": 7500
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"epoch": 3.336113427856547,
|
154 |
+
"grad_norm": 0.6319155097007751,
|
155 |
+
"learning_rate": 6.655546288573812e-06,
|
156 |
+
"loss": 0.0584,
|
157 |
+
"step": 8000
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"epoch": 3.5446205170975813,
|
161 |
+
"grad_norm": 0.9260782599449158,
|
162 |
+
"learning_rate": 5.821517931609675e-06,
|
163 |
+
"loss": 0.0586,
|
164 |
+
"step": 8500
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"epoch": 3.7531276063386154,
|
168 |
+
"grad_norm": 0.8304509520530701,
|
169 |
+
"learning_rate": 4.987489574645538e-06,
|
170 |
+
"loss": 0.0577,
|
171 |
+
"step": 9000
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"epoch": 3.96163469557965,
|
175 |
+
"grad_norm": 0.9533994793891907,
|
176 |
+
"learning_rate": 4.153461217681402e-06,
|
177 |
+
"loss": 0.0552,
|
178 |
+
"step": 9500
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"epoch": 4.0,
|
182 |
+
"eval_accuracy": 0.9727972577031349,
|
183 |
+
"eval_f1": 0.8390449750138812,
|
184 |
+
"eval_loss": 0.09282902628183365,
|
185 |
+
"eval_precision": 0.8329585042113155,
|
186 |
+
"eval_recall": 0.845221048863433,
|
187 |
+
"eval_runtime": 36.8085,
|
188 |
+
"eval_samples_per_second": 260.538,
|
189 |
+
"eval_steps_per_second": 16.301,
|
190 |
+
"step": 9592
|
191 |
+
},
|
192 |
+
{
|
193 |
+
"epoch": 4.170141784820684,
|
194 |
+
"grad_norm": 0.8649327754974365,
|
195 |
+
"learning_rate": 3.319432860717265e-06,
|
196 |
+
"loss": 0.0499,
|
197 |
+
"step": 10000
|
198 |
+
},
|
199 |
+
{
|
200 |
+
"epoch": 4.378648874061718,
|
201 |
+
"grad_norm": 1.5371888875961304,
|
202 |
+
"learning_rate": 2.4854045037531278e-06,
|
203 |
+
"loss": 0.0502,
|
204 |
+
"step": 10500
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"epoch": 4.587155963302752,
|
208 |
+
"grad_norm": 0.7297742366790771,
|
209 |
+
"learning_rate": 1.6513761467889911e-06,
|
210 |
+
"loss": 0.0475,
|
211 |
+
"step": 11000
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"epoch": 4.795663052543786,
|
215 |
+
"grad_norm": 0.8624579310417175,
|
216 |
+
"learning_rate": 8.173477898248542e-07,
|
217 |
+
"loss": 0.0469,
|
218 |
+
"step": 11500
|
219 |
+
},
|
220 |
+
{
|
221 |
+
"epoch": 5.0,
|
222 |
+
"eval_accuracy": 0.97306535935196,
|
223 |
+
"eval_f1": 0.8401763448521553,
|
224 |
+
"eval_loss": 0.09531266242265701,
|
225 |
+
"eval_precision": 0.8361549370678958,
|
226 |
+
"eval_recall": 0.8442366207266869,
|
227 |
+
"eval_runtime": 38.8622,
|
228 |
+
"eval_samples_per_second": 246.769,
|
229 |
+
"eval_steps_per_second": 15.439,
|
230 |
+
"step": 11990
|
231 |
+
}
|
232 |
+
],
|
233 |
+
"logging_steps": 500,
|
234 |
+
"max_steps": 11990,
|
235 |
+
"num_input_tokens_seen": 0,
|
236 |
+
"num_train_epochs": 5,
|
237 |
+
"save_steps": 500,
|
238 |
+
"stateful_callbacks": {
|
239 |
+
"TrainerControl": {
|
240 |
+
"args": {
|
241 |
+
"should_epoch_stop": false,
|
242 |
+
"should_evaluate": false,
|
243 |
+
"should_log": false,
|
244 |
+
"should_save": true,
|
245 |
+
"should_training_stop": true
|
246 |
+
},
|
247 |
+
"attributes": {}
|
248 |
+
}
|
249 |
+
},
|
250 |
+
"total_flos": 0.0,
|
251 |
+
"train_batch_size": 16,
|
252 |
+
"trial_name": null,
|
253 |
+
"trial_params": null
|
254 |
+
}
|
models/bilstm_ner/checkpoint-11990/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3894ce34a7e6a861216b2ff8719fcdd8a22cf7380c7ed3b53c563a20103b82a5
|
3 |
+
size 5176
|
models/bilstm_ner/checkpoint-11990/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
models/bilstm_ner/checkpoint-2398/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b68f00ba893f31af22b9d31607ee2aea9068594d743b9d2a05eb115093f0726
|
3 |
+
size 14244
|
models/bilstm_ner/checkpoint-2398/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8667350c70351cc7a49cc64441e72314afc7bb9f732b672a56f8eedb03ba4ead
|
3 |
+
size 1064
|