Spaces:
Sleeping
Sleeping
pizzagatakasugi
committed on
Commit
·
1081955
1
Parent(s):
565028a
Update tools.py
Browse files
tools.py
CHANGED
@@ -159,3 +159,18 @@ def add_symbol(df,column):
|
|
159 |
df[column].iloc[x] = kif
|
160 |
kif = ""
|
161 |
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
df[column].iloc[x] = kif
|
160 |
kif = ""
|
161 |
return df
|
162 |
+
|
163 |
+
def preprocess_data(
    data: pd.DataFrame,
    tokenizer: "PreTrainedTokenizer",
    max_length: int = 512,
) -> "BatchEncoding":
    """Tokenize the ``"input"`` and ``"output"`` columns of *data*.

    The ``"input"`` texts are tokenized into the model inputs. The
    ``"output"`` texts are tokenized as well, but only their token IDs are
    kept, stored under the ``"labels"`` key of the returned encoding.

    Args:
        data: Table (or any mapping-like object) exposing ``"input"`` and
            ``"output"`` entries that hold the source and target texts.
        tokenizer: Callable tokenizer invoked as
            ``tokenizer(texts, max_length=..., truncation=True)``.
        max_length: Maximum number of tokens kept per text; longer texts
            are truncated. Defaults to 512.

    Returns:
        The encoding produced for the input texts, with an extra
        ``"labels"`` entry holding the token IDs of the output texts.
    """
    # Tokenize the input texts.
    inputs = tokenizer(
        _as_text_batch(data["input"]), max_length=max_length, truncation=True
    )
    # Tokenize the target texts; only their token IDs are used as labels.
    inputs["labels"] = tokenizer(
        _as_text_batch(data["output"]), max_length=max_length, truncation=True
    )["input_ids"]
    return inputs


def _as_text_batch(column):
    """Return *column* in a form a tokenizer accepts (str, list or tuple).

    Hugging Face tokenizers reject array-like containers such as
    ``pandas.Series``, so anything that is not already a str/list/tuple is
    converted to a plain Python list.
    """
    if isinstance(column, (str, list, tuple)):
        return column
    to_list = getattr(column, "tolist", None)
    return to_list() if callable(to_list) else list(column)
|