Commit
·
849819f
1
Parent(s):
498ff0a
upload post processing
Browse files
app.py
CHANGED
@@ -25,7 +25,7 @@ model = load_model("QuoQA-NLP/konec-privacy")
|
|
25 |
model.eval()
|
26 |
|
27 |
|
28 |
-
default_value = "
|
29 |
|
30 |
src_text = st.text_area(
|
31 |
"๊ฒ์ฌํ๊ณ ์ถ์ ๋ฌธ์ฅ์ ์
๋ ฅํ์ธ์.",
|
@@ -74,10 +74,10 @@ def yield_df(default_value):
|
|
74 |
print(class_decoded)
|
75 |
|
76 |
label_map = {
|
77 |
-
"ADD":
|
78 |
"DN": "์งํ ์ ๋ณด",
|
79 |
"DT": "๋ ์ง ์ ๋ณด",
|
80 |
-
"LC": "
|
81 |
"OG": "๊ธฐ๊ด ์ ๋ณด",
|
82 |
"PS": "์ธ๋ช
/๋ณ๋ช
์ ๋ณด",
|
83 |
"QT": "์๋ ์ ๋ณด",
|
@@ -85,6 +85,7 @@ def yield_df(default_value):
|
|
85 |
"O": "๋น๋ฏผ๊ฐ ์ ๋ณด"
|
86 |
}
|
87 |
|
|
|
88 |
# pair tokens with prediction
|
89 |
tokenized_text = tokenizer.convert_ids_to_tokens(tokenized)
|
90 |
list_result = []
|
@@ -99,7 +100,6 @@ def yield_df(default_value):
|
|
99 |
df = pd.DataFrame(list_result)
|
100 |
# remove first and last row
|
101 |
df = df.iloc[1:-1]
|
102 |
-
st.table(df)
|
103 |
return df
|
104 |
|
105 |
def convert_df(df:pd.DataFrame):
|
@@ -113,15 +113,45 @@ def convert_json(df:pd.DataFrame):
|
|
113 |
return json_string
|
114 |
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
if src_text == "":
|
117 |
st.warning("Please **enter text** for translation")
|
118 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
st.markdown("### ๋ถ๋ฅ๋ ๋จ์ด๋ค")
|
120 |
st.header("")
|
121 |
cs, c1, c2, c3, cLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
|
122 |
|
123 |
-
df_result
|
124 |
-
|
125 |
with c1:
|
126 |
#csvbutton = download_button(results, "results.csv", "๐ฅ Download .csv")
|
127 |
csvbutton = st.download_button(label="๐ฅ csv๋ก ๋ค์ด๋ก๋", data=convert_df(df_result), file_name= "results.csv", mime='text/csv', key='csv')
|
@@ -132,6 +162,8 @@ else:
|
|
132 |
#jsonbutton = download_button(results, "results.json", "๐ฅ Download .json")
|
133 |
jsonbutton = st.download_button(label="๐ฅ json์ผ๋ก ๋ค์ด๋ก๋", data=convert_json(df_result), file_name= "results.json", mime='application/json', key='json')
|
134 |
|
|
|
|
|
135 |
with st.expander("(์ฃผ) ์ฟผ์นด์์ด์์ด ๋ฐ๋ชจ ์ฌ์ฌ ๊ด๋ จ", expanded=True):
|
136 |
|
137 |
st.write(
|
|
|
25 |
model.eval()
|
26 |
|
27 |
|
28 |
+
default_value = "์์ง๋, ๋น๋จ ๊ฒ์ฌํ ๊ฑฐ ๊ฒฐ๊ณผ ๋์ค์
จ์ด์."
|
29 |
|
30 |
src_text = st.text_area(
|
31 |
"๊ฒ์ฌํ๊ณ ์ถ์ ๋ฌธ์ฅ์ ์
๋ ฅํ์ธ์.",
|
|
|
74 |
print(class_decoded)
|
75 |
|
76 |
label_map = {
|
77 |
+
"ADD": "์ฃผ์ ์ ๋ณด",
|
78 |
"DN": "์งํ ์ ๋ณด",
|
79 |
"DT": "๋ ์ง ์ ๋ณด",
|
80 |
+
"LC": "์ฅ์ ์ ๋ณด",
|
81 |
"OG": "๊ธฐ๊ด ์ ๋ณด",
|
82 |
"PS": "์ธ๋ช
/๋ณ๋ช
์ ๋ณด",
|
83 |
"QT": "์๋ ์ ๋ณด",
|
|
|
85 |
"O": "๋น๋ฏผ๊ฐ ์ ๋ณด"
|
86 |
}
|
87 |
|
88 |
+
|
89 |
# pair tokens with prediction
|
90 |
tokenized_text = tokenizer.convert_ids_to_tokens(tokenized)
|
91 |
list_result = []
|
|
|
100 |
df = pd.DataFrame(list_result)
|
101 |
# remove first and last row
|
102 |
df = df.iloc[1:-1]
|
|
|
103 |
return df
|
104 |
|
105 |
def convert_df(df:pd.DataFrame):
|
|
|
113 |
return json_string
|
114 |
|
115 |
|
116 |
+
|
117 |
+
filtering_map = {
|
118 |
+
"์ฃผ์ ์ ๋ณด": "[์ฃผ์]",
|
119 |
+
"์งํ ์ ๋ณด": "[์งํ]",
|
120 |
+
"๋ ์ง ์ ๋ณด": "[๋ ์ง]",
|
121 |
+
"์ฅ์ ์ ๋ณด": "[์ฅ์]",
|
122 |
+
"๊ธฐ๊ด ์ ๋ณด": "[๊ธฐ๊ด]",
|
123 |
+
"์ธ๋ช
/๋ณ๋ช
์ ๋ณด": "[์ด๋ฆ]",
|
124 |
+
"์๋ ์ ๋ณด": "[์๋]",
|
125 |
+
"๊ด๊ณ ์ ๋ณด": "[๊ด๊ณ]",
|
126 |
+
"๋น๋ฏผ๊ฐ ์ ๋ณด": "[๋น๋ฏผ๊ฐ]"
|
127 |
+
}
|
128 |
+
|
129 |
if src_text == "":
|
130 |
st.warning("Please **enter text** for translation")
|
131 |
else:
|
132 |
+
df_result = yield_df(src_text)
|
133 |
+
st.markdown("### ํํฐ๋ง ๋ ๋ฌธ์ฅ")
|
134 |
+
|
135 |
+
display_result = ""
|
136 |
+
for index, row in df_result.iterrows():
|
137 |
+
token_info = row["ํํ์"]
|
138 |
+
label_info = row["์์ ๋ผ๋ฒจ"]
|
139 |
+
if label_info != "๋น๋ฏผ๊ฐ ์ ๋ณด":
|
140 |
+
token_info = filtering_map[label_info]
|
141 |
+
|
142 |
+
if "##" in token_info:
|
143 |
+
token_info = token_info.replace("##", "")
|
144 |
+
else:
|
145 |
+
token_info = " " + token_info
|
146 |
+
display_result += token_info
|
147 |
+
|
148 |
+
st.write(display_result)
|
149 |
+
|
150 |
st.markdown("### ๋ถ๋ฅ๋ ๋จ์ด๋ค")
|
151 |
st.header("")
|
152 |
cs, c1, c2, c3, cLast = st.columns([0.75, 1.5, 1.5, 1.5, 0.75])
|
153 |
|
154 |
+
st.table(df_result)
|
|
|
155 |
with c1:
|
156 |
#csvbutton = download_button(results, "results.csv", "๐ฅ Download .csv")
|
157 |
csvbutton = st.download_button(label="๐ฅ csv๋ก ๋ค์ด๋ก๋", data=convert_df(df_result), file_name= "results.csv", mime='text/csv', key='csv')
|
|
|
162 |
#jsonbutton = download_button(results, "results.json", "๐ฅ Download .json")
|
163 |
jsonbutton = st.download_button(label="๐ฅ json์ผ๋ก ๋ค์ด๋ก๋", data=convert_json(df_result), file_name= "results.json", mime='application/json', key='json')
|
164 |
|
165 |
+
|
166 |
+
|
167 |
with st.expander("(์ฃผ) ์ฟผ์นด์์ด์์ด ๋ฐ๋ชจ ์ฌ์ฌ ๊ด๋ จ", expanded=True):
|
168 |
|
169 |
st.write(
|