Update app.py

app.py CHANGED
@@ -3,25 +3,79 @@ import streamlit as st
 from transformers import pipeline
 from nagisa_bert import NagisaBertTokenizer
 
+
+@st.cache(allow_output_mutation=True)
+def load_tasks():
+    model_name = "taishi-i/nagisa_bert"
+
+    tokenizer = NagisaBertTokenizer.from_pretrained(model_name)
+    fill_mask = pipeline(
+        "fill-mask",
+        model=model_name,
+        tokenizer=tokenizer
+    )
+
+    feature_extract = pipeline(
+        "feature-extraction",
+        model=model_name,
+        tokenizer=tokenizer
+    )
+
+    tasks = {
+        "Tokenize": tokenizer,
+        "Fill-mask": fill_mask,
+        "Feature-extraction": feature_extract
+    }
+    return tasks
+
+
+task2samples = {
+    "Fill-mask": {
+        "label": "Enter a text containing [MASK].",
+        "value": "nagisaで[MASK]できるモデルです"
+    },
+    "Feature-extraction": {
+        "label": "Gets the vector of the [CLS] token. Enter the text you want to vectorize.",
+        "value": "nagisaで利用できるモデルです"
+    },
+    "Tokenize": {
+        "label": "Enter the text to tokenize.",
+        "value": "nagisaで利用できるモデルです"
+    },
+}
+
+
+def main():
+    tasks = load_tasks()
+
+    task_selection = st.selectbox(
+        "Select task (Fill-mask, Feature-extraction, Tokenize)",
+        ("Fill-mask", "Feature-extraction", "Tokenize"))
+
+    with st.form("Fill-mask"):
+
+        text = st.text_area(
+            label=task2samples[task_selection]["label"],
+            value=task2samples[task_selection]["value"],
+            max_chars=512
+        )
+
+        submitted = st.form_submit_button("Submit")
+
+        if submitted:
+            if task_selection == "Fill-mask":
+                if "[MASK]" in text:
+                    out = tasks[task_selection](text)
+                    st.json(out)
+                else:
+                    st.write("Please enter a text containing [MASK].")
+            elif task_selection == "Feature-extraction":
+                out = tasks[task_selection](text)[0][0]
+                st.code(out)
+            elif task_selection == "Tokenize":
+                out = tasks[task_selection].tokenize(text)
+                st.json(out)
+
+
+if __name__ == "__main__":
+    main()
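For reference, the three tasks wired into the app can also be exercised outside Streamlit. The following is a minimal sketch, assuming the nagisa_bert package is installed and the taishi-i/nagisa_bert checkpoint can be downloaded; the printed structures are what the app renders with st.json and st.code.

# Minimal sketch: run the app's three tasks without Streamlit.
# Assumes `pip install transformers nagisa_bert` and network access
# to fetch the taishi-i/nagisa_bert checkpoint.
from transformers import pipeline
from nagisa_bert import NagisaBertTokenizer

model_name = "taishi-i/nagisa_bert"
tokenizer = NagisaBertTokenizer.from_pretrained(model_name)

# Tokenize: word segmentation with the nagisa-backed tokenizer.
print(tokenizer.tokenize("nagisaで利用できるモデルです"))

# Fill-mask: candidate tokens with scores for the [MASK] slot.
fill_mask = pipeline("fill-mask", model=model_name, tokenizer=tokenizer)
print(fill_mask("nagisaで[MASK]できるモデルです"))

# Feature-extraction: [0][0] selects the [CLS] token vector,
# mirroring what the app does before displaying it with st.code.
feature_extract = pipeline(
    "feature-extraction", model=model_name, tokenizer=tokenizer
)
print(feature_extract("nagisaで利用できるモデルです")[0][0])

The @st.cache(allow_output_mutation=True) decorator keeps the loaded models in memory across Streamlit reruns; note that newer Streamlit releases deprecate st.cache in favor of st.cache_resource for unhashable objects such as models.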