taishi-i committed on
Commit
896ca2b
·
1 Parent(s): bde3055

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -21
app.py CHANGED
@@ -3,25 +3,79 @@ import streamlit as st
3
  from transformers import pipeline
4
  from nagisa_bert import NagisaBertTokenizer
5
 
6
- tokenizer = NagisaBertTokenizer.from_pretrained("taishi-i/nagisa_bert")
7
- fill_mask = pipeline(
8
- "fill-mask",
9
- model="taishi-i/nagisa_bert",
10
- tokenizer=tokenizer
11
- )
12
-
13
- with st.form("Fill-mask"):
14
-
15
- text = st.text_area(
16
- label="[MASK]ใ‚’ๅซใ‚€ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚",
17
- value="nagisaใง[MASK]ใงใใ‚‹ใƒขใƒ‡ใƒซใงใ™",
18
- max_chars=512
 
 
 
19
  )
20
- submitted = st.form_submit_button("Submit")
21
-
22
- if submitted:
23
- if "[MASK]" in text:
24
- out = fill_mask(text)
25
- st.json(out)
26
- else:
27
- st.write("[MASK] ใ‚’ๅซใ‚€ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from transformers import pipeline
4
  from nagisa_bert import NagisaBertTokenizer
5
 
6
+
7
+ @st.cache(allow_output_mutation=True)
8
+ def load_tasks():
9
+ model_name = "taishi-i/nagisa_bert"
10
+
11
+ tokenizer = NagisaBertTokenizer.from_pretrained(model_name)
12
+ fill_mask = pipeline(
13
+ "fill-mask",
14
+ model=model_name,
15
+ tokenizer=tokenizer
16
+ )
17
+
18
+ feature_extract = pipeline(
19
+ "feature-extraction",
20
+ model=model_name,
21
+ tokenizer=tokenizer
22
  )
23
+
24
+ tasks = {
25
+ "Tokenize": tokenizer,
26
+ "Fill-mask": fill_mask,
27
+ "Feature-extraction": feature_extract
28
+ }
29
+ return tasks
30
+
31
+
32
+ task2samples = {
33
+ "Fill-mask": {
34
+ "label": "[MASK]ใ‚’ๅซใ‚€ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚",
35
+ "value": "nagisaใง[MASK]ใงใใ‚‹ใƒขใƒ‡ใƒซใงใ™"
36
+ },
37
+ "Feature-extraction": {
38
+ "label": "[CLS]ใƒˆใƒผใ‚ฏใƒณใฎใƒ™ใ‚ฏใƒˆใƒซใ‚’ๅ–ๅพ—ใ—ใพใ™ใ€‚ใƒ™ใ‚ฏใƒˆใƒซๅŒ–ใ™ใ‚‹ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚",
39
+ "value": "nagisaใงๅˆฉ็”จใงใใ‚‹ใƒขใƒ‡ใƒซใงใ™"
40
+ },
41
+ "Tokenize": {
42
+ "label": "ใƒˆใƒผใ‚ฏใƒŠใ‚คใ‚บใ™ใ‚‹ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚",
43
+ "value": "nagisaใงๅˆฉ็”จใงใใ‚‹ใƒขใƒ‡ใƒซใงใ™"
44
+ },
45
+ }
46
+
47
+
48
+ def main():
49
+ tasks = load_tasks()
50
+
51
+ task_selection = st.selectbox(
52
+ "Select task (Fill-mask, Feature-extraction, Tokenize)",
53
+ ("Fill-mask", "Feature-extraction", "Tokenize"))
54
+
55
+ with st.form("Fill-mask"):
56
+
57
+ text = st.text_area(
58
+ label=task2samples[task_selection]["label"],
59
+ value=task2samples[task_selection]["value"],
60
+ max_chars=512
61
+ )
62
+
63
+ submitted = st.form_submit_button("Submit")
64
+
65
+ if submitted:
66
+ if task_selection == "Fill-mask":
67
+ if "[MASK]" in text:
68
+ out = tasks[task_selection](text)
69
+ st.json(out)
70
+ else:
71
+ st.write("[MASK] ใ‚’ๅซใ‚€ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅๅŠ›ใ—ใฆใใ ใ•ใ„ใ€‚")
72
+ elif task_selection == "Feature-extraction":
73
+ out = tasks[task_selection](text)[0][0]
74
+ st.code(out)
75
+ elif task_selection == "Tokenize":
76
+ out = tasks[task_selection].tokenize(text)
77
+ st.json(out)
78
+
79
+
80
+ if __name__ == "__main__":
81
+ main()