xzuyn committed on
Commit 60a0592
1 Parent(s): 98985f3

Update app.py

Files changed (1)
  1. app.py +16 -5
app.py CHANGED
@@ -1,15 +1,26 @@
 from transformers import AutoTokenizer
 import gradio as gr
 
-
 gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
 gptj_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b")
 gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
 llama_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
 
-def tokenize(input_text):
+tokenizers = {
+    "GPT-2": gpt2_tokenizer,
+    "GPT-J": gptj_tokenizer,
+    "GPT-NeoX": gpt_neox_tokenizer,
+    "LLaMa": llama_tokenizer
+}
+
+def tokenize(input_text, tokenizer_name):
+    tokenizer = tokenizers[tokenizer_name]
     tokens = tokenizer(input_text)["input_ids"]
-    return f"Number of tokens: {len(tokens)}"
+    return f"Number of tokens for {tokenizer_name}: {len(tokens)}"
+
+textbox_input = gr.inputs.Textbox(lines=7)
+dropdown_tokenizer = gr.inputs.Dropdown(choices=list(tokenizers.keys()), default="LLaMa")
+output_text = "text"
 
-iface = gr.Interface(fn=tokenize, inputs=gr.inputs.Textbox(lines=7), outputs="text")
-iface.launch()
+iface = gr.Interface(fn=tokenize, inputs=[textbox_input, dropdown_tokenizer], outputs=output_text)
+iface.launch()
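
Note: besides adding the tokenizer dropdown, this change fixes a bug in the old tokenize(), which read a module-level name tokenizer that was never defined and would have raised NameError on the first call. The updated code still targets the old Gradio component namespace, though: gr.inputs was deprecated in Gradio 3.x and removed in 4.x, where components live at the top level and Dropdown takes value= rather than default=. A minimal sketch of the same app against the current API (assuming a recent Gradio install; not part of this commit):

from transformers import AutoTokenizer
import gradio as gr

# Same four tokenizers as the commit, built inline and keyed by display name.
tokenizers = {
    "GPT-2": AutoTokenizer.from_pretrained("gpt2"),
    "GPT-J": AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6b"),
    "GPT-NeoX": AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b"),
    "LLaMa": AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer"),
}

def tokenize(input_text, tokenizer_name):
    # Count the input_ids the selected tokenizer produces for the text.
    tokens = tokenizers[tokenizer_name](input_text)["input_ids"]
    return f"Number of tokens for {tokenizer_name}: {len(tokens)}"

iface = gr.Interface(
    fn=tokenize,
    inputs=[
        gr.Textbox(lines=7),                                           # was gr.inputs.Textbox
        gr.Dropdown(choices=list(tokenizers.keys()), value="LLaMa"),   # default= became value=
    ],
    outputs="text",
)
iface.launch()

Building the dict inline also drops the four throwaway module-level *_tokenizer names; behavior is otherwise identical to the committed version.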