xzuyn committed on
Commit
2789d18
·
1 Parent(s): d044f3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -2,13 +2,15 @@ from transformers import AutoTokenizer
2
  import gradio as gr
3
 
4
 
5
- gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
6
- gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
7
- llama_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
8
- falcon_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
9
- phi2_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
10
- t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
11
- mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
 
 
12
 
13
  def tokenize(input_text):
14
  gpt2_tokens = gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
@@ -21,5 +23,9 @@ def tokenize(input_text):
21
 
22
  return f"GPT-2/GPT-J: {len(gpt2_tokens)}\nGPT-NeoX: {len(gpt_neox_tokens)}\nLLaMa: {len(llama_tokens)}\nFalcon: {len(falcon_tokens)}\nPhi-2: {len(phi2_tokens)}\nT5: {len(t5_tokens)}\nMistral: {len(mistral_tokens)}"
23
 
24
- iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(lines=7), outputs="text")
25
- iface.launch()
 
 
 
 
 
2
  import gradio as gr
3
 
4
 
5
def load_tokenizers():
    """Load the tokenizers and publish them as module-level names.

    ``tokenize`` looks up ``gpt2_tokenizer`` as a global (and presumably
    the other tokenizers the same way -- the rest of its body is not
    visible here), so the objects created in this function must be bound
    at module scope rather than as locals.

    Each ``AutoTokenizer.from_pretrained`` call resolves the given model
    identifier through the ``transformers`` library; any exception it
    raises propagates to the caller unchanged.

    Returns:
        None. The result is the set of module-level tokenizer names.
    """
    # Without these declarations the assignments below would create
    # function locals that disappear on return, and ``tokenize`` would
    # fail with NameError on its first lookup.
    global gpt2_tokenizer, gpt_neox_tokenizer, llama_tokenizer
    global falcon_tokenizer, phi2_tokenizer, t5_tokenizer, mistral_tokenizer

    gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
    llama_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
    falcon_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
    phi2_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
    t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
    mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
13
+
14
 
15
  def tokenize(input_text):
16
  gpt2_tokens = gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
 
23
 
24
  return f"GPT-2/GPT-J: {len(gpt2_tokens)}\nGPT-NeoX: {len(gpt_neox_tokens)}\nLLaMa: {len(llama_tokens)}\nFalcon: {len(falcon_tokens)}\nPhi-2: {len(phi2_tokens)}\nT5: {len(t5_tokens)}\nMistral: {len(mistral_tokens)}"
25
 
26
+
27
+ if __name__ == "__main__":
28
+ load_tokenizers()
29
+
30
+ iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(lines=7), outputs="text")
31
+ iface.launch()