concedo committed
Commit: d35776c
Parent: 60dd339

Update app.py

Files changed (1)
  1. app.py +17 -60
app.py CHANGED
@@ -6,58 +6,21 @@ def formatarr(input):
 
 def tokenize(input_text):
     llama_tokens = llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    llama3_tokens = len(
-        llama3_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    mistral_tokens = len(
-        mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    gpt2_tokens = len(
-        gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    gpt_neox_tokens = len(
-        gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    falcon_tokens = len(
-        falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    phi2_tokens = len(
-        phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    t5_tokens = len(
-        t5_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    gemma_tokens = len(
-        gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    command_r_tokens = len(
-        command_r_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    qwen_tokens = len(
-        qwen_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
-    codeqwen_tokens = len(
-        codeqwen_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    )
+    llama3_tokens = llama3_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    mistral_tokens = mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    gpt2_tokens = gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    gpt_neox_tokens = gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    falcon_tokens = falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    phi2_tokens = phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    t5_tokens = t5_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    gemma_tokens = gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    command_r_tokens = command_r_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    qwen_tokens = qwen_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    codeqwen_tokens = codeqwen_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+
 
     results = {
-        "LLaMa-1/LLaMa-2": len(llama_tokens),
-        "LLaMa-3": llama3_tokens,
-        "Mistral": mistral_tokens,
-        "GPT-2/GPT-J": gpt2_tokens,
-        "GPT-NeoX": gpt_neox_tokens,
-        "Falcon": falcon_tokens,
-        "Phi-1/Phi-2": phi2_tokens,
-        "T5": t5_tokens,
-        "Gemma": gemma_tokens,
-        "Command-R": command_r_tokens,
-        "Qwen/Qwen1.5": qwen_tokens,
-        "CodeQwen": codeqwen_tokens,
-    }
-
-    results2 = {
-        "LLaMa-1/LLaMa-2": formatarr(llama_tokens),
+        "LLaMa-1/LLaMa-2": llama_tokens,
         "LLaMa-3": llama3_tokens,
         "Mistral": mistral_tokens,
         "GPT-2/GPT-J": gpt2_tokens,
@@ -71,16 +34,10 @@ def tokenize(input_text):
         "CodeQwen": codeqwen_tokens,
     }
 
-    # Sort the results in descending order based on token length
-    sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
-
-    lens = "\n".join([f"{model}: {tokens}" for model, tokens in sorted_results])
-    toks = ""
-
-    for model, tokens in results2.items():
-        toks += f"\n{model}: {tokens}"
-
-    return lens + "\n" + toks
+    toks = ""
+    for model, tokens in results.items():
+        toks += f"\n{model}: {len(tokens)} = {formatarr(tokens)}"
+    return toks
 
 
 if __name__ == "__main__":
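
The refactor keeps the raw token-id lists in a single results dict and derives both the count and the formatted array only when building the output string, instead of maintaining parallel results/results2 dicts. A minimal runnable sketch of the new behavior, trimmed to two tokenizers; the tokenizer checkpoints below and the body of formatarr (which sits outside this diff) are assumptions, not the app's actual choices:

    from transformers import AutoTokenizer

    def formatarr(input):
        # Assumed stand-in for app.py's formatarr (its body is not shown in this diff):
        # renders a token-id list as a bracketed string.
        return "[" + ", ".join(str(t) for t in input) + "]"

    # Illustrative tokenizers; the real app loads twelve model-specific ones.
    gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")

    def tokenize(input_text):
        # Keep raw token-id lists; count and format them at output time.
        results = {
            "GPT-2/GPT-J": gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"],
            "GPT-NeoX": gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"],
        }
        toks = ""
        for model, tokens in results.items():
            # New output shape: "<model>: <count> = <token ids>"
            toks += f"\n{model}: {len(tokens)} = {formatarr(tokens)}"
        return toks

    print(tokenize("Hello world"))

Each model then prints on one line in the "name: count = ids" shape, which is why the separate length-only dict and the descending sort by token count could be dropped.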