909ahmed committed on
Commit
1f29464
1 Parent(s): 6c1f2be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -0
app.py CHANGED
@@ -90,12 +90,16 @@ title = "Ghalib doing tiktok"
90
  description = "A simple Gradio interface to infer urdu tokenizer"
91
 
92
  tokenizer = Tokenizer()
 
93
  with open('merges.pkl', 'rb') as files:
94
  tokenizer.vocab = pickle.load(files)
95
  with open('vocab.pkl', 'rb') as files:
96
  tokenizer.merges = pickle.load(files)
97
 
98
  def inference(text):
 
 
 
99
  return tokenizer.encode(text)
100
 
101
  iface = gr.Interface(
 
90
  description = "A simple Gradio interface to infer urdu tokenizer"
91
 
92
  tokenizer = Tokenizer()
93
+ temp = Tokenizer()
94
  with open('merges.pkl', 'rb') as files:
95
  tokenizer.vocab = pickle.load(files)
96
  with open('vocab.pkl', 'rb') as files:
97
  tokenizer.merges = pickle.load(files)
98
 
99
  def inference(text):
100
+ print(len(tokenizer.merges))
101
+ print(len(tokenizer.vocab))
102
+ print(len(temp.encode(text)) / len(tokenizer.encode(text)))
103
  return tokenizer.encode(text)
104
 
105
  iface = gr.Interface(