nbroad HF staff committed on
Commit
ef6b0bc
1 Parent(s): d2a60ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -39,7 +39,6 @@ desc2opt = {v: k for k, v in opt2desc.items()}
39
  optimization_options = list(opt2desc.values())
40
 
41
 
42
-
43
  def download_and_tokenize(
44
  ds_name,
45
  ds_config,
@@ -51,7 +50,6 @@ def download_and_tokenize(
51
  num2embed,
52
  progress=gr.Progress(track_tqdm=True),
53
  ):
54
-
55
  num_samples = download_dataset(ds_name, ds_config, ds_split, num2skip, num2embed)
56
 
57
  opt_level = desc2opt[opt_desc]
@@ -69,8 +67,6 @@ def download_and_tokenize(
69
  )
70
 
71
  return f"Downloaded! It has {len(num_samples)} docs."
72
-
73
-
74
 
75
 
76
  def embed(
@@ -85,7 +81,6 @@ def embed(
85
  num2embed,
86
  progress=gr.Progress(track_tqdm=True),
87
  ):
88
-
89
  ds = load_tokenized_dataset(ds_name, ds_config, ds_split)
90
 
91
  opt_level = desc2opt[opt_desc]
@@ -154,7 +149,9 @@ with gr.Blocks(title="Bulk embeddings") as demo:
154
  value="wikipedia",
155
  )
156
  ds_config = gr.Textbox(
157
- lines=1, label="Dataset config (leave blank to use default)", value="20220301.en"
 
 
158
  )
159
 
160
  column_name = gr.Textbox(lines=1, label="Enter column to embed", value="text")
@@ -208,18 +205,20 @@ with gr.Blocks(title="Bulk embeddings") as demo:
208
  )
209
 
210
  with gr.Row():
211
-
212
  download_btn = gr.Button(value="Download and tokenize dataset!")
213
  embed_btn = gr.Button(value="Embed texts!")
214
 
215
  last = gr.Textbox(value="")
216
 
217
  download_btn.click(
218
- fn=download,
219
  inputs=[
220
  ds_name,
221
  ds_config,
 
222
  ds_split,
 
 
223
  num2skip,
224
  num2embed,
225
  ],
@@ -244,4 +243,4 @@ with gr.Blocks(title="Bulk embeddings") as demo:
244
 
245
 
246
  if __name__ == "__main__":
247
- demo.queue(concurrency_count=20).launch(show_error=True, debug=True)
 
39
  optimization_options = list(opt2desc.values())
40
 
41
 
 
42
  def download_and_tokenize(
43
  ds_name,
44
  ds_config,
 
50
  num2embed,
51
  progress=gr.Progress(track_tqdm=True),
52
  ):
 
53
  num_samples = download_dataset(ds_name, ds_config, ds_split, num2skip, num2embed)
54
 
55
  opt_level = desc2opt[opt_desc]
 
67
  )
68
 
69
  return f"Downloaded! It has {len(num_samples)} docs."
 
 
70
 
71
 
72
  def embed(
 
81
  num2embed,
82
  progress=gr.Progress(track_tqdm=True),
83
  ):
 
84
  ds = load_tokenized_dataset(ds_name, ds_config, ds_split)
85
 
86
  opt_level = desc2opt[opt_desc]
 
149
  value="wikipedia",
150
  )
151
  ds_config = gr.Textbox(
152
+ lines=1,
153
+ label="Dataset config (leave blank to use default)",
154
+ value="20220301.en",
155
  )
156
 
157
  column_name = gr.Textbox(lines=1, label="Enter column to embed", value="text")
 
205
  )
206
 
207
  with gr.Row():
 
208
  download_btn = gr.Button(value="Download and tokenize dataset!")
209
  embed_btn = gr.Button(value="Embed texts!")
210
 
211
  last = gr.Textbox(value="")
212
 
213
  download_btn.click(
214
+ fn=download_and_tokenize,
215
  inputs=[
216
  ds_name,
217
  ds_config,
218
+ column_name,
219
  ds_split,
220
+ model_choice,
221
+ opt_desc,
222
  num2skip,
223
  num2embed,
224
  ],
 
243
 
244
 
245
  if __name__ == "__main__":
246
+ demo.queue(concurrency_count=20).launch(show_error=True, debug=True)