Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -39,7 +39,6 @@ desc2opt = {v: k for k, v in opt2desc.items()}
|
|
39 |
optimization_options = list(opt2desc.values())
|
40 |
|
41 |
|
42 |
-
|
43 |
def download_and_tokenize(
|
44 |
ds_name,
|
45 |
ds_config,
|
@@ -51,7 +50,6 @@ def download_and_tokenize(
|
|
51 |
num2embed,
|
52 |
progress=gr.Progress(track_tqdm=True),
|
53 |
):
|
54 |
-
|
55 |
num_samples = download_dataset(ds_name, ds_config, ds_split, num2skip, num2embed)
|
56 |
|
57 |
opt_level = desc2opt[opt_desc]
|
@@ -69,8 +67,6 @@ def download_and_tokenize(
|
|
69 |
)
|
70 |
|
71 |
return f"Downloaded! It has {len(num_samples)} docs."
|
72 |
-
|
73 |
-
|
74 |
|
75 |
|
76 |
def embed(
|
@@ -85,7 +81,6 @@ def embed(
|
|
85 |
num2embed,
|
86 |
progress=gr.Progress(track_tqdm=True),
|
87 |
):
|
88 |
-
|
89 |
ds = load_tokenized_dataset(ds_name, ds_config, ds_split)
|
90 |
|
91 |
opt_level = desc2opt[opt_desc]
|
@@ -154,7 +149,9 @@ with gr.Blocks(title="Bulk embeddings") as demo:
|
|
154 |
value="wikipedia",
|
155 |
)
|
156 |
ds_config = gr.Textbox(
|
157 |
-
lines=1,
|
|
|
|
|
158 |
)
|
159 |
|
160 |
column_name = gr.Textbox(lines=1, label="Enter column to embed", value="text")
|
@@ -208,18 +205,20 @@ with gr.Blocks(title="Bulk embeddings") as demo:
|
|
208 |
)
|
209 |
|
210 |
with gr.Row():
|
211 |
-
|
212 |
download_btn = gr.Button(value="Download and tokenize dataset!")
|
213 |
embed_btn = gr.Button(value="Embed texts!")
|
214 |
|
215 |
last = gr.Textbox(value="")
|
216 |
|
217 |
download_btn.click(
|
218 |
-
fn=
|
219 |
inputs=[
|
220 |
ds_name,
|
221 |
ds_config,
|
|
|
222 |
ds_split,
|
|
|
|
|
223 |
num2skip,
|
224 |
num2embed,
|
225 |
],
|
@@ -244,4 +243,4 @@ with gr.Blocks(title="Bulk embeddings") as demo:
|
|
244 |
|
245 |
|
246 |
if __name__ == "__main__":
|
247 |
-
demo.queue(concurrency_count=20).launch(show_error=True, debug=True)
|
|
|
39 |
optimization_options = list(opt2desc.values())
|
40 |
|
41 |
|
|
|
42 |
def download_and_tokenize(
|
43 |
ds_name,
|
44 |
ds_config,
|
|
|
50 |
num2embed,
|
51 |
progress=gr.Progress(track_tqdm=True),
|
52 |
):
|
|
|
53 |
num_samples = download_dataset(ds_name, ds_config, ds_split, num2skip, num2embed)
|
54 |
|
55 |
opt_level = desc2opt[opt_desc]
|
|
|
67 |
)
|
68 |
|
69 |
return f"Downloaded! It has {len(num_samples)} docs."
|
|
|
|
|
70 |
|
71 |
|
72 |
def embed(
|
|
|
81 |
num2embed,
|
82 |
progress=gr.Progress(track_tqdm=True),
|
83 |
):
|
|
|
84 |
ds = load_tokenized_dataset(ds_name, ds_config, ds_split)
|
85 |
|
86 |
opt_level = desc2opt[opt_desc]
|
|
|
149 |
value="wikipedia",
|
150 |
)
|
151 |
ds_config = gr.Textbox(
|
152 |
+
lines=1,
|
153 |
+
label="Dataset config (leave blank to use default)",
|
154 |
+
value="20220301.en",
|
155 |
)
|
156 |
|
157 |
column_name = gr.Textbox(lines=1, label="Enter column to embed", value="text")
|
|
|
205 |
)
|
206 |
|
207 |
with gr.Row():
|
|
|
208 |
download_btn = gr.Button(value="Download and tokenize dataset!")
|
209 |
embed_btn = gr.Button(value="Embed texts!")
|
210 |
|
211 |
last = gr.Textbox(value="")
|
212 |
|
213 |
download_btn.click(
|
214 |
+
fn=download_and_tokenize,
|
215 |
inputs=[
|
216 |
ds_name,
|
217 |
ds_config,
|
218 |
+
column_name,
|
219 |
ds_split,
|
220 |
+
model_choice,
|
221 |
+
opt_desc,
|
222 |
num2skip,
|
223 |
num2embed,
|
224 |
],
|
|
|
243 |
|
244 |
|
245 |
if __name__ == "__main__":
|
246 |
+
demo.queue(concurrency_count=20).launch(show_error=True, debug=True)
|