davidberenstein1957 HF staff committed on
Commit
bcbb85b
1 Parent(s): 71c2318

fix: limit token lengths

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -56,7 +56,7 @@ def vectorize_dataset(split: str, column: str):
56
  global df
57
  global ds
58
  df = ds[split].to_polars()
59
- embeddings = model.encode(df[column], max_length=512 * 4)
60
  df = df.with_columns(pl.Series(embeddings).alias("embeddings"))
61
 
62
 
@@ -153,4 +153,5 @@ with gr.Blocks() as demo:
153
  )
154
 
155
  btn_run.click(fn=run_query, inputs=query_input, outputs=results_output)
 
156
  demo.launch()
 
56
  global df
57
  global ds
58
  df = ds[split].to_polars()
59
+ embeddings = model.encode(df[column], max_length=512)
60
  df = df.with_columns(pl.Series(embeddings).alias("embeddings"))
61
 
62
 
 
153
  )
154
 
155
  btn_run.click(fn=run_query, inputs=query_input, outputs=results_output)
156
+
157
  demo.launch()