Spaces:
Sleeping
Sleeping
dhruv-anand-aintech
committed on
Commit
•
b33ec72
1
Parent(s):
b1d30e1
try embed
Browse files
app.py
CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
|
|
2 |
import spaces
|
3 |
import torch
|
4 |
import vdf_io
|
|
|
|
|
5 |
|
6 |
zero = torch.Tensor([0]).cuda()
|
7 |
print(zero.device) # <-- 'cpu' 🤔
|
@@ -15,24 +17,28 @@ def greet(n):
|
|
15 |
return f"Hello {zero + n} Tensor"
|
16 |
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
|
26 |
|
27 |
-
def reembed_main():
|
28 |
-
|
29 |
-
|
|
|
|
|
30 |
|
31 |
|
32 |
-
def download_dataset():
|
33 |
import datasets
|
34 |
|
35 |
-
|
|
|
|
|
36 |
|
37 |
|
38 |
demo = gr.Interface(
|
|
|
2 |
import spaces
|
3 |
import torch
|
4 |
import vdf_io
|
5 |
+
from sentence_transformers import SentenceTransformer
|
6 |
+
from rich import print as rprint
|
7 |
|
8 |
zero = torch.Tensor([0]).cuda()
|
9 |
print(zero.device) # <-- 'cpu' 🤔
|
|
|
17 |
return f"Hello {zero + n} Tensor"
|
18 |
|
19 |
|
20 |
+
@spaces.GPU
|
21 |
+
def reembed_dataset(ds, model):
|
22 |
+
model = SentenceTransformer(model, device=zero.device)
|
23 |
+
rprint(model)
|
24 |
+
rprint(model.encode("Hello, World!"))
|
25 |
+
ds.map(lambda x: model.encode(x["text"]))
|
26 |
+
rprint(ds[0])
|
27 |
|
28 |
|
29 |
+
def reembed_main(dataset_name, embedding_model, output_username):
|
30 |
+
print(f"{dataset_name=}, {embedding_model=}, {output_username=}")
|
31 |
+
ds = download_dataset(dataset_name)
|
32 |
+
reembed_dataset(ds, model=embedding_model)
|
33 |
+
return "Dataset re-embedded successfully"
|
34 |
|
35 |
|
36 |
+
def download_dataset(dataset_name):
|
37 |
import datasets
|
38 |
|
39 |
+
ds = datasets.load_dataset(dataset_name)
|
40 |
+
print(len(ds))
|
41 |
+
return ds
|
42 |
|
43 |
|
44 |
demo = gr.Interface(
|