dhruv-anand-aintech committed on
Commit
b33ec72
•
1 Parent(s): b1d30e1
Files changed (1) hide show
  1. app.py +18 -12
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
2
  import spaces
3
  import torch
4
  import vdf_io
 
 
5
 
6
  zero = torch.Tensor([0]).cuda()
7
  print(zero.device) # <-- 'cpu' 🤔
@@ -15,24 +17,28 @@ def greet(n):
15
  return f"Hello {zero + n} Tensor"
16
 
17
 
18
- def reembed_dataset():
19
- import datasets
20
- # model
21
- # embeddings = model.embed(ds)
22
- # new_embeddings = model.reembed(embeddings)
23
-
24
- # datasets.save_dataset(new_embeddings)
25
 
26
 
27
- def reembed_main():
28
- download_dataset()
29
- reembed_dataset()
 
 
30
 
31
 
32
- def download_dataset():
33
  import datasets
34
 
35
- # ds = datasets.load_dataset()
 
 
36
 
37
 
38
  demo = gr.Interface(
 
2
  import spaces
3
  import torch
4
  import vdf_io
5
+ from sentence_transformers import SentenceTransformer
6
+ from rich import print as rprint
7
 
8
  zero = torch.Tensor([0]).cuda()
9
  print(zero.device) # <-- 'cpu' 🤔
 
17
  return f"Hello {zero + n} Tensor"
18
 
19
 
20
+ @spaces.GPU
21
+ def reembed_dataset(ds, model):
22
+ model = SentenceTransformer(model, device=zero.device)
23
+ rprint(model)
24
+ rprint(model.encode("Hello, World!"))
25
+ ds.map(lambda x: model.encode(x["text"]))
26
+ rprint(ds[0])
27
 
28
 
29
+ def reembed_main(dataset_name, embedding_model, output_username):
30
+ print(f"{dataset_name=}, {embedding_model=}, {output_username=}")
31
+ ds = download_dataset(dataset_name)
32
+ reembed_dataset(ds, model=embedding_model)
33
+ return "Dataset re-embedded successfully"
34
 
35
 
36
+ def download_dataset(dataset_name):
37
  import datasets
38
 
39
+ ds = datasets.load_dataset(dataset_name)
40
+ print(len(ds))
41
+ return ds
42
 
43
 
44
  demo = gr.Interface(