ariG23498 (HF Staff) committed
Commit ee678e0 · 1 Parent(s): ccff069

chore: fixing secrets

Files changed (1):
  1. app.py (+6, -9)
app.py CHANGED
@@ -7,18 +7,16 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import os
 
 os.environ["HF_TOKEN"] = os.getenv("auth")
+dataset = load_dataset("ariG23498/pis-blogs-chunked")
+embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2", device="cuda")
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
+model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", torch_dtype=torch.bfloat16, device_map="auto")
 
-@spaces.GPU
+@spaces.GPU(duration=500)
 def process_query(query):
-    dataset = load_dataset("ariG23498/pis-blogs-chunked")
-    embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2")
-    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
-    model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", torch_dtype=torch.bfloat16)
-
-    embedding_model = embedding_model.to("cuda")
     text_embeddings = embedding_model.encode(dataset["train"]["text"])
-
     query_embedding = embedding_model.encode(query)
+
     similarity_scores = embedding_model.similarity(query_embedding, text_embeddings)
     top_indices = (-similarity_scores).argsort()[0][:5]
 
@@ -33,7 +31,6 @@ def process_query(query):
 
     input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
     len_text = len(input_text)
-    model = model.to("cuda")
     with torch.inference_mode():
         generated_outputs = model.generate(**input_ids, max_new_tokens=1000, do_sample=False)
         generated_outputs = tokenizer.batch_decode(generated_outputs, skip_special_tokens=True)
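
For context, a minimal sketch of how the refactored app.py is typically wired up on a ZeroGPU Space: the heavy objects are now created once at import time, HF_TOKEN is populated from the Space secret named "auth" so the gated google/gemma-2b-it checkpoint can be downloaded, and @spaces.GPU(duration=500) only attaches a GPU for the duration of each call. The gr.Interface wiring and the demo variable below are assumptions for illustration; the diff above does not show them.

import os

import gradio as gr   # assumed UI layer; not shown in this diff
import spaces

# The Space secret named "auth" is exported as HF_TOKEN so the gated
# google/gemma-2b-it weights can be downloaded at startup.
os.environ["HF_TOKEN"] = os.getenv("auth")

# ... dataset, embedding_model, tokenizer and model are created once at module
# level, exactly as in the '+' lines of the diff above ...

@spaces.GPU(duration=500)   # ZeroGPU: request a GPU for up to 500 s per call
def process_query(query):
    ...  # embed the query, pick the top-5 chunks, prompt gemma-2b-it

# Assumed wiring for the Space UI; the actual interface code is not in the diff.
demo = gr.Interface(fn=process_query, inputs="text", outputs="text")
demo.launch()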