ariG23498 (HF Staff) committed
Commit 33a95fe · 1 Parent(s): 33ffdb4
Files changed (1): app.py (+8, -12)
app.py CHANGED
@@ -5,36 +5,32 @@ from datasets import load_dataset
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
+dataset = load_dataset("ariG23498/pis-blogs-chunked")
+embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2")
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
+model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", torch_dtype=torch.bfloat16)
+
 @spaces.GPU
 def process_query(query):
-    dataset = load_dataset("ariG23498/pis-blogs-chunked")
-    embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2", device="cuda:0")
+    embedding_model = embedding_model.to("cuda")
     text_embeddings = embedding_model.encode(dataset["train"]["text"])
-    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
-    model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", torch_dtype=torch.bfloat16, device_map="auto")
 
-    print(f"Query: {query}")
-
     query_embedding = embedding_model.encode(query)
-
     similarity_scores = embedding_model.similarity(query_embedding, text_embeddings)
     top_indices = (-similarity_scores).argsort()[0][:5]
 
     context = dataset["train"]["text"][top_indices[0]]
     url = dataset["train"]["url"][top_indices[0]]
 
-    print(f"Searching URL: {url}")
-    print(f"Found context: {context}")
-
     input_text = (
         f"Based on the context provided, '{context}', how would"
         f"you address the user's query regarding '{query}'? Please"
         " provide a detailed and contextually relevant response."
     )
 
-    input_ids = tokenizer(input_text, return_tensors="pt").to("cuda:0")
+    input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
     len_text = len(input_text)
-
+    model = model.to("cuda")
     with torch.inference_mode():
         generated_outputs = model.generate(**input_ids, max_new_tokens=1000, do_sample=False)
     generated_outputs = tokenizer.batch_decode(generated_outputs, skip_special_tokens=True)
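
For reference, a minimal sketch of how app.py might read after this commit: the dataset, embedding model, tokenizer, and generator are loaded once at import time on CPU, and only moved to the GPU inside the @spaces.GPU-decorated handler. Two caveats relative to the diff as committed: rebinding the module-level names inside the function (embedding_model = embedding_model.to("cuda")) makes Python treat them as locals and would raise UnboundLocalError, so the sketch calls .to("cuda") without reassignment (nn.Module moves are in-place); and the prompt f-string is missing a space between "would" and "you", which the sketch restores. The trailing return is an assumption, since the hunk ends before the function's return statement.

import spaces
import torch
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

# Heavy downloads happen once at startup, on CPU, so the Space can boot
# before any GPU is attached.
dataset = load_dataset("ariG23498/pis-blogs-chunked")
embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2")
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", torch_dtype=torch.bfloat16)


@spaces.GPU
def process_query(query):
    # Move the modules in place; no rebinding, so the globals stay visible here.
    embedding_model.to("cuda")
    model.to("cuda")

    # Embed the corpus and the query, then rank chunks by similarity.
    text_embeddings = embedding_model.encode(dataset["train"]["text"])
    query_embedding = embedding_model.encode(query)
    similarity_scores = embedding_model.similarity(query_embedding, text_embeddings)
    top_indices = (-similarity_scores).argsort()[0][:5]

    # The best-matching chunk becomes the context for generation.
    context = dataset["train"]["text"][top_indices[0]]
    url = dataset["train"]["url"][top_indices[0]]

    input_text = (
        f"Based on the context provided, '{context}', how would"
        f" you address the user's query regarding '{query}'? Please"
        " provide a detailed and contextually relevant response."
    )

    input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
    len_text = len(input_text)

    with torch.inference_mode():
        generated_outputs = model.generate(**input_ids, max_new_tokens=1000, do_sample=False)
    generated_outputs = tokenizer.batch_decode(generated_outputs, skip_special_tokens=True)

    # Assumed: strip the echoed prompt by character length and return the
    # answer together with the source URL.
    return generated_outputs[0][len_text:], url

On ZeroGPU Spaces, @spaces.GPU attaches a GPU only for the duration of the decorated call, which is why the commit defers every .to("cuda") move into the handler. Note the corpus is still re-embedded on every request; caching text_embeddings at startup would be a natural follow-up.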