chore: fixing secrets
app.py CHANGED

@@ -7,18 +7,16 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import os
 
 os.environ["HF_TOKEN"] = os.getenv("auth")
+dataset = load_dataset("ariG23498/pis-blogs-chunked")
+embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2", device="cuda")
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
+model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", torch_dtype=torch.bfloat16, device_map="auto")
 
-@spaces.GPU
+@spaces.GPU(duration=500)
 def process_query(query):
-    dataset = load_dataset("ariG23498/pis-blogs-chunked")
-    embedding_model = SentenceTransformer(model_name_or_path="all-mpnet-base-v2")
-    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
-    model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", torch_dtype=torch.bfloat16)
-
-    embedding_model = embedding_model.to("cuda")
     text_embeddings = embedding_model.encode(dataset["train"]["text"])
-
     query_embedding = embedding_model.encode(query)
+
     similarity_scores = embedding_model.similarity(query_embedding, text_embeddings)
     top_indices = (-similarity_scores).argsort()[0][:5]
 
@@ -33,7 +31,6 @@ def process_query(query):
 
     input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
     len_text = len(input_text)
-    model = model.to("cuda")
     with torch.inference_mode():
         generated_outputs = model.generate(**input_ids, max_new_tokens=1000, do_sample=False)
     generated_outputs = tokenizer.batch_decode(generated_outputs, skip_special_tokens=True)