Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,19 @@ from datasets import load_dataset
|
|
7 |
openai.api_key="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"
|
8 |
from openai.embeddings_utils import get_embedding
|
9 |
from openai.embeddings_utils import cosine_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
Bio_embeddings = load_dataset('vjain/biology_AP_embeddings')
|
12 |
|
13 |
df = pd.DataFrame(Bio_embeddings['train'])
|
@@ -17,8 +29,8 @@ df = pd.DataFrame(Bio_embeddings['train'])
|
|
17 |
def reply(input):
|
18 |
|
19 |
input = input
|
20 |
-
input_vector = get_embedding(input,
|
21 |
-
df["similiarities"]=df["embedding"].apply(lambda x: cosine_similarity(x,input_vector))
|
22 |
data = df.sort_values("similiarities", ascending=False).head(20)
|
23 |
data.to_csv("sorted.csv")
|
24 |
context = []
|
|
|
7 |
import json
import os
import re

import requests
from openai.embeddings_utils import get_embedding
from openai.embeddings_utils import cosine_similarity
# Keep this import AFTER the OpenAI one: it rebinds `cosine_similarity` to
# scikit-learn's implementation, which takes 2-D inputs and returns a matrix.
from sklearn.metrics.pairwise import cosine_similarity

# Credentials must never be committed to the repository. They are read from
# the environment (on Hugging Face Spaces: Settings -> Repository secrets).
# A missing variable raises KeyError at startup, naming the variable.
# NOTE(review): the keys that were previously hard-coded here are public in
# the commit history and should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]
hf_token = os.environ["HF_TOKEN"]

# Hub identifier of the sentence-embedding model queried via the Inference API.
model_id = "sentence-transformers/all-MiniLM-L6-v2"
|
16 |
|
17 |
+
def generate_embeddings(texts, model_id, hf_token, timeout=120):
    """Request feature-extraction output for ``texts`` from the HF Inference API.

    Args:
        texts: Value sent as the ``inputs`` field of the request body
            (presumably a string or a list of strings -- confirm with callers).
        model_id: Hub model identifier; interpolated into the
            feature-extraction pipeline URL.
        hf_token: Access token, sent as an ``Authorization: Bearer`` header.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
    """
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": texts, "options": {"wait_for_model": True}}

    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)

    # Fail loudly on an error status instead of returning the error payload
    # as if it were embeddings.
    response.raise_for_status()
    return response.json()
|
23 |
# Fetch the 'vjain/biology_AP_embeddings' dataset via `load_dataset`.
Bio_embeddings = load_dataset('vjain/biology_AP_embeddings')

# Build a DataFrame from the 'train' split; `reply` reads its "embedding"
# column and adds a "similiarities" column to it on each call.
df = pd.DataFrame(Bio_embeddings['train'])
|
|
|
29 |
def reply(input):
|
30 |
|
31 |
input = input
|
32 |
+
input_vector = get_embedding(input, mdoel_id,hf_token)
|
33 |
+
df["similiarities"]=df["embedding"].apply(lambda x: cosine_similarity([x],[input_vector])[0][0])
|
34 |
data = df.sort_values("similiarities", ascending=False).head(20)
|
35 |
data.to_csv("sorted.csv")
|
36 |
context = []
|