espejelomar
committed on
Commit
•
ff4ec71
1
Parent(s):
a6e7feb
add backend code
Browse files- backend/__init__.py +0 -0
- backend/__pycache__/__init__.cpython-36.pyc +0 -0
- backend/__pycache__/__init__.cpython-38.pyc +0 -0
- backend/__pycache__/config.cpython-36.pyc +0 -0
- backend/__pycache__/config.cpython-38.pyc +0 -0
- backend/__pycache__/inference.cpython-36.pyc +0 -0
- backend/__pycache__/inference.cpython-38.pyc +0 -0
- backend/__pycache__/utils.cpython-36.pyc +0 -0
- backend/__pycache__/utils.cpython-38.pyc +0 -0
- backend/inference.py +25 -0
- backend/utils.py +28 -0
backend/__init__.py
ADDED
File without changes
|
backend/__pycache__/__init__.cpython-36.pyc
ADDED
Binary file (159 Bytes). View file
|
|
backend/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (181 Bytes). View file
|
|
backend/__pycache__/config.cpython-36.pyc
ADDED
Binary file (737 Bytes). View file
|
|
backend/__pycache__/config.cpython-38.pyc
ADDED
Binary file (767 Bytes). View file
|
|
backend/__pycache__/inference.cpython-36.pyc
ADDED
Binary file (2.2 kB). View file
|
|
backend/__pycache__/inference.cpython-38.pyc
ADDED
Binary file (887 Bytes). View file
|
|
backend/__pycache__/utils.cpython-36.pyc
ADDED
Binary file (1.54 kB). View file
|
|
backend/__pycache__/utils.cpython-38.pyc
ADDED
Binary file (1.91 kB). View file
|
|
backend/inference.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from backend.utils import load_model, load_embeddings, load_texts
|
3 |
+
|
4 |
+
# Search
|
5 |
+
def query_search(query: str, n_answers: int, model_name: str):
    """Return the corpus rows whose embeddings are most similar to ``query``.

    The query is embedded with the model obtained from ``load_model`` and
    scored against the pre-computed corpus embeddings using cosine
    similarity; rows are then ranked from most to least similar.

    Args:
        query: Free-text search string to embed.
        n_answers: Number of top-ranked rows to keep (forwarded to
            ``DataFrame.head``).
        model_name: Identifier forwarded to ``load_model``.

    Returns:
        A DataFrame with the "Description" and "Code" columns of the
        best-scoring rows, most similar first.
    """
    model = load_model(model_name)

    # Creating embeddings
    query_emb = model.encode(query, convert_to_tensor=True)

    print("loading embedding")
    corpus_emb = load_embeddings()
    corpus_texts = load_texts()

    # The encoder may return its tensor on the model's device, whereas the
    # corpus embeddings are loaded onto the CPU; align them so the similarity
    # call cannot fail with a device mismatch.
    # NOTE(review): assumes corpus_emb is a 2-D tensor with one row per
    # corpus text — confirm against the embeddings file.
    query_emb = query_emb.to(corpus_emb.device)

    # Getting hits: one similarity score per corpus row.
    hits = torch.nn.functional.cosine_similarity(
        query_emb[None, :], corpus_emb, dim=1, eps=1e-8
    )

    # load_texts() hands back a cached, shared DataFrame. Attach the scores
    # to a copy (assign returns a new frame) so the cached object is never
    # mutated by a query.
    ranked = corpus_texts.assign(Similarity=hits.tolist())

    return ranked.sort_values(by="Similarity", ascending=False).head(n_answers)[
        ["Description", "Code"]
    ]
|
backend/utils.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sentence_transformers import SentenceTransformer
|
3 |
+
import streamlit as st
|
4 |
+
import torch
|
5 |
+
|
6 |
+
|
7 |
+
@st.cache(allow_output_mutation=True)
def load_model(model_name):
    """Instantiate the ``SentenceTransformer`` identified by ``model_name``.

    The function is wrapped in ``st.cache(allow_output_mutation=True)``, so
    the constructed model is handed to Streamlit's cache rather than being
    rebuilt by this code on every call.

    Args:
        model_name: Value passed straight to ``SentenceTransformer``.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    # Per the original author's note, the model is downloaded lazily —
    # presumably on first construction; verify against the library docs.
    return SentenceTransformer(model_name)
|
12 |
+
|
13 |
+
|
14 |
+
@st.cache(allow_output_mutation=True)
def load_embeddings():
    """Load the pre-generated corpus embeddings from disk onto the CPU.

    Decorated with ``st.cache(allow_output_mutation=True)``; the path is
    relative to the process's working directory.

    Returns:
        Whatever object ``torch.load`` deserializes from the embeddings file,
        with its storages mapped to the CPU.
    """
    cpu = torch.device("cpu")
    embeddings_path = "./embeddings/descriptions_emb_100000_examples.pt"
    return torch.load(embeddings_path, map_location=cpu)
|
22 |
+
|
23 |
+
|
24 |
+
@st.cache(allow_output_mutation=True)
def load_texts():
    """Read the pre-generated texts database into a DataFrame.

    Decorated with ``st.cache(allow_output_mutation=True)``; the CSV path is
    relative to the process's working directory.

    Returns:
        The ``pandas.DataFrame`` parsed from the CSV file.
    """
    csv_path = "./data/codesearchnet_100000_examples.csv"
    return pd.read_csv(csv_path)
|