muryshev committed on
Commit
d941729
·
1 Parent(s): 8361842
.dockerignore ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ *.db
6
+ *.sqlite
7
+ *.log
8
+ .DS_Store
9
+ .env
10
+ venv
11
+ *.bat
12
+ desktop.ini
13
+ *.git
14
+ .cache
15
+ .local
16
+ .nv
17
+ *.bash_history
18
+ *.zip
19
+ *.yaml
.gitattributes CHANGED
@@ -8,6 +8,8 @@
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
 
11
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.index filter=lfs diff=lfs merge=lfs -text
12
+ *.json filter=lfs diff=lfs merge=lfs -text
13
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
14
  *.model filter=lfs diff=lfs merge=lfs -text
15
  *.msgpack filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ *.bat
2
+ __pycache__
3
+ .cache
4
+ .local
5
+ .nv
6
+ *.bash_history
7
+ *.zip
Dockerfile ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image.
# The image already ships Python 3.10 and pip, so no apt-installed
# python3-pip / python3-dev (a second, unused interpreter) is needed.
FROM python:3.10-slim-bullseye

# PYTHONUNBUFFERED: write stdout/stderr straight through so logs are not delayed.
# HOME: `pip install --user` installs under $HOME/.local.
# PATH: make console scripts installed by `pip install --user` reachable.
ENV PYTHONUNBUFFERED=1 \
    HOME=/var/www \
    PATH="/var/www/.local/bin:${PATH}"

# Create a non-root user (UID 1000) and the application/log directories it owns
RUN useradd -m -u 1000 -U -s /bin/bash myuser && \
    mkdir -p /var/www/logs && \
    chown -R myuser:myuser /var/www

# Set the working directory in the container
WORKDIR /var/www

# Switch to the non-root user
USER myuser

# Install Python dependencies first so that this layer stays cached when only
# the application source changes
COPY --chown=myuser:myuser requirements.txt /var/www/requirements.txt
RUN pip install --no-cache-dir --user -r requirements.txt

# Copy the current directory contents into the container at /var/www.
# --chown keeps the files owned by the user the process runs as; a plain COPY
# would leave them owned by root.
COPY --chown=myuser:myuser . /var/www

# Expose the port
EXPOSE 7860

# Set environment variables read by search_api.py
ENV MODEL_PATH="BAAI/bge-m3" \
    DEVICE="cpu"

# Serve the FastAPI app defined in search_api.py when the container launches.
# Exec form makes the server PID 1 so it receives SIGTERM directly.
CMD ["python3", "-m", "uvicorn", "search_api:app", "--host=0.0.0.0", "--port=7860"]
faiss_indexes/.gitkeep ADDED
File without changes
faiss_indexes/faiss__bge_5000.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c603ce88279357224673c7f87067c794bbaa8a9b10c1b75f42bd6971979579cf
3
+ size 11771949
faiss_indexes/filtered_db_data__bge_5000.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49943f09e2392bb7450171eeb8cbed925e06b1e0d39c74415bc3ea8fd01c8b45
3
+ size 4735630
faiss_indexes/index_keys__bge_5000.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afb0846481da81b8eac5a52134d5292041c72cbd0e0ab9712ce016c375554c34
3
+ size 523565
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
numpy==1.23.5
faiss-cpu==1.7.3
torch==2.0.1
transformers==4.30.2
fastapi==0.95.2
pydantic==1.10.7
# ASGI server started by the Dockerfile CMD (`python3 -m uvicorn search_api:app`);
# fastapi does not install it on its own.
uvicorn==0.22.0
search_api.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import numpy as np
4
+ import faiss
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from torch.cuda.amp import autocast
8
+ from transformers import AutoTokenizer, AutoModel
9
+ from fastapi import FastAPI, HTTPException
10
+ from pydantic import BaseModel
11
+
12
class FaissSearch:
    """Semantic search over a prebuilt FAISS index using a transformer encoder.

    A query is tokenized, encoded with the model at ``model_path`` (the hidden
    state of the first token is used as the embedding) and looked up in the
    FAISS index. Index row numbers are translated to document references via
    ``index_keys`` and references to documents via ``filtered_db_data``.
    """

    def __init__(self, model_path, index_path, index_keys_path, filtered_db_path, device='cuda:0'):
        """Load the index, the lookup tables and the tokenizer.

        The encoder model itself is loaded lazily on the first query.

        Args:
            model_path: Name or path passed to ``from_pretrained``.
            index_path: Path of the serialized FAISS index.
            index_keys_path: JSON file mapping ``str(row number)`` to a
                document reference.
            filtered_db_path: JSON file mapping a document reference to the
                document itself.
            device: Torch device the model and inputs are moved to.
        """
        self.device = device
        self.model_path = model_path
        self.index = faiss.read_index(index_path)
        self.max_len = 512  # queries are padded/truncated to this many tokens

        with open(index_keys_path, 'r', encoding='utf-8') as f:
            self.index_keys = json.load(f)

        with open(filtered_db_path, 'r', encoding='utf-8') as f:
            self.filtered_db_data = json.load(f)

        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = None

    def _load_model(self):
        """Load the encoder onto ``self.device`` if it has not been loaded yet."""
        if self.model is None:
            self.model = AutoModel.from_pretrained(self.model_path).to(self.device)

    def _query_tokenization(self, text):
        """Tokenize ``text`` into PyTorch tensors of length ``self.max_len``."""
        # NOTE: an e5-style model would need the prefix "query: " prepended here.
        return self.tokenizer(
            text,
            return_tensors="pt",
            padding='max_length',
            truncation=True,
            max_length=self.max_len,
        )

    def _query_embed_extraction(self, tokens, do_normalization=True):
        """Encode tokenized input and return the embedding as a NumPy array.

        Args:
            tokens: Mapping of tensor inputs produced by the tokenizer.
            do_normalization: When true, L2-normalize the embedding along the
                last dimension.

        Returns:
            The first-token hidden state of every input row, as float32.
        """
        self._load_model()
        self.model.eval()
        # CUDA autocast is only meaningful on a CUDA device; requesting it on
        # a CPU-only setup just produces a warning on every call.
        use_amp = str(self.device).startswith("cuda")
        with torch.no_grad(), autocast(enabled=use_amp):
            inputs = {k: v.to(self.device) for k, v in tokens.items()}
            outputs = self.model(**inputs)
            # Autocast may yield float16; FAISS works on float32 vectors.
            embedding = outputs.last_hidden_state[:, 0].float().cpu()

        if do_normalization:
            embedding = F.normalize(embedding, dim=-1)
        return embedding.numpy()

    def _search_results_filtering(self, preds, dists):
        """Order references by score, highest first.

        Args:
            preds: Document references.
            dists: Score of each reference, aligned with ``preds``.

        Returns:
            ``(sorted_preds, sorted_scores)`` as two lists. References with
            equal scores keep their input order.
        """
        # NOTE(review): descending order assumes a larger score means a better
        # match (e.g. an inner-product index) - confirm against the index type.
        ranked = sorted(zip(preds, dists), key=lambda pair: pair[1], reverse=True)
        sorted_preds = [ref for ref, _ in ranked]
        sorted_scores = [score for _, score in ranked]
        return sorted_preds, sorted_scores

    def search(self, query, top=20):
        """Return the best-matching references and documents for ``query``.

        Args:
            query: Query text.
            top: Maximum number of results. ``None`` returns every hit; a
                negative value is treated as 0 instead of silently slicing
                results off the end of the list.

        Returns:
            ``(preds, docs)``: document references and the corresponding
            documents, both ordered by descending score.
        """
        if top is not None and top < 0:
            top = 0

        query_tokens = self._query_tokenization(query)
        query_embeds = self._query_embed_extraction(query_tokens, do_normalization=True)
        distances, indices = self.index.search(query_embeds, len(self.filtered_db_data))

        # FAISS fills unused result slots with label -1 when the index holds
        # fewer vectors than were requested; those are not real hits.
        hits = [(idx, dist) for idx, dist in zip(indices[0], distances[0]) if idx >= 0]
        preds = [self.index_keys[str(idx)] for idx, _ in hits]
        scores = [dist for _, dist in hits]

        preds, _ = self._search_results_filtering(preds, scores)
        preds = preds[:top]
        # Only materialize the documents that are actually returned.
        docs = [self.filtered_db_data[ref] for ref in preds]

        torch.cuda.empty_cache()

        return preds, docs
76
+
77
+
78
# Numeric tag embedded in the file names of the index artifacts below.
STEP = 5000

# Encoder name/path; overridable through the MODEL_PATH environment variable.
model_path = os.getenv("MODEL_PATH", "bge/")

# Artifacts produced by the indexing step, addressed relative to the
# working directory.
index_path = f"faiss_indexes/faiss__bge_{STEP}.index"
index_keys_path = f"faiss_indexes/index_keys__bge_{STEP}.json"
filtered_db_path = f"faiss_indexes/filtered_db_data__bge_{STEP}.json"

# Single shared searcher created at import time; the device comes from the
# DEVICE environment variable.
searcher = FaissSearch(
    model_path=model_path,
    index_path=index_path,
    index_keys_path=index_keys_path,
    filtered_db_path=filtered_db_path,
    device=os.getenv("DEVICE", "cuda:0"),
)

app = FastAPI()
87
+
88
class SearchRequest(BaseModel):
    """Request body accepted by the ``/search`` endpoint."""
    # Query text handed to the searcher unchanged.
    query: str
    # Maximum number of results to return; defaults to 10 when omitted.
    top: int = 10
91
+
92
class SearchResponse(BaseModel):
    """Response body returned by the ``/search`` endpoint."""
    # Document references returned by the searcher.
    predictions: list
    # Documents returned by the searcher alongside `predictions`.
    documents: list
95
+
96
@app.post("/search", response_model=SearchResponse)
async def search_endpoint(request: SearchRequest):
    """Handle ``POST /search`` by querying the module-level ``searcher``.

    Args:
        request: Parsed request body with the query text and result limit.

    Returns:
        A ``SearchResponse`` holding the references and documents returned by
        ``searcher.search``.

    Raises:
        HTTPException: With status 500 and the error text as detail when the
            search or the response construction fails.
    """
    try:
        preds, docs = searcher.search(request.query, top=request.top)
        return SearchResponse(predictions=preds, documents=docs)
    except Exception as e:
        # Top-level boundary: report the failure to the client, and chain the
        # original exception so its traceback is preserved for debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e