Spaces:

muryshev
/

chatbot-demo-search

Sleeping

App Files Files Community

muryshev committed 25 days ago

Commit

d941729

1 Parent(s): 8361842

init

Browse files

Files changed (10) hide show

.dockerignore +19 -0
.gitattributes +2 -0
.gitignore +7 -0
Dockerfile +44 -0
faiss_indexes/.gitkeep +0 -0
faiss_indexes/faiss__bge_5000.index +3 -0
faiss_indexes/filtered_db_data__bge_5000.json +3 -0
faiss_indexes/index_keys__bge_5000.json +3 -0
requirements.txt +6 -0
search_api.py +102 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,19 @@

+__pycache__
+*.pyc
+*.pyo
+*.pyd
+*.db
+*.sqlite
+*.log
+.DS_Store
+.env
+venv
+*.bat
+desktop.ini
+*.git
+.cache
+.local
+.nv
+*.bash_history
+*.zip
+*.yaml

.gitattributes CHANGED Viewed

@@ -8,6 +8,8 @@
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text

 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.index filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,7 @@

+*.bat
+__pycache__
+.cache
+.local
+.nv
+*.bash_history
+*.zip

Dockerfile ADDED Viewed

	@@ -0,0 +1,44 @@

+# Use an official Python runtime as a parent image
+FROM python:3.10-slim-bullseye
+# Unbuffered stdout/stderr so log lines reach the container output immediately
+ENV PYTHONUNBUFFERED=1
+# HOME is the app directory, so `pip install --user` places console scripts
+# in /var/www/.local/bin; put that directory on PATH
+ENV HOME=/var/www
+ENV PATH="/var/www/.local/bin:${PATH}"
+# Create a non-root user
+RUN useradd -m -u 1000 -U -s /bin/bash myuser
+# Install system-level dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends python3-pip python3-dev && \
+    rm -rf /var/lib/apt/lists/*
+# Create the working directory and hand it to the non-root user
+RUN mkdir -p /var/www && chown -R myuser:myuser /var/www
+WORKDIR /var/www
+# Switch to the non-root user
+USER myuser
+# Copy only the requirements first so the dependency layer stays cached when
+# just the application code changes. COPY creates files as root regardless of
+# USER, hence the explicit --chown.
+COPY --chown=myuser:myuser requirements.txt /var/www/requirements.txt
+# Install Python dependencies (no pip cache, to keep the image smaller)
+RUN pip install --user --no-cache-dir -r requirements.txt
+# Copy the rest of the build context into the container at /var/www
+COPY --chown=myuser:myuser . /var/www
+# Expose the port
+EXPOSE 7860
+# -p: do not fail if the build context already ships a logs directory
+RUN mkdir -p /var/www/logs
+# Set environment variables read by search_api.py
+ENV MODEL_PATH="BAAI/bge-m3" \
+    DEVICE="cpu"
+# Serve the FastAPI app defined in search_api.py with uvicorn.
+# Exec form makes uvicorn PID 1 so it receives SIGTERM on container stop.
+CMD ["python3", "-m", "uvicorn", "search_api:app", "--host=0.0.0.0", "--port=7860"]

faiss_indexes/.gitkeep ADDED Viewed

File without changes

faiss_indexes/faiss__bge_5000.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c603ce88279357224673c7f87067c794bbaa8a9b10c1b75f42bd6971979579cf
+size 11771949

faiss_indexes/filtered_db_data__bge_5000.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49943f09e2392bb7450171eeb8cbed925e06b1e0d39c74415bc3ea8fd01c8b45
+size 4735630

faiss_indexes/index_keys__bge_5000.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afb0846481da81b8eac5a52134d5292041c72cbd0e0ab9712ce016c375554c34
+size 523565

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+numpy==1.23.5
+faiss-cpu==1.7.3
+torch==2.0.1
+transformers==4.30.2
+fastapi==0.95.2
+pydantic==1.10.7
+# ASGI server started by the Dockerfile CMD (`python3 -m uvicorn ...`);
+# it is not installed as a dependency of plain `fastapi`.
+uvicorn==0.22.0

search_api.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import os
+import json
+import numpy as np
+import faiss
+import torch
+import torch.nn.functional as F
+from torch.cuda.amp import autocast
+from transformers import AutoTokenizer, AutoModel
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+class FaissSearch:
+    """Semantic search over a prebuilt Faiss index using a transformer encoder.
+
+    A query is embedded with the first-token hidden state of the model at
+    ``model_path``, looked up in the Faiss index, and every hit is mapped to
+    a document reference through ``index_keys`` and to a document payload
+    through ``filtered_db_data``.
+
+    The index, both JSON lookup tables and the tokenizer are loaded in the
+    constructor; the model itself is loaded lazily on the first query.
+    """
+
+    def __init__(self, model_path, index_path, index_keys_path, filtered_db_path, device='cuda:0'):
+        """Load the index, the lookup tables and the tokenizer.
+
+        Args:
+            model_path: Name or path handed to ``from_pretrained``.
+            index_path: Path of the serialized Faiss index.
+            index_keys_path: JSON file mapping stringified index ids to
+                document references.
+            filtered_db_path: JSON file mapping document references to
+                document payloads.
+            device: Torch device string the model is moved to.
+        """
+        self.device = device
+        self.model_path = model_path
+        self.index = faiss.read_index(index_path)
+        # Queries longer than this many tokens are truncated.
+        self.max_len = 512
+        with open(index_keys_path, 'r', encoding='utf-8') as f:
+            self.index_keys = json.load(f)
+        with open(filtered_db_path, 'r', encoding='utf-8') as f:
+            self.filtered_db_data = json.load(f)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.model = None
+
+    def _load_model(self):
+        """Load the encoder on first use and switch it to inference mode."""
+        if self.model is None:
+            self.model = AutoModel.from_pretrained(self.model_path).to(self.device)
+            # Done once here instead of on every query.
+            self.model.eval()
+
+    def _query_tokenization(self, text):
+        """Tokenize ``text`` into PyTorch tensors, truncated to ``max_len``.
+
+        NOTE: e5-family models would need the text prefixed with "query: ";
+        that prefix is not applied here.
+        """
+        return self.tokenizer(
+            text,
+            return_tensors="pt",
+            # Pad only to the longest sequence in the batch. Padding a single
+            # query out to max_len adds masked tokens the model still has to
+            # process.
+            padding=True,
+            truncation=True,
+            max_length=self.max_len,
+        )
+
+    def _query_embed_extraction(self, tokens, do_normalization=True):
+        """Return the query embedding as a float32 NumPy array.
+
+        Args:
+            tokens: Tokenizer output (mapping of tensor inputs for the model).
+            do_normalization: L2-normalise the embedding when true.
+        """
+        self._load_model()
+        use_cuda = torch.device(self.device).type == "cuda"
+        with torch.no_grad():
+            # Mixed precision is only meaningful on CUDA; enabling it
+            # unconditionally emits a warning on CPU-only hosts.
+            with autocast(enabled=use_cuda):
+                inputs = {k: v.to(self.device) for k, v in tokens.items()}
+                outputs = self.model(**inputs)
+            # Autocast can yield float16 activations; Faiss works on float32,
+            # and normalising in float32 is more accurate.
+            embedding = outputs.last_hidden_state[:, 0].float().cpu()
+            if do_normalization:
+                embedding = F.normalize(embedding, dim=-1)
+        return embedding.numpy()
+
+    def _search_results_filtering(self, preds, dists, descending=True):
+        """Order references by score.
+
+        Args:
+            preds: Document references.
+            dists: Scores aligned with ``preds``.
+            descending: Highest score first when true (the default),
+                lowest first otherwise.
+
+        Returns:
+            Tuple ``(sorted_preds, sorted_scores)`` of two lists.
+        """
+        ranked = sorted(zip(preds, dists), key=lambda pair: pair[1], reverse=descending)
+        sorted_preds = [ref for ref, _ in ranked]
+        sorted_scores = [score for _, score in ranked]
+        return sorted_preds, sorted_scores
+
+    def search(self, query, top=20):
+        """Return the ``top`` best matches for ``query``.
+
+        Args:
+            query: Query text.
+            top: Maximum number of results; non-positive values yield no
+                results.
+
+        Returns:
+            Tuple ``(preds, docs)``: document references, best match first,
+            and the corresponding payloads from ``filtered_db_data``.
+        """
+        if top <= 0 or self.index.ntotal == 0:
+            return [], []
+        query_tokens = self._query_tokenization(query)
+        query_embeds = self._query_embed_extraction(query_tokens, do_normalization=True)
+        # Ask Faiss only for what is returned; never more than it holds.
+        k = min(top, self.index.ntotal)
+        distances, indices = self.index.search(query_embeds, k)
+        # Faiss pads missing neighbours with label -1; skip those slots.
+        hits = [
+            (self.index_keys[str(idx)], dist)
+            for idx, dist in zip(indices[0], distances[0])
+            if idx != -1
+        ]
+        # Inner-product scores are "higher is better"; other metrics are
+        # distances, where lower is better.
+        best_is_highest = self.index.metric_type == faiss.METRIC_INNER_PRODUCT
+        preds, _ = self._search_results_filtering(
+            [ref for ref, _ in hits],
+            [dist for _, dist in hits],
+            descending=best_is_highest,
+        )
+        docs = [self.filtered_db_data[ref] for ref in preds]
+        if torch.device(self.device).type == "cuda":
+            torch.cuda.empty_cache()
+        return preds, docs
+# Numeric suffix shared by the three artefact file names below
+# (presumably the step/size used when the index was built — TODO confirm).
+STEP = 5000
+# Encoder checkpoint: MODEL_PATH env var, else the local "bge/" directory.
+model_path = os.environ.get("MODEL_PATH", "bge/")
+# Artefact paths are relative to the process working directory.
+index_path = f"faiss_indexes/faiss__bge_{STEP}.index"
+index_keys_path = f"faiss_indexes/index_keys__bge_{STEP}.json"
+filtered_db_path = f"faiss_indexes/filtered_db_data__bge_{STEP}.json"
+# Single searcher built at import time and shared by all requests;
+# the device comes from the DEVICE env var and defaults to "cuda:0".
+searcher = FaissSearch(
+    model_path=model_path,
+    index_path=index_path,
+    index_keys_path=index_keys_path,
+    filtered_db_path=filtered_db_path,
+    device=os.environ.get("DEVICE", "cuda:0"),
+)
+app = FastAPI()
+class SearchRequest(BaseModel):
+    """Request body of the ``/search`` endpoint."""
+    # Query text to search for.
+    query: str
+    # Maximum number of results to return; 10 when omitted.
+    top: int = 10
+class SearchResponse(BaseModel):
+    """Response body of the ``/search`` endpoint."""
+    # Document references of the matches, in ranked order.
+    predictions: list
+    # Document payloads, positionally aligned with ``predictions``.
+    documents: list
+@app.post("/search", response_model=SearchResponse)
+def search_endpoint(request: SearchRequest):
+    """Run a search for ``request.query`` and return up to ``request.top`` hits.
+
+    Declared with plain ``def`` instead of ``async def``: the search performs
+    blocking tokenisation, model inference and index lookup, so FastAPI runs
+    the handler in its worker thread pool rather than stalling the event loop
+    for every other request.
+
+    Raises:
+        HTTPException: Status 500 carrying the error text when the search
+            fails for any reason.
+    """
+    try:
+        preds, docs = searcher.search(request.query, top=request.top)
+        return SearchResponse(predictions=preds, documents=docs)
+    except Exception as e:
+        # API boundary: turn any failure into a 500 response, keeping the
+        # original exception chained for the server-side traceback.
+        raise HTTPException(status_code=500, detail=str(e)) from e