Spaces:

traversaal-internal
/

pro-search-api

Running

App Files Files Community

vhr1007 committed on Aug 18

Commit

500c1ba

•

1 Parent(s): b55082a

init

Browse files

Files changed (10) hide show

.gitignore +2 -0
Dockerfile +16 -0
app.py +109 -0
config.py +7 -0
requiements.txt +8 -0
services/__init__py +0 -0
services/openai_service.py +48 -0
services/qdrant_searcher.py +44 -0
utils/__init__.py +0 -0
utils/auth.py +44 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .env
2	+

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+# Container image for the pro-search-api FastAPI service.
+# Base image
+FROM python:3.8-slim
+# Set working directory
+WORKDIR /app
+# Copy requirements.txt and install dependencies before the rest of the
+# sources, so the install step is separate from the application code copy.
+# NOTE(review): this commit adds the dependency list as "requiements.txt";
+# the COPY below expects "requirements.txt" -- confirm the file name.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application code
+COPY . .
+# Command to run the FastAPI application: serve the `app` object from app.py
+# on all interfaces, port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,109 @@

+from huggingface_hub import login
+from fastapi import FastAPI, Depends, HTTPException
+import logging
+from pydantic import BaseModel
+from sentence_transformers import SentenceTransformer
+from services.qdrant_searcher import QdrantSearcher
+from services.openai_service import generate_rag_response
+from utils.auth import token_required
+from dotenv import load_dotenv
+import os
+load_dotenv()  # Load environment variables from .env file
+app = FastAPI()
+os.environ["HF_HOME"] = "/tmp/huggingface_cache"
+# Ensure the cache directory exists
+cache_dir = os.environ["HF_HOME"]
+if not os.path.exists(cache_dir):
+    os.makedirs(cache_dir)
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+# Load Hugging Face token from environment variable
+huggingface_token = os.getenv('HUGGINGFACE_HUB_TOKEN')
+if huggingface_token:
+    login(token=huggingface_token, add_to_git_credential=True)
+else:
+    raise ValueError("Hugging Face token is not set. Please set the HUGGINGFACE_HUB_TOKEN environment variable.")
+# Initialize the Qdrant searcher
+qdrant_url = os.getenv('QDRANT_URL')
+access_token = os.getenv('QDRANT_ACCESS_TOKEN')
+encoder = SentenceTransformer('paraphrase-MiniLM-L6-v2', trust_remote_code=True)  # Replace with your actual encoder
+searcher = QdrantSearcher(encoder, qdrant_url, access_token)
+# Request body models
+class SearchDocumentsRequest(BaseModel):
+    """Request body for POST /api/search-documents."""
+    # Free-text query; it is handed to the searcher, which encodes it for the vector search.
+    query: str
+    # Maximum number of hits to return; forwarded to the searcher as its limit.
+    limit: int = 3
+class GenerateRAGRequest(BaseModel):
+    """Request body for POST /api/generate-rag-response."""
+    # Used both to retrieve context documents and as the user query passed to the model.
+    search_query: str
+@app.post("/api/search-documents")
+async def search_documents(
+    body: SearchDocumentsRequest,
+    credentials: tuple = Depends(token_required)
+):
+    customer_id, user_id = credentials
+    # Check if customer_id or user_id is missing
+    if not customer_id or not user_id:
+        logging.error("Failed to extract customer_id or user_id from the JWT token.")
+        raise HTTPException(status_code=401, detail="Invalid token: missing customer_id or user_id")
+    logging.info("Received request to search documents")
+    try:
+        collection_name = "my_embeddings"
+        hits, error = searcher.search_documents(collection_name, body.query, user_id, body.limit)
+        if error:
+            logging.error(f"Search documents error: {error}")
+            raise HTTPException(status_code=500, detail=error)
+        return hits
+    except Exception as e:
+        logging.error(f"Unexpected error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/generate-rag-response")
+async def generate_rag_response_api(
+    body: GenerateRAGRequest,
+    credentials: tuple = Depends(token_required)
+):
+    customer_id, user_id = credentials
+    # Check if customer_id or user_id is missing
+    if not customer_id or not user_id:
+        logging.error("Failed to extract customer_id or user_id from the JWT token.")
+        raise HTTPException(status_code=401, detail="Invalid token: missing customer_id or user_id")
+    logging.info("Received request to generate RAG response")
+    try:
+        collection_name = "my_embeddings"
+        hits, error = searcher.search_documents(collection_name, body.search_query, user_id)
+        if error:
+            logging.error(f"Search documents error: {error}")
+            raise HTTPException(status_code=500, detail=error)
+        response, error = generate_rag_response(hits, body.search_query)
+        if error:
+            logging.error(f"Generate RAG response error: {error}")
+            raise HTTPException(status_code=500, detail=error)
+        return {"response": response}
+    except Exception as e:
+        logging.error(f"Unexpected error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+# Direct execution (python app.py): serve on port 8000.
+# Note that the Dockerfile CMD starts uvicorn itself on port 7860 instead,
+# so this branch is not taken in the container.
+if __name__ == '__main__':
+    import uvicorn
+    uvicorn.run(app, host='0.0.0.0', port=8000)

config.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from dotenv import load_dotenv
+import os
+QDRANT_URL = os.getenv('QDRANT_URL')
+QDRANT_ACCESS_TOKEN = os.getenv('QDRANT_ACCESS_TOKEN')
+OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
+JWKS_URL = os.getenv('JWKS_URL')

requiements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi==0.78.0
+uvicorn==0.17.6
+pandas==1.3.5
+qdrant-client==0.9.2
+# app.py passes trust_remote_code= to SentenceTransformer, which the 2.2.x
+# constructor does not accept.
+sentence-transformers>=2.3.0,<3.0.0
+# services/openai_service.py imports the `OpenAI` client class and the
+# top-level `RateLimitError`, which are part of the 1.x SDK.
+openai>=1.0.0,<2.0.0
+# utils/auth.py verifies RS256-signed tokens; PyJWT needs the `cryptography`
+# package for RSA algorithms, which the `crypto` extra installs.
+PyJWT[crypto]==2.6.0
+python-dotenv==0.19.2

services/__init__py ADDED Viewed

File without changes

services/openai_service.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import logging
+import os
+from openai import OpenAI
+from openai import OpenAIError, RateLimitError
+from config import OPENAI_API_KEY
+# Initialize the shared OpenAI client with the API key exposed by config.py
+# (config.OPENAI_API_KEY is read from the OPENAI_API_KEY environment variable).
+client = OpenAI(api_key=OPENAI_API_KEY)
+def generate_rag_response(json_output, user_query):
+    logging.info("Generating RAG response")
+    # Extract text from the JSON output
+    context_texts = [hit['chunk_text'] for hit in json_output]
+    # Create the context for the prompt
+    context = "\n".join(context_texts)
+    prompt = f"Based on the given context, answer the user query: {user_query}\nContext:\n{context}"
+    main_prompt = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+    try:
+        # Create a chat completion request
+        chat_completion = client.chat.completions.create(
+            messages=main_prompt,
+            model="gpt-4o-mini",  # Use the gpt-4o-mini model
+            timeout=10
+        )
+        # Log the response from the model
+        logging.info("RAG response generation completed")
+        logging.info(f"RAG response: {chat_completion.choices[0].message.content}")
+        return chat_completion.choices[0].message.content, None
+    except RateLimitError as e:
+        logging.error(f"Rate limit exceeded: {e}")
+        return None, "Rate limit exceeded. Please try again later."
+    except OpenAIError as e:
+        logging.error(f"OpenAI API error: {e}")
+        return None, f"An error occurred: {str(e)}"
+    except Exception as e:
+        logging.error(f"Unexpected error: {e}")
+        return None, str(e)

services/qdrant_searcher.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import logging
+from qdrant_client import QdrantClient
+from qdrant_client.http.models import Filter, FieldCondition
+class QdrantSearcher:
+    def __init__(self, encoder, qdrant_url, access_token):
+        self.encoder = encoder
+        self.client = QdrantClient(url=qdrant_url, api_key=access_token)
+    def search_documents(self, collection_name, query, user_id, limit=3):
+        logging.info("Starting document search")
+        query_vector = self.encoder.encode(query).tolist()
+        query_filter = Filter(must=[FieldCondition(key="user_id", match={"value": user_id})])
+        try:
+            hits = self.client.search(
+                collection_name=collection_name,
+                query_vector=query_vector,
+                limit=limit,
+                query_filter=query_filter
+            )
+        except Exception as e:
+            logging.error(f"Error during Qdrant search: {e}")
+            return None, str(e)
+        if not hits:
+            logging.info("No documents found for the given query")
+            return None, "No documents found for the given query."
+        hits_list = []
+        for hit in hits:
+            hit_info = {
+                "id": hit.id,
+                "score": hit.score,
+                "file_id": hit.payload.get('file_id'),
+                "organization_id": hit.payload.get('organization_id'),
+                "chunk_index": hit.payload.get('chunk_index'),
+                "chunk_text": hit.payload.get('chunk_text'),
+                "s3_bucket_key": hit.payload.get('s3_bucket_key')
+            }
+            hits_list.append(hit_info)
+        logging.info(f"Document search completed with {len(hits_list)} hits")
+        return hits_list, None

utils/__init__.py ADDED Viewed

File without changes

utils/auth.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import logging
+from fastapi import Depends, HTTPException
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+import jwt
+from jwt import PyJWKClient
+from config import JWKS_URL
+# Bearer-token security scheme; token_required depends on it to obtain the
+# Authorization credentials of the incoming request.
+security = HTTPBearer()
+def get_public_key(token: str):
+    try:
+        jwks_client = PyJWKClient(JWKS_URL)
+        signing_key = jwks_client.get_signing_key_from_jwt(token)
+        return signing_key.key
+    except Exception as e:
+        logging.error(f"Error fetching public key: {e}")
+        raise
+def token_required(credentials: HTTPAuthorizationCredentials = Depends(security)):
+    token = credentials.credentials
+    try:
+        public_key = get_public_key(token)
+        decoded = jwt.decode(
+            token,
+            public_key,
+            algorithms=['RS256'],
+            issuer="https://assuring-lobster-64.clerk.accounts.dev"
+        )
+        customer_id = decoded.get('org_id')
+        user_id = decoded.get('sub')
+        logging.info(f"Customer/Org ID: {customer_id}, User ID: {user_id}")
+        if not customer_id:
+            logging.error("Customer ID is missing in the token!")
+            raise HTTPException(status_code=401, detail="Customer ID is missing in the token!")
+        return customer_id, user_id
+    except jwt.ExpiredSignatureError:
+        logging.error("Token has expired")
+        raise HTTPException(status_code=401, detail="Token has expired")
+    except jwt.InvalidTokenError as e:
+        logging.error(f"Invalid token: {e}")
+        raise HTTPException(status_code=401, detail="Invalid token")
+    except Exception as e:
+        logging.error(f"Error decoding token: {e}")
+        raise HTTPException(status_code=401, detail=str(e))