Deepak Saini committed
Commit 08fd05e · 1 Parent(s): 9c37c88

files added

Files changed (3)
  1. Dockerfile +16 -0
  2. app.py +42 -0
  3. utils.py +90 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

RUN useradd -m -u 1000 user

WORKDIR /app

COPY --chown=user ./requirements.txt requirements.txt

RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
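The Dockerfile installs dependencies from a requirements.txt that is not part of this commit. Judging from the imports in app.py and utils.py, a plausible unpinned version would look something like the sketch below; the actual file may differ or pin specific versions.

fastapi
uvicorn[standard]
pydantic
pinecone-client
transformers
torch
langchain-core
langchain-groq
python-dotenv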
app.py ADDED
@@ -0,0 +1,42 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from utils import retrive_context, generate_response


# Initialize FastAPI
app = FastAPI()


class QueryRequest(BaseModel):
    # The asked query must be a string
    query: str


class QueryResponse(BaseModel):
    # The response is returned as a string
    response: str


@app.post("/infer", response_model=QueryResponse)
def infer(query_request: QueryRequest):
    query = query_request.query
    context = retrive_context(query)
    if context is None:
        raise HTTPException(status_code=500, detail="Error retrieving context")

    response = generate_response(query, context)
    if response is None:
        raise HTTPException(status_code=500, detail="Error generating response")

    return QueryResponse(response=response)


# Root endpoint for testing
@app.get("/")
def read_root():
    return {"message": "Inference API is running"}


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="info")
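Once the service is up (on port 8000 when run locally, or port 7860 inside the Docker container), the /infer endpoint can be exercised with a small client. A minimal sketch, assuming a local uvicorn run and the requests package; the example question is purely illustrative:

import requests

BASE_URL = "http://127.0.0.1:8000"  # use port 7860 when hitting the Docker container

# Health check against the root endpoint
print(requests.get(f"{BASE_URL}/").json())  # {'message': 'Inference API is running'}

# POST a query; the JSON body must match the QueryRequest model
resp = requests.post(f"{BASE_URL}/infer", json={"query": "What services does Biskane offer?"})
resp.raise_for_status()
print(resp.json()["response"])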
utils.py ADDED
@@ -0,0 +1,90 @@
# Required modules
import os
from typing import Optional

from pinecone import Pinecone
from transformers import AutoModel
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from dotenv import load_dotenv

load_dotenv()


# Initialize clients, indexes, models, etc.
pc_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc_index = pc_client.Index(os.getenv("PINECONE_INDEX"))
embedding_model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en', trust_remote_code=True)
groq_llm = ChatGroq(
    groq_api_key=os.getenv("GROQ_API_KEY"),
    model_name="Llama3-8b-8192"
)


# Context retrieval
def retrive_context(user_query: str) -> Optional[str]:
    """Retrieves the context for the asked query from the vector database.

    Args:
        user_query (str): Question asked by the user to the bot

    Returns:
        context (str): The question's context, or None if retrieval fails
    """
    context = ""
    try:
        embedded_query = embedding_model.encode(user_query).tolist()
    except Exception:
        return None

    try:
        res = pc_index.query(
            vector=embedded_query,
            top_k=5,
            include_values=True,
            include_metadata=True
        )
    except Exception:
        return None

    for match in res['matches']:
        context = context + match['metadata']['text'] + " "

    print(context)  # Debug: log the retrieved context
    return context


# Prompt engineering for the LLM
prompt = ChatPromptTemplate.from_template(
    """
    Hello! As a RAG agent for Biskane, your task is to answer the user's question using the provided context. Please keep your responses brief and straightforward.

    <context>
    {context}
    </context>
    Question: {query}
    """
)


# Response generator
def generate_response(query: str, context: str) -> Optional[str]:
    """Generates the response for the asked question from the given context.

    Args:
        query (str): Query asked by the user to the bot
        context (str): Context retrieved from the vector database

    Returns:
        answer (str): Generated response, or None if generation fails
    """
    try:
        chain = prompt | groq_llm
        llm_response = chain.invoke({
            "context": context,
            "query": query
        })
        return llm_response.content
    except Exception:
        return None
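retrive_context expects every Pinecone match to carry a metadata['text'] field, so the index named by PINECONE_INDEX must have been populated in that shape (jina-embeddings-v2-base-en produces 768-dimensional vectors, so the index dimension must be 768). The ingestion script is not part of this commit; a minimal sketch of what it might look like, with hypothetical document texts:

import os
from pinecone import Pinecone
from transformers import AutoModel
from dotenv import load_dotenv

load_dotenv()

pc_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc_index = pc_client.Index(os.getenv("PINECONE_INDEX"))
embedding_model = AutoModel.from_pretrained(
    'jinaai/jina-embeddings-v2-base-en', trust_remote_code=True
)

# Hypothetical documents; in practice these would come from Biskane's own corpus
docs = [
    "Example passage describing Biskane.",
    "Another passage the bot should be able to retrieve.",
]

# Store each passage's text in metadata so retrive_context's
# match['metadata']['text'] lookup finds it
vectors = [
    (f"doc-{i}", embedding_model.encode(text).tolist(), {"text": text})
    for i, text in enumerate(docs)
]
pc_index.upsert(vectors=vectors)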