Deepak Saini committed
Commit 08fd05e · 1 Parent(s): 9c37c88

files added

Files changed (3)
  1. Dockerfile +16 -0
  2. app.py +42 -0
  3. utils.py +90 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

RUN useradd -m -u 1000 user

WORKDIR /app

COPY --chown=user ./requirements.txt requirements.txt

RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
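The Dockerfile installs dependencies from a requirements.txt that is not part of this commit. Judging from the imports in app.py and utils.py, a plausible unpinned version would look something like the sketch below; the actual file may differ or pin specific versions.

fastapi
uvicorn[standard]
pydantic
pinecone-client
transformers
torch
langchain-core
langchain-groq
python-dotenv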
app.py ADDED
@@ -0,0 +1,42 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from utils import retrive_context, generate_response


# Initialize FastAPI
app = FastAPI()


class QueryRequest(BaseModel):
    # The asked query must be a string
    query: str


class QueryResponse(BaseModel):
    # The response is returned as a string
    response: str


@app.post("/infer", response_model=QueryResponse)
def infer(query_request: QueryRequest):
    query = query_request.query
    context = retrive_context(query)
    if context is None:
        raise HTTPException(status_code=500, detail="Error retrieving context")

    response = generate_response(query, context)
    if response is None:
        raise HTTPException(status_code=500, detail="Error generating response")

    return QueryResponse(response=response)


# Root endpoint for testing
@app.get("/")
def read_root():
    return {"message": "Inference API is running"}


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="info")
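Once the service is up (on port 8000 when run locally, or port 7860 inside the Docker container), the /infer endpoint can be exercised with a small client. A minimal sketch, assuming a local uvicorn run and the requests package; the example question is purely illustrative:

import requests

BASE_URL = "http://127.0.0.1:8000"  # use port 7860 when hitting the Docker container

# Health check against the root endpoint
print(requests.get(f"{BASE_URL}/").json())  # {'message': 'Inference API is running'}

# POST a query; the JSON body must match the QueryRequest model
resp = requests.post(f"{BASE_URL}/infer", json={"query": "What services does Biskane offer?"})
resp.raise_for_status()
print(resp.json()["response"])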
utils.py ADDED
@@ -0,0 +1,90 @@
# Required modules
import os
from typing import Optional

from pinecone import Pinecone
from transformers import AutoModel
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from dotenv import load_dotenv

load_dotenv()


# Initialize clients, indexes, models, etc.
pc_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc_index = pc_client.Index(os.getenv("PINECONE_INDEX"))
embedding_model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en', trust_remote_code=True)
groq_llm = ChatGroq(
    groq_api_key=os.getenv("GROQ_API_KEY"),
    model_name="Llama3-8b-8192"
)


# Context retrieval
def retrive_context(user_query: str) -> Optional[str]:
    """Retrieves the context for the asked query from the vector database.

    Args:
        user_query (str): Question asked by the user to the bot

    Returns:
        context (str): The question's context, or None if retrieval fails
    """
    context = ""
    try:
        embedded_query = embedding_model.encode(user_query).tolist()
    except Exception:
        return None

    try:
        res = pc_index.query(
            vector=embedded_query,
            top_k=5,
            include_values=True,
            include_metadata=True
        )
    except Exception:
        return None

    for match in res['matches']:
        context = context + match['metadata']['text'] + " "

    print(context)  # Debug: log the retrieved context
    return context


# Prompt engineering for the LLM
prompt = ChatPromptTemplate.from_template(
    """
    Hello! As a RAG agent for Biskane, your task is to answer the user's question using the provided context. Please keep your responses brief and straightforward.

    <context>
    {context}
    </context>
    Question: {query}
    """
)


# Response generator
def generate_response(query: str, context: str) -> Optional[str]:
    """Generates the response for the asked question from the given context.

    Args:
        query (str): Query asked by the user to the bot
        context (str): Context retrieved from the vector database

    Returns:
        answer (str): Generated response, or None if generation fails
    """
    try:
        chain = prompt | groq_llm
        llm_response = chain.invoke({
            "context": context,
            "query": query
        })
        return llm_response.content
    except Exception:
        return None
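retrive_context expects every Pinecone match to carry a metadata['text'] field, so the index named by PINECONE_INDEX must have been populated in that shape (jina-embeddings-v2-base-en produces 768-dimensional vectors, so the index dimension must be 768). The ingestion script is not part of this commit; a minimal sketch of what it might look like, with hypothetical document texts:

import os
from pinecone import Pinecone
from transformers import AutoModel
from dotenv import load_dotenv

load_dotenv()

pc_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc_index = pc_client.Index(os.getenv("PINECONE_INDEX"))
embedding_model = AutoModel.from_pretrained(
    'jinaai/jina-embeddings-v2-base-en', trust_remote_code=True
)

# Hypothetical documents; in practice these would come from Biskane's own corpus
docs = [
    "Example passage describing Biskane.",
    "Another passage the bot should be able to retrieve.",
]

# Store each passage's text in metadata so retrive_context's
# match['metadata']['text'] lookup finds it
vectors = [
    (f"doc-{i}", embedding_model.encode(text).tolist(), {"text": text})
    for i, text in enumerate(docs)
]
pc_index.upsert(vectors=vectors)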