root committed on
Commit
b0ccf04
1 Parent(s): 08fd05e

files added

Files changed (3)
  1. Dockerfile +16 -16
  2. app.py +42 -42
  3. utils.py +90 -90
Dockerfile CHANGED
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

RUN useradd -m -u 1000 user
# switch to the non-root user so the useradd and the --chown flags below take effect
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

COPY --chown=user ./requirements.txt requirements.txt

RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
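The Dockerfile installs from a requirements.txt that is not part of this commit. A minimal sketch of what it would need to contain, inferred from the imports in app.py and utils.py (the exact package names are assumptions and versions are left unpinned):

fastapi
uvicorn[standard]
pydantic
python-dotenv
pinecone-client
transformers
torch
langchain-core
langchain-groq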
app.py CHANGED
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from utils import retrieve_context, generate_response


# Initialize FastAPI
app = FastAPI()


class QueryRequest(BaseModel):
    # The incoming query must be a string
    query: str


class QueryResponse(BaseModel):
    # The generated answer is returned as a string
    response: str


@app.post("/infer", response_model=QueryResponse)
def infer(query_request: QueryRequest):
    query = query_request.query
    context = retrieve_context(query)
    if context == 500:  # 500 is the error sentinel returned by utils.py
        raise HTTPException(status_code=500, detail="Error retrieving context")

    response = generate_response(query, context)
    if response == 500:  # same sentinel convention
        raise HTTPException(status_code=500, detail="Error generating response")

    return QueryResponse(response=response)


# Root endpoint for testing
@app.get("/")
def read_root():
    return {"message": "Inference API is running"}


# Local development entry point; in the container, uvicorn is started by the Dockerfile CMD instead
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="info")
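Once the app is running (locally via python app.py, or in the container on port 7860), the /infer endpoint can be smoke-tested with a small client. A sketch using only the Python standard library; the URL and example query are placeholders, not part of this commit:

import json
import urllib.request

# assumes the local dev server from app.py's __main__ block; use the Space URL on port 7860 when containerized
url = "http://127.0.0.1:8000/infer"
payload = json.dumps({"query": "What does Biskane do?"}).encode("utf-8")
req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/json"})
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["response"])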
utils.py CHANGED
# Required modules
import os
from pinecone import Pinecone
from transformers import AutoModel
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from dotenv import load_dotenv

load_dotenv()


# Initialize clients, indexes, models, etc.
pc_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc_index = pc_client.Index(os.getenv("PINECONE_INDEX"))
embedding_model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en', trust_remote_code=True)
groq_llm = ChatGroq(
    groq_api_key=os.getenv("GROQ_API_KEY"),
    model_name="Llama3-8b-8192"
)


# Context retrieval
def retrieve_context(user_query: str) -> str:
    """Retrieves the context for the asked query from the vector database.

    Args:
        user_query (str): Question asked by the user

    Returns:
        context (str): The retrieved context, or the int 500 as an error sentinel
    """
    context = ""
    try:
        # jina-embeddings-v2 exposes an encode() method via trust_remote_code
        embedded_query = embedding_model.encode(user_query).tolist()
    except Exception:
        return 500

    try:
        res = pc_index.query(
            vector=embedded_query,
            top_k=5,
            include_values=True,
            include_metadata=True
        )
    except Exception:
        return 500

    # Concatenate the source text stored in each match's metadata
    for match in res['matches']:
        context += match['metadata']['text'] + " "

    print(context)
    return context


# Prompt engineering for the LLM
prompt = ChatPromptTemplate.from_template(
    """
    Hello! As a RAG agent for Biskane, your task is to answer the user's question using the provided context. Please keep your responses brief and straightforward.

    <context>
    {context}
    </context>
    Question: {query}
    """
)


# Response generator
def generate_response(query: str, context: str) -> str:
    """Generates the response for the asked question from the given context.

    Args:
        query (str): Query asked by the user
        context (str): Context retrieved from the vector database

    Returns:
        answer (str): Generated response, or the int 500 as an error sentinel
    """
    try:
        chain = prompt | groq_llm
        llm_response = chain.invoke({
            "context": context,
            "query": query
        })
        return llm_response.content
    except Exception:
        return 500
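retrieve_context reads each match's source text from metadata['text'], so the Pinecone index must have been populated with that metadata layout. A hedged sketch of an upsert script consistent with that convention; the chunks, IDs, and the script itself are illustrative assumptions, not part of this commit:

# hypothetical indexing script, not included in this commit
from utils import embedding_model, pc_index

chunks = ["Biskane is a ...", "Another chunk of documentation ..."]  # illustrative document chunks
vectors = [
    # (id, embedding values, metadata) tuples; retrieve_context expects the text under metadata['text']
    (f"doc-{i}", embedding_model.encode(text).tolist(), {"text": text})
    for i, text in enumerate(chunks)
]
pc_index.upsert(vectors=vectors)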