#%%
### Router

from src.index import *  # expected to provide the shared `llm` (and `retriever`) used below

from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field  # the langchain_core.pydantic_v1 shim is deprecated
# from langchain_openai import AzureChatOpenAI

#%%
# Data model
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    datasource: Literal["vectorstore", "web_search"] = Field(
        ...,
        description="Given a user question, choose whether to route it to web search or the vectorstore.",
    )


# LLM with function call
# llm = AzureChatOpenAI(model="gpt-4o-mini", temperature=0.3)
structured_llm_router = llm.with_structured_output(RouteQuery)

# Prompt
system = """You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to Indian Penal Code and The Indian Constitution. 
It can answer questions related to Indian Law, IPC and the Constitution.
Use vectorstore if the question is a legal query within the scope of IPC, Indian Law and the Indian Constitution.
Use web-search if the question is a legal query outside the scope of IPC, Indian Law and the Indian Constitution.
Use web-search and your own knowledge if the question requires general legal help.
Use web-search if the questions is a legal query that requires latest information."""
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
        ("human", "Chat context: {chat_context}"),
    ]
)

question_router = route_prompt | structured_llm_router
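
# A minimal usage sketch (hypothetical inputs; the routed datasource shown is
# illustrative, not guaranteed):
# route = question_router.invoke(
#     {"question": "What is the punishment for theft under the IPC?", "chat_context": ""}
# )
# print(route.datasource)  # e.g. "vectorstore"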

#%%
### Retrieval Grader

# Data model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

# LLM with function call
# llm = AzureChatOpenAI(model="gpt-4o-mini", temperature=0.3)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or is relevant to the user question, grade it as relevant. \n
    The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
    Return 'yes' if the document is relevant to the question, otherwise return 'no'.
    Also return 'yes' if the document may be relevant, and might be useful, otherwise return 'no'."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question} \n\n Chat context: {chat_context}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader


# question = "What is the punishment for theft under the IPC?"
# docs = retriever.invoke(question)
# doc_txt = docs[0].page_content
# print(retrieval_grader.invoke({"question": question, "document": doc_txt, "chat_context": ""}))

#%%
### Generate

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM
# llm = AzureChatOpenAI(model_name="gpt-4o-mini", temperature=0.3)



# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# Chain
rag_chain = prompt | llm | StrOutputParser()

# Run (uncomment to try):
# generation = rag_chain.invoke({"context": format_docs(docs), "question": question})
# print(generation)

#%%
### Hallucination Grader


# Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )


# LLM with function call
# llm = AzureChatOpenAI(model="gpt-4o-mini", temperature=0.3)
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# Prompt
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation} \n\n Chat context: {chat_context}"),
    ]
)

hallucination_grader = hallucination_prompt | structured_llm_grader
# hallucination_grader.invoke({"documents": docs, "generation": generation, "chat_context": ""})

#%%
### Answer Grader


# Data model
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )


# LLM with function call
# llm = AzureChatOpenAI(model="gpt-4o-mini", temperature=0.3)
structured_llm_grader = llm.with_structured_output(GradeAnswer)

# Prompt
system = """You are a grader assessing whether an answer addresses / resolves a question \n 
     Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation} \n\n Chat context: {chat_context}"),
    ]
)

answer_grader = answer_prompt | structured_llm_grader
# answer_grader.invoke({"question": question, "generation": generation, "chat_context": ""})

#%%
### Question Re-writer

# LLM
# llm = AzureChatOpenAI(model="gpt-4o-mini", temperature=0.3)

# Prompt
system = """You a question re-writer that converts an input question to a better version that is optimized \n 
     for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n\n Here is the chat context: \n\n {chat_context} \n. Use it to form a better question. Formulate an improved question.",
        ),
    ]
)

question_rewriter = re_write_prompt | llm | StrOutputParser()
# question_rewriter.invoke({"question": question, "chat_context": ""})

#%%
### Intent Classifier

class IntentClassifier(BaseModel):
    """Classify the intent of the user query."""

    intent: Literal["greeting", "legal_query", "follow_up", "off_topic"] = Field(
        ...,
        description="""Classify the intent of the user query.
        'greeting' if the user is greeting the assistant,
        'legal_query' if the user is asking for information about law,
        'follow_up' if the user is asking about or referring back to the previous conversation,
        'off_topic' if the user is asking about anything else.""",
    )

# LLM with function call
# llm = AzureChatOpenAI(model="gpt-4o-mini", temperature=0.3)
structured_llm_intent_classifier = llm.with_structured_output(IntentClassifier)

# Prompt
system = """You are an intent classifier that classifies the intent of a user query. \n 
    Give the intent as one of the following: 'greeting', 'legal_query', 'follow_up', 'off_topic'."""
intent_classifier_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Here is the user query: \n\n {question} \n\n Here is the chat context: \n\n {chat_context} \n\n Classify the intent of the user query."),
    ]
)

intent_classifier = intent_classifier_prompt | structured_llm_intent_classifier
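
# A minimal usage sketch (hypothetical input; the shown intent is illustrative):
# print(intent_classifier.invoke({"question": "Hello!", "chat_context": ""}).intent)  # e.g. "greeting"

#%%
### Example wiring (illustrative sketch)

# A minimal sketch of how these components could be chained for a single
# vectorstore-routed turn. It assumes `retriever` is exported by src.index
# (the commented retrieval-grader example above uses it); the web-search
# branch, question rewriting, and retry loops of the full graph are omitted.
def answer_once(question: str, chat_context: str = "") -> str:
    inputs = {"question": question, "chat_context": chat_context}
    # 1. Route the question.
    route = question_router.invoke(inputs)
    if route.datasource != "vectorstore":
        raise NotImplementedError("web_search routing is not wired in this sketch")
    # 2. Retrieve, keeping only documents the grader marks as relevant.
    docs = retriever.invoke(question)
    relevant = [
        doc for doc in docs
        if retrieval_grader.invoke({"document": doc.page_content, **inputs}).binary_score == "yes"
    ]
    # 3. Generate an answer from the surviving documents.
    generation = rag_chain.invoke({"context": format_docs(relevant), "question": question})
    # 4. Reject answers the hallucination grader finds ungrounded.
    grounded = hallucination_grader.invoke(
        {"documents": format_docs(relevant), "generation": generation, "chat_context": chat_context}
    )
    if grounded.binary_score != "yes":
        return "I could not find a well-grounded answer to that question."
    return generation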