izammohammed committed
Commit
30b1ac9
1 Parent(s): 5a3a502

added source files

Files changed (4)
  1. src/helper.py +27 -0
  2. src/llama_call.py +55 -0
  3. src/openai_call.py +50 -0
  4. src/prompt.py +16 -0
src/helper.py ADDED
@@ -0,0 +1,27 @@
+ from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+
+
+ # Extract data from the PDFs
+ def load_pdf(data):
+     loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
+
+     documents = loader.load()
+
+     return documents
+
+
+ # Create text chunks
+ def text_split(extracted_data):
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
+     text_chunks = text_splitter.split_documents(extracted_data)
+
+     return text_chunks
+
+
+ def download_hugging_face_embeddings():
+     embeddings = HuggingFaceEmbeddings(
+         model_name="sentence-transformers/all-MiniLM-L6-v2"
+     )
+     return embeddings
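These helpers only load, chunk, and embed the PDFs; the commit does not include the script that populates the Pinecone index the two callers read from. A minimal sketch of that missing step, assuming a data/ directory of PDFs and the "llm-chatbot" index name used below:

# Hypothetical indexing script (not part of this commit): builds the Pinecone
# index that llama_call.py and openai_call.py read from.
from src.helper import load_pdf, text_split, download_hugging_face_embeddings
from langchain_pinecone import PineconeVectorStore
from dotenv import load_dotenv

load_dotenv()  # expects PINECONE_API_KEY in the environment

documents = load_pdf("data/")  # "data/" is an assumed PDF directory
text_chunks = text_split(documents)
embeddings = download_hugging_face_embeddings()

# "llm-chatbot" matches the index name used by both callers
PineconeVectorStore.from_documents(text_chunks, embeddings, index_name="llm-chatbot")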
src/llama_call.py ADDED
@@ -0,0 +1,55 @@
+ from src.helper import download_hugging_face_embeddings
+ from langchain_pinecone import PineconeVectorStore
+ from langchain.prompts import PromptTemplate
+ from langchain_community.llms import CTransformers
+ from langchain.chains import RetrievalQA
+ from dotenv import load_dotenv
+ from src.prompt import prompt_template
+ import os
+
+ load_dotenv()
+
+ PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
+ PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV")
+
+
+ embeddings = download_hugging_face_embeddings()
+ index_name = "llm-chatbot"
+
+ # Initialize the Pinecone vector store from the existing index
+ docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
+
+
+ PROMPT = PromptTemplate(
+     template=prompt_template, input_variables=["context", "question"]
+ )
+
+ chain_type_kwargs = {"prompt": PROMPT}
+
+ current_dir = os.getcwd()
+ llm = CTransformers(
+     model=os.path.join(current_dir, "saved_models/llama-2-7b-chat.ggmlv3.q4_0.bin"),
+     model_type="llama",
+     streaming=True,
+     config={"max_new_tokens": 256, "temperature": 0.6, "context_length": -1},
+ )
+
+
+ qa = RetrievalQA.from_chain_type(
+     llm=llm,
+     chain_type="stuff",
+     retriever=docsearch.as_retriever(search_kwargs={"k": 2}),
+     return_source_documents=True,
+     chain_type_kwargs=chain_type_kwargs,
+     verbose=True,
+ )
+
+
+ def llama_call(input):
+     result = qa.invoke({"query": input})
+     return str(result["result"])
+
+
+ if __name__ == "__main__":
+     msg = "If a previous owner of a land had allowed a neighbour or neighbours to walk or drive over his land as a shortcut, and this has been going on for say a decade or so, can I as the new owner stop them now from using the shortcut?"
+     print(f"response: {llama_call(msg)}")
src/openai_call.py ADDED
@@ -0,0 +1,50 @@
+ from src.helper import download_hugging_face_embeddings
+ from langchain_pinecone import PineconeVectorStore
+ from langchain.prompts import PromptTemplate
+ from langchain_openai import OpenAI
+ from langchain.chains import RetrievalQA
+ from dotenv import load_dotenv
+ from src.prompt import prompt_template
+ import os
+
+ load_dotenv()
+
+ PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
+ PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV")
+
+
+ embeddings = download_hugging_face_embeddings()
+ index_name = "llm-chatbot"
+
+ # Initialize the Pinecone vector store from the existing index
+ docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
+
+
+ PROMPT = PromptTemplate(
+     template=prompt_template, input_variables=["context", "question"]
+ )
+
+ chain_type_kwargs = {"prompt": PROMPT}
+
+ current_dir = os.getcwd()
+ llm = OpenAI()
+
+
+ qa = RetrievalQA.from_chain_type(
+     llm=llm,
+     chain_type="stuff",
+     retriever=docsearch.as_retriever(search_kwargs={"k": 2}),
+     return_source_documents=True,
+     chain_type_kwargs=chain_type_kwargs,
+     verbose=True,
+ )
+
+
+ def openai_call(input):
+     result = qa.invoke({"query": input})
+     return str(result["result"])
+
+
+ if __name__ == "__main__":
+     msg = "If a previous owner of a land had allowed a neighbour or neighbours to walk or drive over his land as a shortcut, and this has been going on for say a decade or so, can I as the new owner stop them now from using the shortcut?"
+     print(f"response: {openai_call(msg)}")
src/prompt.py ADDED
@@ -0,0 +1,16 @@
+ prompt_template = """
+ As a seasoned legal advisor, you possess deep knowledge of legal intricacies and are skilled in referencing relevant laws and regulations. Users will seek guidance on various legal matters.
+
+ If a question falls outside the scope of legal expertise, kindly inform the user that your specialization is limited to legal advice.
+
+ In cases where you're uncertain of the answer, it's important to uphold integrity by admitting 'I don't know' rather than providing potentially erroneous information.
+
+ Below is a snippet of context from the relevant section of the constitution, although it will not be disclosed to users.
+
+ Context: {context}
+ Question: {question}
+
+ Your response should consist solely of helpful advice without any extraneous details.
+
+ Helpful advice:
+ """