diff --git "a/Langchain_bot.ipynb" "b/Langchain_bot.ipynb" new file mode 100644--- /dev/null +++ "b/Langchain_bot.ipynb" @@ -0,0 +1,6682 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "## langchain-0.0.129\n", + "# ! pip install -U langchain" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: python-dotenv in d:\\anaconda3\\envs\\nlp\\lib\\site-packages (1.0.0)\n" + ] + } + ], + "source": [ + "# !pip install python-dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install -U chromadb " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass  # stdlib; prompts for the key without echoing it or storing it in the notebook\n", + "os.environ[\"OPENAI_API_KEY\"] = os.environ.get(\"OPENAI_API_KEY\") or getpass(\"OpenAI API key: \")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.indexes import VectorstoreIndexCreator\n", + "from langchain.vectorstores import Chroma\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.document_loaders import DirectoryLoader\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.document_loaders import UnstructuredPDFLoader\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "docs = DirectoryLoader('Data/Policies/').load()\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size = 200, chunk_overlap = 50)\n", + "all_splits = text_splitter.split_documents(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# 
bm25_retriever = BM25Retriever.from_documents(all_splits)\n", + "\n", + "# # initialize the bm25 retriever and faiss retriever\n", + "# # bm25_retriever = BM25Retriever.from_texts(doc_list)\n", + "# # bm25_retriever = BM25Retriever.from_documents(docs)\n", + "# bm25_retriever.k = 2\n", + "\n", + "embedding = OpenAIEmbeddings()\n", + "VECTOR_STORE_DIRECTORY = \"Vector Store\\\\\"\n", + "# NOTE: despite the faiss_* names this is a Chroma store rebuilt from all_splits on every run; nothing is loaded from or saved to VECTOR_STORE_DIRECTORY.\n", + "faiss_vectorstore = Chroma.from_documents(all_splits, embedding)\n", + "faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={\"k\": 2})\n", + "\n", + "# initialize the ensemble retriever\n", + "# ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5])\n", + "\n", + "from langchain.chains import RetrievalQA\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.chat_models import ChatOpenAI\n", + "template = \"\"\"\n", + "You are an Expert Policy Advisor.These Below are the Documents that are extracted from the different Policies.Your Job \n", + " is to Provide the Answer to below question based on the text below. 
\n", + " Here are few instructions for you to follow when answering a question.\n", + " - When you didnt find the relevant answers from below text Just Say \"I dont know this,Please contact your HRBP for more details.\"\n", + " - These are policy Documents, When answering a question Do Not return in response that \"This information is At Annex A/B\".Provide a Complete response to request.\n", + " - Try to answer the questions in bullet format if possible.\n", + " - Use three sentences maximum to Answer the question in very concise manner\n", + " \n", + " \n", + " {context}\n", + " Question: {question}\n", + " Helpful Answer:\n", + " \"\"\"\n", + "QA_CHAIN_PROMPT = PromptTemplate.from_template(template)\n", + "\n", + "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "\n", + "qa_chain = RetrievalQA.from_chain_type(\n", + "    llm,\n", + "    retriever=faiss_retriever,\n", + "    chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT}\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'- The leave policy allows employees to take time off from work for various reasons such as vacation, personal illness, or family emergencies.\\n- Employees are typically granted a certain number of paid leave days per year, which may vary based on their length of service or job level.\\n- The policy outlines the process for requesting and approving leave, as well as any restrictions or requirements for taking leave.'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qa_chain.run(\"what is leaves plicy?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Testing Different Document Loaders/ Splitters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Markdown Splitter\n", + "- This will split the documents based on their Headings. 
\n", + " - I think this will be a better approach for our use case, as the policies are divided into separate chunks based on their headings. I think tables will also be handled in this case." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import PDFMinerLoader\n", + "from langchain.document_loaders import PDFMinerPDFasHTMLLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "loader = PDFMinerPDFasHTMLLoader(\"Data/Policies/2.16 Role Based Entitlements Policy V7 22.pdf\")\n", + "html_docs = loader.load()  # parse the PDF once; the next cell reuses this result" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "data = html_docs[0]  # the whole PDF comes back as a single HTML Document" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "
Page 1
\n", + "
2. Benefits \n", + "
2.16 Role Based Entitlements Policy \n", + "
Owner \n", + "
Approved By \n", + "
Version 1.0 \n", + "
Version 2.0 \n", + "
Version 3.0 \n", + "
Version 4.0 \n", + "
Version 5.0 \n", + "
Version 6.0 \n", + "
Version 7.0 \n", + "
People & Organization \n", + "
CPO, CFO, CEO \n", + "
01/01/2019 \n", + "
01/04/2019 \n", + "
01/07/2019 \n", + "
01/05/2020 \n", + "
01/01/2021 \n", + "
01/05/2021 \n", + "
04/02/2022 \n", + "
2.16 Role Based Entitlements Policy \n", + "
Purpose \n", + "
Jazz enables its people to perform their Jobs by equipping them with the required facilities. Under this \n", + "
policy, all field base roles are provided with car maintenance allowance (CMA) and Fuel to accomplish \n", + "
their business targets within their assigned territories. \n", + "
Field Base Roles \n", + "
Any role where majority of working time during the day is spent on field for business purpose. \n", + "
Application \n", + "
All Jazz employees on field roles (as mentioned in annexure A) that require CMA and Fuel support on a \n", + "
daily basis to achieve performance KPIs. \n", + "
Entitlements \n", + "
Eligible roles and their respective entitlement are shared in annexure A. \n", + "
Features \n", + "
Your CMA and fuel eligibility is subject to business requirements and can change or cease as the \n", + "
requirements evolve. \n", + "
At any point in time, your monthly fuel entitlement will be higher of the two limits i.e. role \n", + "
based and grade-based fuel limit. \n", + "
Your CMA and fuel is meant for addressing your travel needs within your allocated territories; \n", + "
for other business travel needs you can follow the regular travel process. \n", + "
This policy does not apply to technology staff, they will be facilitated through 228 service. \n", + "
Changes in annexure with respect to eligible field-based roles can be updated based on \n", + "
alignment of respective HOD, HRBP and Rewards Team. \n", + "
Specific roles can be assigned with the benefit supported by a business rationale and duly \n", + "
aligned from the respective CXO, HOD, HRBP and Rewards Team. \n", + "
CMA is not applicable to L3 and above job grades. \n", + "
Hardship Allowance is provided for specific roles deployed in remote areas (details in \n", + "
Annexure B). \n", + "
The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, \n", + "
in which case your employment shall be governed by such revised rules and regulations \n", + "
1 \n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
Page 2
\n", + "
Annexure A | CMA/Fuel Entitlement \n", + "
Commercial \n", + "
Department \n", + "
Job Title \n", + "
Regional Sales \n", + "
Regional Sales / Channel \n", + "
Planning \n", + "
Credit & Collection \n", + "
Regional Sales Head \n", + "
Area Manager \n", + "
Territory Sales Supervisor \n", + "
Franchise Services Executive \n", + "
Trade Marketing Officer / \n", + "
Executive \n", + "
Regional Supervisor Credit & \n", + "
Collection \n", + "
Jazz Business \n", + "
Department \n", + "
Job Title \n", + "
B2B Sales \n", + "
Head of B2G/LA/SME \n", + "
B2G \n", + "
Business Development \n", + "
Manager \n", + "
Corporate Solutions \n", + "
Manager Corporate Solutions \n", + "
Grades \n", + "
L4 \n", + "
L2 & L3 \n", + "
L1 \n", + "
L1 & L2 \n", + "
Grades \n", + "
L4 \n", + "
Entitlement \n", + "
(monthly) \n", + "
Fuel: 300ltrs \n", + "
Driver: PKR 20,000 \n", + "
CMA: PKR 25,000 \n", + "
Fuel: 200ltrs \n", + "
CMA: PKR 15,000 \n", + "
Fuel: 100ltrs \n", + "
CMA: PKR 15,000 \n", + "
Fuel: 150ltrs \n", + "
CMA: PKR 5,000 \n", + "
Fuel: 150ltrs \n", + "
Entitlement \n", + "
(monthly) \n", + "
Fuel: 300ltrs \n", + "
Driver: PKR 20,000 \n", + "
Enterprise Sales \n", + "
Manager Enterprise Sales \n", + "
L3 \n", + "
Fuel: 200ltrs \n", + "
B2B Marketing M2M \n", + "
Manager M2M Solutions \n", + "
B2B Marketing MFS & Agri \n", + "
Manager MFS & Agri \n", + "
B2G \n", + "
Corporate Solutions \n", + "
Enterprise Sales \n", + "
B2B Marketing M2M \n", + "
B2B Marketing MFS & Agri \n", + "
B2B Operations \n", + "
Business Development \n", + "
Manager / Team Lead \n", + "
Key Account Manager / \n", + "
Business Consultants \n", + "
Manager Corporate Solutions \n", + "
CAM / Business Consultants \n", + "
Manager Enterprise Sales \n", + "
KAM M2M / Business \n", + "
Consultants \n", + "
Business Development \n", + "
Manager \n", + "
Collection Executives / \n", + "
Supervisors \n", + "
L1 & L2 \n", + "
CMA: PKR 25,000 \n", + "
Fuel: 200ltrs
\n", + "
CMA: PKR 5,000 \n", + "
Fuel: 150ltrs
\n", + "
The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, \n", + "
in which case your employment shall be governed by such revised rules and regulations \n", + "
2 \n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
Page 3
\n", + "
Digital Financial Services \n", + "
Department \n", + "
Job Title \n", + "
DFS Channels Alternative \n", + "
Distribution Channel \n", + "
DFS Channels SME \n", + "
DFS Channels Trade \n", + "
Marketing \n", + "
Regional Head of Alternative Distribution \n", + "
Channel \n", + "
Business Development Experts \n", + "
Head of SME Business \n", + "
Expert SME Business \n", + "
Trade Marketing Officer / Executives \n", + "
Head of Public / Private Partnerships \n", + "
Manager Public / Private Partnerships \n", + "
Strategic Partnerships \n", + "
Business Development Roles \n", + "
Specialist / Expert Partnership Lifecycle \n", + "
Management (North / Central / South) \n", + "
Legal \n", + "
Department \n", + "
Job Title \n", + "
Litigation \n", + "
Sr. Legal Counsel \n", + "
(South) \n", + "
C&RA \n", + "
Department \n", + "
Job Title \n", + "
Corporate Affairs \n", + "
Specialist / Expert \n", + "
Corporate Affairs \n", + "
P&O \n", + "
Department \n", + "
Job Title \n", + "
Administration & Real \n", + "
Estate \n", + "
Executive Facilities \n", + "
Management \n", + "
(Power/Genset) \n", + "
Grade \n", + "
L3 \n", + "
Grade \n", + "
L1/L2 \n", + "
Grade \n", + "
L1 \n", + "
Annexure B | Hardship Allowance Entitlement \n", + "
Location \n", + "
Quetta \n", + "
Gwadar \n", + "
Gilgit \n", + "
Role \n", + "
Regional Sales Head \n", + "
Area Sales Manager \n", + "
Zonal Manager \n", + "
Grades \n", + "
L4 \n", + "
L3 \n", + "
L1 & L2 \n", + "
L3 \n", + "
L1 & L2 \n", + "
L1 & L2 \n", + "
L4 \n", + "
L3 \n", + "
L1 / L2 \n", + "
Entitlement \n", + "
(monthly) \n", + "
Fuel: 300ltrs \n", + "
Fuel: 200ltrs \n", + "
CMA: PKR 25,000 \n", + "
Fuel: 200ltrs \n", + "
Fuel: 200ltrs \n", + "
CMA: PKR 25,000 \n", + "
Fuel: 200ltrs \n", + "
CMA: PKR 15,000 \n", + "
Fuel: 100ltrs \n", + "
Fuel: 300ltrs \n", + "
Fuel 200ltrs \n", + "
CMA: PKR 25,000 \n", + "
Fuel: 200ltrs \n", + "
Location \n", + "
Entitlement \n", + "
(monthly) \n", + "
Karachi \n", + "
Fuel: 200ltrs \n", + "
Location \n", + "
Entitlement \n", + "
(monthly) \n", + "
Islamabad \n", + "
Fuel: 175/200ltrs \n", + "
Location \n", + "
Entitlement \n", + "
(monthly) \n", + "
Islamabad \n", + "
Fuel: 150ltrs \n", + "
Entitlement \n", + "
(monthly) \n", + "
PKR 50,000 \n", + "
PKR 25,000 \n", + "
3 \n", + "
The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, \n", + "
in which case your employment shall be governed by such revised rules and regulations \n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
Page 4
\n", + "
DI Khan \n", + "
Zhob \n", + "
MBU Lead \n", + "
The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, \n", + "
in which case your employment shall be governed by such revised rules and regulations \n", + "
4 \n", + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
Page: 1, 2, 3, 4
\n", + "\n" + ] + } + ], + "source": [ + "print(data.page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "from bs4 import BeautifulSoup\n", + "soup = BeautifulSoup(data.page_content,'html.parser')\n", + "content = soup.find_all('div')" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "cur_fs = None\n", + "cur_text = ''\n", + "snippets = []  # first collect all snippets that have the same font size\n", + "for c in content:\n", + "    sp = c.find('span')\n", + "    if not sp:\n", + "        continue\n", + "    st = sp.get('style')\n", + "    if not st:\n", + "        continue\n", + "    fs = re.findall(r'font-size:(\\d+)px',st)  # raw string so \\d is a regex class, not an invalid str escape\n", + "    if not fs:\n", + "        continue\n", + "    fs = int(fs[0])\n", + "    if not cur_fs:\n", + "        cur_fs = fs\n", + "    if fs == cur_fs:\n", + "        cur_text += c.text\n", + "    else:\n", + "        snippets.append((cur_text,cur_fs))\n", + "        cur_fs = fs\n", + "        cur_text = c.text\n", + "snippets.append((cur_text,cur_fs))\n", + "# Note: The above logic is very straightforward. 
One can also add more strategies such as removing duplicate snippets (as\n", + "# headers/footers in a PDF appear on multiple pages so if we find duplicates it's safe to assume that it is redundant info)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.docstore.document import Document\n", + "cur_idx = -1\n", + "semantic_snippets = []\n", + "# Assumption: headings have higher font size than their respective content\n", + "for s in snippets:\n", + "    # if current snippet's font size > previous section's heading => it is a new heading\n", + "    if not semantic_snippets or s[1] > semantic_snippets[cur_idx].metadata['heading_font']:\n", + "        metadata={'heading':s[0], 'content_font': 0, 'heading_font': s[1]}\n", + "        metadata.update(data.metadata)\n", + "        semantic_snippets.append(Document(page_content='',metadata=metadata))\n", + "        cur_idx += 1\n", + "        continue\n", + "\n", + "    # if current snippet's font size <= previous section's content => content belongs to the same section (one can also create\n", + "    # a tree like structure for sub sections if needed but that may require some more thinking and may be data specific)\n", + "    if not semantic_snippets[cur_idx].metadata['content_font'] or s[1] <= semantic_snippets[cur_idx].metadata['content_font']:\n", + "        semantic_snippets[cur_idx].page_content += s[0]\n", + "        semantic_snippets[cur_idx].metadata['content_font'] = max(s[1], semantic_snippets[cur_idx].metadata['content_font'])\n", + "        continue\n", + "\n", + "    # if current snippet's font size > previous section's content but less than previous section's heading than also make a new\n", + "    # section (e.g. 
title of a PDF will have the highest font size but we don't want it to subsume all sections)\n", + "    metadata={'heading':s[0], 'content_font': 0, 'heading_font': s[1]}\n", + "    metadata.update(data.metadata)\n", + "    semantic_snippets.append(Document(page_content='',metadata=metadata))\n", + "    cur_idx += 1" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Purpose \n", + "Jazz enables its people to perform their Jobs by equipping them with the required facilities. Under this \n", + "policy, all field base roles are provided with car maintenance allowance (CMA) and Fuel to accomplish \n", + "their business targets within their assigned territories. \n", + "Field Base Roles \n", + "Any role where majority of working time during the day is spent on field for business purpose. \n", + "Application \n", + "All Jazz employees on field roles (as mentioned in annexure A) that require CMA and Fuel support on a \n", + "daily basis to achieve performance KPIs. \n", + "Entitlements \n", + "Eligible roles and their respective entitlement are shared in annexure A. \n", + "Features \n", + "▪ Your CMA and fuel eligibility is subject to business requirements and can change or cease as the \n", + "requirements evolve. \n", + "▪ At any point in time, your monthly fuel entitlement will be higher of the two limits i.e. role \n", + "based and grade-based fuel limit. \n", + "▪ Your CMA and fuel is meant for addressing your travel needs within your allocated territories; \n", + "for other business travel needs you can follow the regular travel process. \n", + "▪ This policy does not apply to technology staff, they will be facilitated through 228 service. \n", + "▪ Changes in annexure with respect to eligible field-based roles can be updated based on \n", + "alignment of respective HOD, HRBP and Rewards Team. 
\n", + "▪ Specific roles can be assigned with the benefit supported by a business rationale and duly \n", + "aligned from the respective CXO, HOD, HRBP and Rewards Team. \n", + "▪ CMA is not applicable to L3 and above job grades. \n", + "▪ Hardship Allowance is provided for specific roles deployed in remote areas (details in \n", + "Annexure B). \n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, \n", + "in which case your employment shall be governed by such revised rules and regulations \n", + "1 \n", + "Annexure A | CMA/Fuel Entitlement \n", + "Commercial \n", + "Department \n", + "Job Title \n", + "Regional Sales \n", + "Regional Sales / Channel \n", + "Planning \n", + "Credit & Collection \n", + "Regional Sales Head \n", + "Area Manager \n", + "Territory Sales Supervisor \n", + "Franchise Services Executive \n", + "Trade Marketing Officer / \n", + "Executive \n", + "Regional Supervisor Credit & \n", + "Collection \n", + "Jazz Business \n", + "Department \n", + "Job Title \n", + "B2B Sales \n", + "Head of B2G/LA/SME \n", + "B2G \n", + "Business Development \n", + "Manager \n", + "Corporate Solutions \n", + "Manager Corporate Solutions \n", + "Grades \n", + "L4 \n", + "L2 & L3 \n", + "L1 \n", + "L1 & L2 \n", + "Grades \n", + "L4 \n", + "Entitlement \n", + "(monthly) \n", + "Fuel: 300ltrs \n", + "Driver: PKR 20,000 \n", + "CMA: PKR 25,000 \n", + "Fuel: 200ltrs \n", + "CMA: PKR 15,000 \n", + "Fuel: 100ltrs \n", + "CMA: PKR 15,000 \n", + "Fuel: 150ltrs \n", + "CMA: PKR 5,000 \n", + "Fuel: 150ltrs \n", + "Entitlement \n", + "(monthly) \n", + "Fuel: 300ltrs \n", + "Driver: PKR 20,000 \n", + "Enterprise Sales \n", + "Manager Enterprise Sales \n", + "L3 \n", + "Fuel: 200ltrs \n", + "B2B Marketing – M2M \n", + "Manager M2M Solutions \n", + "B2B Marketing – MFS & Agri \n", + "Manager MFS & Agri \n", + "B2G \n", + "Corporate Solutions \n", + "Enterprise Sales \n", + 
"B2B Marketing – M2M \n", + "B2B Marketing – MFS & Agri \n", + "B2B Operations \n", + "Business Development \n", + "Manager / Team Lead \n", + "Key Account Manager / \n", + "Business Consultants \n", + "Manager Corporate Solutions \n", + "CAM / Business Consultants \n", + "Manager Enterprise Sales \n", + "KAM – M2M / Business \n", + "Consultants \n", + "Business Development \n", + "Manager \n", + "Collection Executives / \n", + "Supervisors \n", + "L1 & L2 \n", + "CMA: PKR 25,000 \n", + "Fuel: 200ltrs\n", + "CMA: PKR 5,000 \n", + "Fuel: 150ltrs\n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, \n", + "in which case your employment shall be governed by such revised rules and regulations \n", + "2 \n", + "Digital Financial Services \n", + "Department \n", + "Job Title \n", + "DFS Channels – Alternative \n", + "Distribution Channel \n", + "DFS Channels – SME \n", + "DFS Channels – Trade \n", + "Marketing \n", + "Regional Head of Alternative Distribution \n", + "Channel \n", + "Business Development Experts \n", + "Head of SME Business \n", + "Expert SME Business \n", + "Trade Marketing Officer / Executives \n", + "Head of Public / Private Partnerships \n", + "Manager Public / Private Partnerships \n", + "Strategic Partnerships \n", + "Business Development Roles \n", + "Specialist / Expert Partnership Lifecycle \n", + "Management (North / Central / South) \n", + "Legal \n", + "Department \n", + "Job Title \n", + "Litigation \n", + "Sr. 
Legal Counsel \n", + "(South) \n", + "C&RA \n", + "Department \n", + "Job Title \n", + "Corporate Affairs \n", + "Specialist / Expert \n", + "Corporate Affairs \n", + "P&O \n", + "Department \n", + "Job Title \n", + "Administration & Real \n", + "Estate \n", + "Executive Facilities \n", + "Management \n", + "(Power/Genset) \n", + "Grade \n", + "L3 \n", + "Grade \n", + "L1/L2 \n", + "Grade \n", + "L1 \n", + "Annexure B | Hardship Allowance Entitlement \n", + "Location \n", + "Quetta \n", + "Gwadar \n", + "Gilgit \n", + "Role \n", + "Regional Sales Head \n", + "Area Sales Manager \n", + "Zonal Manager \n", + "Grades \n", + "L4 \n", + "L3 \n", + "L1 & L2 \n", + "L3 \n", + "L1 & L2 \n", + "L1 & L2 \n", + "L4 \n", + "L3 \n", + "L1 / L2 \n", + "Entitlement \n", + "(monthly) \n", + "Fuel: 300ltrs \n", + "Fuel: 200ltrs \n", + "CMA: PKR 25,000 \n", + "Fuel: 200ltrs \n", + "Fuel: 200ltrs \n", + "CMA: PKR 25,000 \n", + "Fuel: 200ltrs \n", + "CMA: PKR 15,000 \n", + "Fuel: 100ltrs \n", + "Fuel: 300ltrs\n", + "Fuel 200ltrs \n", + "CMA: PKR 25,000 \n", + "Fuel: 200ltrs \n", + "Location \n", + "Entitlement \n", + "(monthly) \n", + "Karachi \n", + "Fuel: 200ltrs \n", + "Location \n", + "Entitlement \n", + "(monthly) \n", + "Islamabad \n", + "Fuel: 175/200ltrs \n", + "Location \n", + "Entitlement \n", + "(monthly) \n", + "Islamabad \n", + "Fuel: 150ltrs \n", + "Entitlement \n", + "(monthly) \n", + "PKR 50,000\n", + "PKR 25,000 \n", + "3 \n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, \n", + "in which case your employment shall be governed by such revised rules and regulations \n", + "DI Khan \n", + "Zhob \n", + "MBU Lead \n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, \n", + "in which case your employment shall be governed by such revised rules and regulations \n", + "4 
\n", + "\n" + ] + } + ], + "source": [ + "print(semantic_snippets[1].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "loader = UnstructuredPDFLoader(\"Data/Policies/2.16 Role Based Entitlements Policy V7 22.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "test_docs = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2. Benefits 2.16 Role Based Entitlements Policy Owner Approved By Version 1.0 Version 2.0 Version 3.0 Version 4.0 Version 5.0 Version 6.0 Version 7.0 People & Organization CPO, CFO, CEO 01/01/2019 01/04/2019 01/07/2019 01/05/2020 01/01/2021 01/05/2021 04/02/2022\n", + "\n", + "2.16 Role Based Entitlements Policy\n", + "\n", + "Purpose\n", + "\n", + "Jazz enables its people to perform their Jobs by equipping them with the required facilities. 
Under this policy, all field base roles are provided with car maintenance allowance (CMA) and Fuel to accomplish their business targets within their assigned territories.\n", + "\n", + "Field Base Roles\n", + "\n", + "Any role where majority of working time during the day is spent on field for business purpose.\n", + "\n", + "Application\n", + "\n", + "All Jazz employees on field roles (as mentioned in annexure A) that require CMA and Fuel support on a daily basis to achieve performance KPIs.\n", + "\n", + "Entitlements\n", + "\n", + "Eligible roles and their respective entitlement are shared in annexure A.\n", + "\n", + "Features\n", + "\n", + "▪ Your CMA and fuel eligibility is subject to business requirements and can change or cease as the\n", + "\n", + "requirements evolve.\n", + "\n", + "▪ At any point in time, your monthly fuel entitlement will be higher of the two limits i.e. role\n", + "\n", + "based and grade-based fuel limit.\n", + "\n", + "▪ Your CMA and fuel is meant for addressing your travel needs within your allocated territories;\n", + "\n", + "for other business travel needs you can follow the regular travel process.\n", + "\n", + "▪ This policy does not apply to technology staff, they will be facilitated through 228 service. ▪ Changes in annexure with respect to eligible field-based roles can be updated based on\n", + "\n", + "alignment of respective HOD, HRBP and Rewards Team.\n", + "\n", + "▪ Specific roles can be assigned with the benefit supported by a business rationale and duly\n", + "\n", + "aligned from the respective CXO, HOD, HRBP and Rewards Team.\n", + "\n", + "▪ CMA is not applicable to L3 and above job grades. 
▪ Hardship Allowance is provided for specific roles deployed in remote areas (details in\n", + "\n", + "Annexure B).\n", + "\n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, in which case your employment shall be governed by such revised rules and regulations\n", + "\n", + "1\n", + "\n", + "Annexure A | CMA/Fuel Entitlement\n", + "\n", + "Commercial\n", + "\n", + "Department\n", + "\n", + "Regional Sales\n", + "\n", + "Job Title\n", + "\n", + "Regional Sales Head\n", + "\n", + "Area Manager\n", + "\n", + "Territory Sales Supervisor\n", + "\n", + "Grades\n", + "\n", + "L4\n", + "\n", + "L2 & L3\n", + "\n", + "L1\n", + "\n", + "Entitlement (monthly) Fuel: 300ltrs Driver: PKR 20,000 CMA: PKR 25,000 Fuel: 200ltrs CMA: PKR 15,000 Fuel: 100ltrs\n", + "\n", + "Regional Sales / Channel Planning\n", + "\n", + "Credit & Collection\n", + "\n", + "Franchise Services Executive\n", + "\n", + "Trade Marketing Officer / Executive Regional Supervisor Credit & Collection\n", + "\n", + "L1 & L2\n", + "\n", + "CMA: PKR 15,000 Fuel: 150ltrs\n", + "\n", + "CMA: PKR 5,000 Fuel: 150ltrs\n", + "\n", + "Jazz Business\n", + "\n", + "Department\n", + "\n", + "B2B Sales\n", + "\n", + "Job Title\n", + "\n", + "Head of B2G/LA/SME\n", + "\n", + "Grades\n", + "\n", + "L4\n", + "\n", + "Entitlement (monthly) Fuel: 300ltrs Driver: PKR 20,000\n", + "\n", + "B2G\n", + "\n", + "Business Development Manager\n", + "\n", + "Corporate Solutions\n", + "\n", + "Manager Corporate Solutions\n", + "\n", + "Enterprise Sales\n", + "\n", + "Manager Enterprise Sales\n", + "\n", + "L3\n", + "\n", + "Fuel: 200ltrs\n", + "\n", + "B2B Marketing – M2M\n", + "\n", + "Manager M2M Solutions\n", + "\n", + "B2B Marketing – MFS & Agri\n", + "\n", + "Manager MFS & Agri\n", + "\n", + "B2G\n", + "\n", + "Corporate Solutions\n", + "\n", + "Enterprise Sales\n", + "\n", + "B2B Marketing – M2M\n", + "\n", + "B2B Marketing – MFS 
& Agri\n", + "\n", + "B2B Operations\n", + "\n", + "Business Development Manager / Team Lead Key Account Manager / Business Consultants Manager Corporate Solutions CAM / Business Consultants Manager Enterprise Sales KAM – M2M / Business Consultants Business Development Manager Collection Executives / Supervisors\n", + "\n", + "L1 & L2\n", + "\n", + "CMA: PKR 25,000 Fuel: 200ltrs\n", + "\n", + "CMA: PKR 5,000 Fuel: 150ltrs\n", + "\n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, in which case your employment shall be governed by such revised rules and regulations\n", + "\n", + "2\n", + "\n", + "Digital Financial Services\n", + "\n", + "Department\n", + "\n", + "Job Title\n", + "\n", + "Grades\n", + "\n", + "Entitlement (monthly)\n", + "\n", + "DFS Channels – Alternative Distribution Channel\n", + "\n", + "Regional Head of Alternative Distribution Channel\n", + "\n", + "Business Development Experts\n", + "\n", + "L4\n", + "\n", + "L3\n", + "\n", + "L1 & L2\n", + "\n", + "Fuel: 300ltrs\n", + "\n", + "Fuel: 200ltrs\n", + "\n", + "CMA: PKR 25,000 Fuel: 200ltrs\n", + "\n", + "Head of SME Business\n", + "\n", + "L3\n", + "\n", + "Fuel: 200ltrs\n", + "\n", + "DFS Channels – SME\n", + "\n", + "Expert SME Business\n", + "\n", + "L1 & L2\n", + "\n", + "CMA: PKR 25,000 Fuel: 200ltrs\n", + "\n", + "DFS Channels – Trade Marketing\n", + "\n", + "Trade Marketing Officer / Executives\n", + "\n", + "L1 & L2\n", + "\n", + "CMA: PKR 15,000 Fuel: 100ltrs\n", + "\n", + "Head of Public / Private Partnerships\n", + "\n", + "L4\n", + "\n", + "Fuel: 300ltrs\n", + "\n", + "Manager Public / Private Partnerships\n", + "\n", + "L3\n", + "\n", + "Fuel 200ltrs\n", + "\n", + "Strategic Partnerships\n", + "\n", + "Business Development Roles\n", + "\n", + "Specialist / Expert Partnership Lifecycle Management (North / Central / South)\n", + "\n", + "L1 / L2\n", + "\n", + "CMA: PKR 25,000 Fuel: 
200ltrs\n", + "\n", + "Legal\n", + "\n", + "Department\n", + "\n", + "Job Title\n", + "\n", + "Grade\n", + "\n", + "Location\n", + "\n", + "Entitlement (monthly)\n", + "\n", + "Litigation\n", + "\n", + "Sr. Legal Counsel (South)\n", + "\n", + "L3\n", + "\n", + "Karachi\n", + "\n", + "Fuel: 200ltrs\n", + "\n", + "C&RA\n", + "\n", + "Department\n", + "\n", + "Job Title\n", + "\n", + "Grade\n", + "\n", + "Location\n", + "\n", + "Entitlement (monthly)\n", + "\n", + "Corporate Affairs\n", + "\n", + "Specialist / Expert Corporate Affairs\n", + "\n", + "L1/L2\n", + "\n", + "Islamabad\n", + "\n", + "Fuel: 175/200ltrs\n", + "\n", + "P&O\n", + "\n", + "Department\n", + "\n", + "Job Title\n", + "\n", + "Grade\n", + "\n", + "Location\n", + "\n", + "Entitlement (monthly)\n", + "\n", + "Administration & Real Estate\n", + "\n", + "Executive Facilities Management (Power/Genset)\n", + "\n", + "L1\n", + "\n", + "Islamabad\n", + "\n", + "Fuel: 150ltrs\n", + "\n", + "Annexure B | Hardship Allowance Entitlement\n", + "\n", + "Location\n", + "\n", + "Quetta Gwadar Gilgit\n", + "\n", + "Role\n", + "\n", + "Regional Sales Head\n", + "\n", + "Area Sales Manager\n", + "\n", + "Zonal Manager\n", + "\n", + "Entitlement (monthly) PKR 50,000\n", + "\n", + "PKR 25,000\n", + "\n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, in which case your employment shall be governed by such revised rules and regulations\n", + "\n", + "3\n", + "\n", + "DI Khan Zhob\n", + "\n", + "MBU Lead\n", + "\n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, in which case your employment shall be governed by such revised rules and regulations\n", + "\n", + "4\n" + ] + } + ], + "source": [ + "print(test_docs[0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": 
# %%
# Load every policy file and cut it into 500-character chunks (no overlap).
docs = DirectoryLoader('Data/Policies/').load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)
all_splits = text_splitter.split_documents(docs)

# %%
# Directory where Chroma persists the index.
presist_dictroy = 'Vector Store/'

# %%
## Build the vector store once, otherwise reopen the persisted one.
# Fixes over the previous two separate cells:
#   * the chunks in `all_splits` are indexed (before, the unsplit `docs` were
#     embedded and `all_splits` was never used);
#   * the corpus is not re-embedded into an already populated directory on
#     every run, which added the same chunks again and produced repeated,
#     identical search hits.
# To force a rebuild (e.g. after the policy files change), delete the
# directory and re-run this cell.
# NOTE(review): a directory that exists but holds an empty collection is
# treated as "already built" — confirm that cannot happen in your setup.
if os.path.isdir(presist_dictroy) and os.listdir(presist_dictroy):
    vectorstore = Chroma(persist_directory=presist_dictroy,embedding_function=OpenAIEmbeddings())
else:
    vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings(),persist_directory=presist_dictroy)

# %%
vectorstore

# %%
# Plain similarity search. The hits get their own name so that `docs` keeps
# holding the loaded policy documents; reusing `docs` here meant that
# re-running the cells above would split/index the search hits instead.
query = "What is leaves policy?"
similar_docs = vectorstore.similarity_search(query)

# %%
similar_docs

# %%
# Same query through a retriever configured with search_type="mmr".
retriever = vectorstore.as_retriever(search_type="mmr")
matched_docs = retriever.get_relevant_documents(query)

# %%
matched_docs
# %%
import logging

from langchain.chat_models import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever

# Raise the multi-query retriever's logger to INFO so the alternative
# questions generated by the LLM are printed when the retriever runs.
logging.basicConfig()
multi_query_logger = logging.getLogger('langchain.retrievers.multi_query')
multi_query_logger.setLevel(logging.INFO)

# Wrap the default vector-store retriever with an LLM (temperature 0) that
# rewrites the incoming question before retrieval.
base_retriever = vectorstore.as_retriever()
rewriter_llm = ChatOpenAI(temperature=0)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=base_retriever,
    llm=rewriter_llm,
)

# %%
# Retrieve documents for `query` (set in an earlier cell) through the
# multi-query retriever.
unique_docs = retriever_from_llm.get_relevant_documents(query=query)

# %%
unique_docs
# %%
# Show only the query-rewriting prompt of the multi-query retriever.
# Displaying the retriever object itself prints the wrapped ChatOpenAI client,
# whose repr contains `openai_api_key` in clear text — that value would then
# be saved in the notebook output and leak with the file.
print(retriever_from_llm.llm_chain.prompt.template)

# %%
# Plain similarity search for the parental-leave question. The hits are kept
# under their own name so `docs` is not overwritten with search results.
question = "what are the parental leaves?"
parental_leave_docs = vectorstore.similarity_search(question)

# %%
parental_leave_docs

# %%
# Same question through a retriever configured with search_type="mmr", to
# compare against the plain similarity hits above.
retriever = vectorstore.as_retriever(search_type="mmr")
matched_docs = retriever.get_relevant_documents(question)
# %%
matched_docs

# %%
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

# Policy-advisor prompt: {context} receives the retrieved chunks and
# {question} the user's question.
# (The previous `template = template = template = ...` chained assignment was
# a copy-paste artifact; a single assignment is equivalent.)
template = """You are an Expert Policy Advisor.These Below are the Documents that are extracted from the different Policies.YOur Job 
 is to Provide the Answer to below question based on the text below. 
 
 Here are few instructions for you to follow when answering a question.
 - When you didnt find the relevant answers from below text Just Say "I dont know this,Please contact your HRBP for more details."
 - These are policy Documents, When answering a question Do Not return in response that "This information is At Annex A/B".Provide a Complete response to request.
 - Try to answer the questions in bullet format and keep the answer concise as you can.
 
 {context}
 Question: {question}
 Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# NOTE: despite its name, this chain retrieves with the plain vector-store
# retriever; the multi-query variant is `qa_chain` further down.
qa_chain_multi_query = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

# %%
# Ask each question exactly once. The notebook previously repeated the TAF and
# CMA cells verbatim, which only doubled the number of paid, rate-limited API
# calls. `question` and `result` remain module-level names and end up holding
# the last question and its response, as before.
POLICY_QUESTIONS = [
    "What is minimum distance is required to raise TAF for intercity travels?",
    "What is car mantinance Allowance? and list cma entitlement for different roles?",
    "What cars each grade is entitled what is their buy back policy?",
]
for question in POLICY_QUESTIONS:
    result = qa_chain_multi_query({"query": question})
    print(result["result"])

# %%
# Second chain: same model, a shorter generic prompt, and the multi-query
# retriever. `retriever_from_llm` must already exist — it is created in the
# "Multi Query Retriver and MMR" section, so that section has to run first
# (running this cell on its own raises NameError).
# `template` and `QA_CHAIN_PROMPT` are rebound here; `qa_chain_multi_query`
# keeps the prompt object it was built with.
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum and keep the answer as concise as possible.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever_from_llm,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

# %%
# Re-ask the most recent `question` through the multi-query chain.
result = qa_chain({"query": question})

# %%
print(result["result"])
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "WARNING:langchain.embeddings.openai:Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-BB8HNYx7iFUPtPvtX9KKNmUU on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "WARNING:langchain.embeddings.openai:Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 8.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-BB8HNYx7iFUPtPvtX9KKNmUU on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "WARNING:langchain.embeddings.openai:Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-BB8HNYx7iFUPtPvtX9KKNmUU on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n", + "WARNING:langchain.embeddings.openai:Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-BB8HNYx7iFUPtPvtX9KKNmUU on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "WARNING:langchain.embeddings.openai:Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-BB8HNYx7iFUPtPvtX9KKNmUU on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n", + "WARNING:langchain.embeddings.openai:Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 8.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-BB8HNYx7iFUPtPvtX9KKNmUU on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There is no minimum distance requirement mentioned for raising TAF for intercity travels in the given context.\n" + ] + } + ], + "source": [ + "question = \"What is minimum distance is required to raise TAF for intercity travels?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the fuel limits for each grade?', '2. Can you provide me with the fuel limits for each grade?', '3. I would like to know the fuel limits for each grade. Could you please provide that information?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "L4: 300 liters\n", + "L3: 200 liters\n", + "L1 & L2: 200 liters\n" + ] + } + ], + "source": [ + "question = \"Give me fuel limit for each grade?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What is the maximum amount of PF that can be withdrawn from the account? Is it possible to return the withdrawn amount?', '2. Is there a limit on the PF withdrawal amount? Can the withdrawn PF be refunded?', '3. How much PF can I withdraw from my account? If I withdraw it, can I later return the withdrawn amount?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The maximum amount of PF that can be drawn is 2 times the individual PF balance, with a minimum capping at PKR 1 Million. 
It is not mentioned whether the PF can be returned back.\n" + ] + } + ], + "source": [ + "question = \"how much PF can be drawn and can it be returned back?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What is the maximum number of unpaid leaves allowed and what are the steps to apply for them?', '2. Can you please explain the procedure for applying for unpaid leaves and the limit on the number of leaves we can take?', '3. How many unpaid leaves are permitted and what is the application process for requesting them?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You can take a maximum of three (03) months of unpaid leave within one (01) year. To apply for unpaid leave, you should discuss the possibility with your manager and people operations representatives.\n" + ] + } + ], + "source": [ + "question = \"How many unpaid leaves can we take and what is process to apply for it?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. Is it possible to divide my maternity leave into multiple stages?', '2. Can I break up my maternity leave into different periods?', '3. Is there a way to separate my maternity leave into multiple phases?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Yes, you can split your maternity leave into multiple phases. 
You can work from home for the first three months with reduced hours and workload after completing your maternity leave, and then follow flexible working options for the subsequent three months with your line manager's alignment.\n" + ] + } + ], + "source": [ + "question = \"Can i split my maternity leaves in to multiple phases\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What is the eligibility criteria for an internal employee to apply for new roles within Jazz?', '2. How does an internal employee become eligible to apply for new roles within Jazz?', '3. Are there any specific requirements for an internal employee to apply for new roles within Jazz?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Internal employees can apply for new roles within Jazz after serving at least one year in their current role and grade.\n" + ] + } + ], + "source": [ + "question = \"when can an internal employee apply for new roles within jazz?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'qa_chain' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[83], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m question \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mwhat talent acquisition policy says about hireview?\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m----> 2\u001b[0m result \u001b[39m=\u001b[39m 
qa_chain({\u001b[39m\"\u001b[39m\u001b[39mquery\u001b[39m\u001b[39m\"\u001b[39m: question})\n\u001b[0;32m 3\u001b[0m \u001b[39mprint\u001b[39m(result[\u001b[39m\"\u001b[39m\u001b[39mresult\u001b[39m\u001b[39m\"\u001b[39m])\n", + "\u001b[1;31mNameError\u001b[0m: name 'qa_chain' is not defined" + ] + } + ], + "source": [ + "question = \"what talent acquisition policy says about hireview?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. Which version of the Talent policy is currently being utilized?', '2. What is the specific version of the Talent policy that you are currently employing?', '3. Can you provide information about the version of the Talent policy that is in use?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The version of the Talent Policy being used is Version 5.0.\n" + ] + } + ], + "source": [ + "question = \"what is the verion of Talent policy that you are using?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the minimum qualifications needed for an intern?', '2. What qualifications does an intern need to have?', '3. What kind of qualifications are required from interns?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The minimum qualification for an intern is completion of A levels or equivalent. The intern is required to have a qualification from an HEC recognized University.\n" + ] + } + ], + "source": [ + "question = \"what is the minimum qualification for an intern? 
and what kind of qualification is required from intern to have?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. Whose approval is necessary for an exception to the notice period?', '2. Who needs to give approval for a notice period exception?', '3. Which authority is responsible for approving notice period exceptions?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HoD's (CXO-1) approval.\n" + ] + } + ], + "source": [ + "question = \"for notice period exception whose approval is required?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"for notice period exception whose approval is required?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"for notice period exception whose approval is required?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### MultiVector Retriever\n", + "##### Parent Document Retriever\n", + " - Uses the Parent Document Retriever, which divides the documents into smaller child chunks and stores their embeddings in the vector store to capture more meaningful matches, while also keeping the complete parent documents in a document store so the full context can be returned."
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers import ParentDocumentRetriever" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers.multi_vector import MultiVectorRetriever\n", + "from langchain.vectorstores import Chroma\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.storage import InMemoryStore\n", + "from langchain.document_loaders import DirectoryLoader\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. 
Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. 
Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n" + ] + } + ], + "source": [ + "docs = DirectoryLoader('Data/Policies/').load()\n", + "# This text splitter is used to create the child documents\n", + "child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)\n", + "# The vectorstore to use to index the child chunks\n", + "vectorstore = Chroma(\n", + " collection_name=\"full_documents\",\n", + " embedding_function=OpenAIEmbeddings()\n", + ")\n", + "# The storage layer for the parent documents\n", + "store = InMemoryStore()\n", + "retriever = ParentDocumentRetriever(\n", + " vectorstore=vectorstore, \n", + " docstore=store, \n", + " child_splitter=child_splitter,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "retriever.add_documents(docs, ids=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "36" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(list(store.yield_keys()))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "## Getting the Documents related to seached term\n", + "sub_docs = vectorstore.similarity_search(\"Fuel\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Car Policy\\n\\nFeatures\\n\\nFuel\\n\\nYou can avail your fuel entitlement through company provided fuel card carrying a pre-defined monthly limit (in liters).\\n\\nIn case your job 
qualifies for role based fuel, your monthly fuel entitlement will be higher of the two limits i.e. role based and grade based fuel limit.\\n\\nCar Allowance', metadata={'doc_id': '9b233d9e-ec50-48a3-afd3-74384fdedefc', 'source': 'Data\\\\Policies\\\\2.3. Car Policy V8.docx'}),\n", + " Document(page_content='Fuel: 200ltrs\\n\\nCMA: PKR 5,000\\n\\nFuel: 150ltrs\\n\\nThe Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice,\\n\\nin which case your employment shall be governed by such revised rules and regulations\\n\\n2\\n\\nDigital Financial Services\\n\\nDepartment\\n\\nJob Title\\n\\nDFS Channels – Alternative\\n\\nDistribution Channel', metadata={'doc_id': 'eb3f2bc5-aa12-4266-9137-c2b596cb4964', 'source': 'Data\\\\Policies\\\\2.16 Role Based Entitlements Policy V7 22.pdf'}),\n", + " Document(page_content='Fuel: 200ltrs\\n\\nCMA: PKR 25,000\\n\\nFuel: 200ltrs\\n\\nFuel: 200ltrs\\n\\nCMA: PKR 25,000\\n\\nFuel: 200ltrs\\n\\nCMA: PKR 15,000\\n\\nFuel: 100ltrs\\n\\nFuel: 300ltrs\\n\\nFuel 200ltrs\\n\\nCMA: PKR 25,000\\n\\nFuel: 200ltrs\\n\\nLocation\\n\\nEntitlement\\n\\n(monthly)\\n\\nKarachi\\n\\nFuel: 200ltrs\\n\\nLocation\\n\\nEntitlement\\n\\n(monthly)\\n\\nIslamabad\\n\\nFuel: 175/200ltrs\\n\\nLocation\\n\\nEntitlement\\n\\n(monthly)\\n\\nIslamabad\\n\\nFuel: 150ltrs\\n\\nEntitlement\\n\\n(monthly)', metadata={'doc_id': 'eb3f2bc5-aa12-4266-9137-c2b596cb4964', 'source': 'Data\\\\Policies\\\\2.16 Role Based Entitlements Policy V7 22.pdf'}),\n", + " Document(page_content='2. Benefits\\n\\n2.3. Car Policy\\n\\nOwner\\n\\nApproved By\\n\\nVersion 1.0\\n\\nVersion 2.0\\n\\nVersion 3.0\\n\\nVersion 4.0\\n\\nVersion 5.0\\n\\nVersion 6.0\\n\\nVersion 7.0\\n\\nPeople & Organization\\n\\nCPO, CFO, CEO\\n\\n22/06/2016\\n\\n01/04/2017\\n\\n01/03/2018\\n\\n01/08/2018\\n\\n01/11/2018\\n\\n01/05/2019\\n\\n01/07/2019\\n\\n2.3. Car Policy\\n\\nFeatures\\n\\n2.3.1. 
Fuel', metadata={'doc_id': '4b2673d7-4490-48d8-aae1-76f1eece9098', 'source': 'Data\\\\Policies\\\\2.3. Car Policy V7 Nov 2019.pdf'})]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sub_docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(sub_docs[0].page_content) " + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "retrieved_docs = retriever.get_relevant_documents(\"fuel\")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Car Policy\n", + "\n", + "Features\n", + "\n", + "Fuel\n", + "\n", + "You can avail your fuel entitlement through company provided fuel card carrying a pre-defined monthly limit (in liters).\n", + "\n", + "In case your job qualifies for role based fuel, your monthly fuel entitlement will be higher of the two limits i.e. role based and grade based fuel limit.\n", + "\n", + "Car Allowance\n", + "\n", + "In case of entitlement, you can opt for a defined car allowance, which reaches you through monthly payroll.\n", + "\n", + "Operating Lease\n", + "\n", + "In case of eligibility, you have the choice to opt for a vehicle through vendor facilitated operating lease. Vehicle options, maintenance, buyback, contract terms and further details are available at the .\n", + "\n", + "Useful Notes\n", + "\n", + "Upon resignation, an employee may continue the vehicle lease contract on personal capacity.\n", + "\n", + "Where a resigned employee does not want to continue with the operating lease facility, the company may continue the lease & offer the specific lease to any interested applicant who wishes to avail the facility. 
The buyback period will continue as per the effective date in the lease contract initially signed-off with the resigned employee.\n", + "\n", + "In case an applicant wishes to utilize & continue the car operating lease specified in above clause, the lease would continue with deductions from the Salary as per the standard procedure & without any deduction from the resigned employee’s settlement.\n", + "\n", + "In case no applicant avails the existing lease, the respective dues as per the lease contract would be deducted from the resigned employee’s settlement.\n", + "\n", + "Administration would facilitate an employee for renting a vehicle from the vendor during the time leased vehicle delivery is in process. The rental amount will be deducted from employee’s salary and accordingly paid to the vendor.\n", + "\n", + "Vehicle options are subject to availability; available options will be updated on the operating lease portal.\n", + "\n", + "Buy back of Jazz owned / lease vehicles currently in possession of entitled employees will be dealt with as per previous policies until the useful life is completed and the said asset is disposed from Fixed Asset Register\n" + ] + } + ], + "source": [ + "print(retrieved_docs[1].page_content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Summary \n", + "- Here I will also store summaries of the documents for better retrieval. Right now I am getting a rate-limit error."
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.prompts import ChatPromptTemplate\n", + "from langchain.schema.output_parser import StrOutputParser\n", + "import uuid\n", + "from langchain.schema.document import Document" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "chain = (\n", + " {\"doc\": lambda x: x.page_content}\n", + " | ChatPromptTemplate.from_template(\"Summarize the following document:\\n\\n{doc}\")\n", + " | ChatOpenAI(max_retries=0)\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "ename": "RateLimitError", + "evalue": "Rate limit reached for gpt-3.5-turbo in organization org-x1evDRl7Z91oUYDUdl96xewe on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. 
Visit https://platform.openai.com/account/billing to add a payment method.", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mRateLimitError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32md:\\Work\\Jazz\\GPT\\JIN-e\\Langchain_bot.ipynb Cell 62\u001b[0m line \u001b[0;36m1\n\u001b[1;32m----> 1\u001b[0m summaries \u001b[39m=\u001b[39m chain\u001b[39m.\u001b[39;49mbatch(docs, {\u001b[39m\"\u001b[39;49m\u001b[39mmax_concurrency\u001b[39;49m\u001b[39m\"\u001b[39;49m: \u001b[39m5\u001b[39;49m})\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\schema\\runnable\\base.py:1081\u001b[0m, in \u001b[0;36mRunnableSequence.batch\u001b[1;34m(self, inputs, config, return_exceptions, **kwargs)\u001b[0m\n\u001b[0;32m 1079\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 1080\u001b[0m \u001b[39mfor\u001b[39;00m i, step \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39msteps):\n\u001b[1;32m-> 1081\u001b[0m inputs \u001b[39m=\u001b[39m step\u001b[39m.\u001b[39;49mbatch(\n\u001b[0;32m 1082\u001b[0m inputs,\n\u001b[0;32m 1083\u001b[0m [\n\u001b[0;32m 1084\u001b[0m \u001b[39m# each step a child run of the corresponding root run\u001b[39;49;00m\n\u001b[0;32m 1085\u001b[0m patch_config(\n\u001b[0;32m 1086\u001b[0m config, callbacks\u001b[39m=\u001b[39;49mrm\u001b[39m.\u001b[39;49mget_child(\u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mseq:step:\u001b[39;49m\u001b[39m{\u001b[39;49;00mi\u001b[39m+\u001b[39;49m\u001b[39m1\u001b[39;49m\u001b[39m}\u001b[39;49;00m\u001b[39m\"\u001b[39;49m)\n\u001b[0;32m 1087\u001b[0m )\n\u001b[0;32m 1088\u001b[0m \u001b[39mfor\u001b[39;49;00m rm, config \u001b[39min\u001b[39;49;00m \u001b[39mzip\u001b[39;49m(run_managers, configs)\n\u001b[0;32m 1089\u001b[0m ],\n\u001b[0;32m 1090\u001b[0m )\n\u001b[0;32m 1092\u001b[0m \u001b[39m# finish the root 
runs\u001b[39;00m\n\u001b[0;32m 1093\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\schema\\runnable\\base.py:133\u001b[0m, in \u001b[0;36mRunnable.batch\u001b[1;34m(self, inputs, config, return_exceptions, **kwargs)\u001b[0m\n\u001b[0;32m 130\u001b[0m \u001b[39mreturn\u001b[39;00m cast(List[Output], [invoke(inputs[\u001b[39m0\u001b[39m], configs[\u001b[39m0\u001b[39m])])\n\u001b[0;32m 132\u001b[0m \u001b[39mwith\u001b[39;00m get_executor_for_config(configs[\u001b[39m0\u001b[39m]) \u001b[39mas\u001b[39;00m executor:\n\u001b[1;32m--> 133\u001b[0m \u001b[39mreturn\u001b[39;00m cast(List[Output], \u001b[39mlist\u001b[39;49m(executor\u001b[39m.\u001b[39;49mmap(invoke, inputs, configs)))\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\concurrent\\futures\\_base.py:621\u001b[0m, in \u001b[0;36mExecutor.map..result_iterator\u001b[1;34m()\u001b[0m\n\u001b[0;32m 618\u001b[0m \u001b[39mwhile\u001b[39;00m fs:\n\u001b[0;32m 619\u001b[0m \u001b[39m# Careful not to keep a reference to the popped future\u001b[39;00m\n\u001b[0;32m 620\u001b[0m \u001b[39mif\u001b[39;00m timeout \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m--> 621\u001b[0m \u001b[39myield\u001b[39;00m _result_or_cancel(fs\u001b[39m.\u001b[39;49mpop())\n\u001b[0;32m 622\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 623\u001b[0m \u001b[39myield\u001b[39;00m _result_or_cancel(fs\u001b[39m.\u001b[39mpop(), end_time \u001b[39m-\u001b[39m time\u001b[39m.\u001b[39mmonotonic())\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\concurrent\\futures\\_base.py:319\u001b[0m, in \u001b[0;36m_result_or_cancel\u001b[1;34m(***failed resolving arguments***)\u001b[0m\n\u001b[0;32m 317\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 318\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 319\u001b[0m 
\u001b[39mreturn\u001b[39;00m fut\u001b[39m.\u001b[39;49mresult(timeout)\n\u001b[0;32m 320\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 321\u001b[0m fut\u001b[39m.\u001b[39mcancel()\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\concurrent\\futures\\_base.py:458\u001b[0m, in \u001b[0;36mFuture.result\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 456\u001b[0m \u001b[39mraise\u001b[39;00m CancelledError()\n\u001b[0;32m 457\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_state \u001b[39m==\u001b[39m FINISHED:\n\u001b[1;32m--> 458\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__get_result()\n\u001b[0;32m 459\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 460\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mTimeoutError\u001b[39;00m()\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\concurrent\\futures\\_base.py:403\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 401\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception:\n\u001b[0;32m 402\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 403\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception\n\u001b[0;32m 404\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 405\u001b[0m \u001b[39m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[0;32m 406\u001b[0m \u001b[39mself\u001b[39m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\concurrent\\futures\\thread.py:58\u001b[0m, in \u001b[0;36m_WorkItem.run\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[39mreturn\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m result \u001b[39m=\u001b[39m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfn(\u001b[39m*\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mkwargs)\n\u001b[0;32m 59\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m \u001b[39mas\u001b[39;00m exc:\n\u001b[0;32m 60\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfuture\u001b[39m.\u001b[39mset_exception(exc)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\schema\\runnable\\base.py:126\u001b[0m, in \u001b[0;36mRunnable.batch..invoke\u001b[1;34m(input, config)\u001b[0m\n\u001b[0;32m 124\u001b[0m \u001b[39mreturn\u001b[39;00m e\n\u001b[0;32m 125\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m--> 126\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39minvoke(\u001b[39minput\u001b[39m, config, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\base.py:112\u001b[0m, in \u001b[0;36mBaseChatModel.invoke\u001b[1;34m(self, input, config, stop, **kwargs)\u001b[0m\n\u001b[0;32m 99\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minvoke\u001b[39m(\n\u001b[0;32m 100\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 101\u001b[0m \u001b[39minput\u001b[39m: LanguageModelInput,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 105\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any,\n\u001b[0;32m 106\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m BaseMessageChunk:\n\u001b[0;32m 107\u001b[0m config \u001b[39m=\u001b[39m config \u001b[39mor\u001b[39;00m {}\n\u001b[0;32m 108\u001b[0m \u001b[39mreturn\u001b[39;00m cast(\n\u001b[0;32m 109\u001b[0m BaseMessageChunk,\n\u001b[0;32m 110\u001b[0m cast(\n\u001b[0;32m 111\u001b[0m ChatGeneration,\n\u001b[1;32m--> 112\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mgenerate_prompt(\n\u001b[0;32m 113\u001b[0m 
[\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_convert_input(\u001b[39minput\u001b[39m)],\n\u001b[0;32m 114\u001b[0m stop\u001b[39m=\u001b[39mstop,\n\u001b[0;32m 115\u001b[0m callbacks\u001b[39m=\u001b[39mconfig\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mcallbacks\u001b[39m\u001b[39m\"\u001b[39m),\n\u001b[0;32m 116\u001b[0m tags\u001b[39m=\u001b[39mconfig\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mtags\u001b[39m\u001b[39m\"\u001b[39m),\n\u001b[0;32m 117\u001b[0m metadata\u001b[39m=\u001b[39mconfig\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mmetadata\u001b[39m\u001b[39m\"\u001b[39m),\n\u001b[0;32m 118\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[0;32m 119\u001b[0m )\u001b[39m.\u001b[39mgenerations[\u001b[39m0\u001b[39m][\u001b[39m0\u001b[39m],\n\u001b[0;32m 120\u001b[0m )\u001b[39m.\u001b[39mmessage,\n\u001b[0;32m 121\u001b[0m )\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\base.py:414\u001b[0m, in \u001b[0;36mBaseChatModel.generate_prompt\u001b[1;34m(self, prompts, stop, callbacks, **kwargs)\u001b[0m\n\u001b[0;32m 406\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mgenerate_prompt\u001b[39m(\n\u001b[0;32m 407\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 408\u001b[0m prompts: List[PromptValue],\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 411\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any,\n\u001b[0;32m 412\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m LLMResult:\n\u001b[0;32m 413\u001b[0m prompt_messages \u001b[39m=\u001b[39m [p\u001b[39m.\u001b[39mto_messages() \u001b[39mfor\u001b[39;00m p \u001b[39min\u001b[39;00m prompts]\n\u001b[1;32m--> 414\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mgenerate(prompt_messages, stop\u001b[39m=\u001b[39mstop, callbacks\u001b[39m=\u001b[39mcallbacks, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File 
\u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\base.py:309\u001b[0m, in \u001b[0;36mBaseChatModel.generate\u001b[1;34m(self, messages, stop, callbacks, tags, metadata, **kwargs)\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[39mif\u001b[39;00m run_managers:\n\u001b[0;32m 308\u001b[0m run_managers[i]\u001b[39m.\u001b[39mon_llm_error(e)\n\u001b[1;32m--> 309\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[0;32m 310\u001b[0m flattened_outputs \u001b[39m=\u001b[39m [\n\u001b[0;32m 311\u001b[0m LLMResult(generations\u001b[39m=\u001b[39m[res\u001b[39m.\u001b[39mgenerations], llm_output\u001b[39m=\u001b[39mres\u001b[39m.\u001b[39mllm_output)\n\u001b[0;32m 312\u001b[0m \u001b[39mfor\u001b[39;00m res \u001b[39min\u001b[39;00m results\n\u001b[0;32m 313\u001b[0m ]\n\u001b[0;32m 314\u001b[0m llm_output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_combine_llm_outputs([res\u001b[39m.\u001b[39mllm_output \u001b[39mfor\u001b[39;00m res \u001b[39min\u001b[39;00m results])\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\base.py:299\u001b[0m, in \u001b[0;36mBaseChatModel.generate\u001b[1;34m(self, messages, stop, callbacks, tags, metadata, **kwargs)\u001b[0m\n\u001b[0;32m 296\u001b[0m \u001b[39mfor\u001b[39;00m i, m \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(messages):\n\u001b[0;32m 297\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 298\u001b[0m results\u001b[39m.\u001b[39mappend(\n\u001b[1;32m--> 299\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_generate_with_cache(\n\u001b[0;32m 300\u001b[0m m,\n\u001b[0;32m 301\u001b[0m stop\u001b[39m=\u001b[39mstop,\n\u001b[0;32m 302\u001b[0m run_manager\u001b[39m=\u001b[39mrun_managers[i] \u001b[39mif\u001b[39;00m run_managers \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 303\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[0;32m 304\u001b[0m )\n\u001b[0;32m 305\u001b[0m 
)\n\u001b[0;32m 306\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 307\u001b[0m \u001b[39mif\u001b[39;00m run_managers:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\base.py:446\u001b[0m, in \u001b[0;36mBaseChatModel._generate_with_cache\u001b[1;34m(self, messages, stop, run_manager, **kwargs)\u001b[0m\n\u001b[0;32m 442\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 443\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAsked to cache, but no cache found at `langchain.cache`.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 444\u001b[0m )\n\u001b[0;32m 445\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported:\n\u001b[1;32m--> 446\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_generate(\n\u001b[0;32m 447\u001b[0m messages, stop\u001b[39m=\u001b[39mstop, run_manager\u001b[39m=\u001b[39mrun_manager, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs\n\u001b[0;32m 448\u001b[0m )\n\u001b[0;32m 449\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 450\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_generate(messages, stop\u001b[39m=\u001b[39mstop, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\openai.py:345\u001b[0m, in \u001b[0;36mChatOpenAI._generate\u001b[1;34m(self, messages, stop, run_manager, stream, **kwargs)\u001b[0m\n\u001b[0;32m 343\u001b[0m message_dicts, params \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_create_message_dicts(messages, stop)\n\u001b[0;32m 344\u001b[0m params \u001b[39m=\u001b[39m {\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs}\n\u001b[1;32m--> 345\u001b[0m response \u001b[39m=\u001b[39m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcompletion_with_retry(\n\u001b[0;32m 346\u001b[0m messages\u001b[39m=\u001b[39mmessage_dicts, run_manager\u001b[39m=\u001b[39mrun_manager, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[0;32m 347\u001b[0m )\n\u001b[0;32m 348\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_create_chat_result(response)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\openai.py:278\u001b[0m, in \u001b[0;36mChatOpenAI.completion_with_retry\u001b[1;34m(self, run_manager, **kwargs)\u001b[0m\n\u001b[0;32m 274\u001b[0m \u001b[39m@retry_decorator\u001b[39m\n\u001b[0;32m 275\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_completion_with_retry\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Any:\n\u001b[0;32m 276\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclient\u001b[39m.\u001b[39mcreate(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m--> 278\u001b[0m \u001b[39mreturn\u001b[39;00m _completion_with_retry(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\tenacity\\__init__.py:289\u001b[0m, in \u001b[0;36mBaseRetrying.wraps..wrapped_f\u001b[1;34m(*args, **kw)\u001b[0m\n\u001b[0;32m 287\u001b[0m \u001b[39m@functools\u001b[39m\u001b[39m.\u001b[39mwraps(f)\n\u001b[0;32m 288\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mwrapped_f\u001b[39m(\u001b[39m*\u001b[39margs: t\u001b[39m.\u001b[39mAny, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkw: t\u001b[39m.\u001b[39mAny) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m t\u001b[39m.\u001b[39mAny:\n\u001b[1;32m--> 289\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m(f, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkw)\n", + "File 
\u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\tenacity\\__init__.py:379\u001b[0m, in \u001b[0;36mRetrying.__call__\u001b[1;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[0;32m 377\u001b[0m retry_state \u001b[39m=\u001b[39m RetryCallState(retry_object\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m, fn\u001b[39m=\u001b[39mfn, args\u001b[39m=\u001b[39margs, kwargs\u001b[39m=\u001b[39mkwargs)\n\u001b[0;32m 378\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 379\u001b[0m do \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49miter(retry_state\u001b[39m=\u001b[39;49mretry_state)\n\u001b[0;32m 380\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(do, DoAttempt):\n\u001b[0;32m 381\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\tenacity\\__init__.py:325\u001b[0m, in \u001b[0;36mBaseRetrying.iter\u001b[1;34m(self, retry_state)\u001b[0m\n\u001b[0;32m 323\u001b[0m retry_exc \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mretry_error_cls(fut)\n\u001b[0;32m 324\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreraise:\n\u001b[1;32m--> 325\u001b[0m \u001b[39mraise\u001b[39;00m retry_exc\u001b[39m.\u001b[39;49mreraise()\n\u001b[0;32m 326\u001b[0m \u001b[39mraise\u001b[39;00m retry_exc \u001b[39mfrom\u001b[39;00m \u001b[39mfut\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mexception\u001b[39;00m()\n\u001b[0;32m 328\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwait:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\tenacity\\__init__.py:158\u001b[0m, in \u001b[0;36mRetryError.reraise\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 156\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mreraise\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m t\u001b[39m.\u001b[39mNoReturn:\n\u001b[0;32m 157\u001b[0m 
\u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlast_attempt\u001b[39m.\u001b[39mfailed:\n\u001b[1;32m--> 158\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mlast_attempt\u001b[39m.\u001b[39;49mresult()\n\u001b[0;32m 159\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\concurrent\\futures\\_base.py:451\u001b[0m, in \u001b[0;36mFuture.result\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 449\u001b[0m \u001b[39mraise\u001b[39;00m CancelledError()\n\u001b[0;32m 450\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_state \u001b[39m==\u001b[39m FINISHED:\n\u001b[1;32m--> 451\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__get_result()\n\u001b[0;32m 453\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_condition\u001b[39m.\u001b[39mwait(timeout)\n\u001b[0;32m 455\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_state \u001b[39min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\concurrent\\futures\\_base.py:403\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 401\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception:\n\u001b[0;32m 402\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 403\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception\n\u001b[0;32m 404\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 405\u001b[0m \u001b[39m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[0;32m 406\u001b[0m \u001b[39mself\u001b[39m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\tenacity\\__init__.py:382\u001b[0m, in 
\u001b[0;36mRetrying.__call__\u001b[1;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[0;32m 380\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(do, DoAttempt):\n\u001b[0;32m 381\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 382\u001b[0m result \u001b[39m=\u001b[39m fn(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 383\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m: \u001b[39m# noqa: B902\u001b[39;00m\n\u001b[0;32m 384\u001b[0m retry_state\u001b[39m.\u001b[39mset_exception(sys\u001b[39m.\u001b[39mexc_info()) \u001b[39m# type: ignore[arg-type]\u001b[39;00m\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\openai.py:276\u001b[0m, in \u001b[0;36mChatOpenAI.completion_with_retry.._completion_with_retry\u001b[1;34m(**kwargs)\u001b[0m\n\u001b[0;32m 274\u001b[0m \u001b[39m@retry_decorator\u001b[39m\n\u001b[0;32m 275\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_completion_with_retry\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Any:\n\u001b[1;32m--> 276\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclient\u001b[39m.\u001b[39mcreate(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_resources\\chat_completion.py:25\u001b[0m, in \u001b[0;36mChatCompletion.create\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m 24\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 25\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mcreate(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 26\u001b[0m \u001b[39mexcept\u001b[39;00m TryAgain \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 27\u001b[0m 
\u001b[39mif\u001b[39;00m timeout \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mand\u001b[39;00m time\u001b[39m.\u001b[39mtime() \u001b[39m>\u001b[39m start \u001b[39m+\u001b[39m timeout:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_resources\\abstract\\engine_api_resource.py:153\u001b[0m, in \u001b[0;36mEngineAPIResource.create\u001b[1;34m(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)\u001b[0m\n\u001b[0;32m 127\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[0;32m 128\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcreate\u001b[39m(\n\u001b[0;32m 129\u001b[0m \u001b[39mcls\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams,\n\u001b[0;32m 137\u001b[0m ):\n\u001b[0;32m 138\u001b[0m (\n\u001b[0;32m 139\u001b[0m deployment_id,\n\u001b[0;32m 140\u001b[0m engine,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 150\u001b[0m api_key, api_base, api_type, api_version, organization, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[0;32m 151\u001b[0m )\n\u001b[1;32m--> 153\u001b[0m response, _, api_key \u001b[39m=\u001b[39m requestor\u001b[39m.\u001b[39;49mrequest(\n\u001b[0;32m 154\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mpost\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 155\u001b[0m url,\n\u001b[0;32m 156\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[0;32m 157\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[0;32m 158\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[0;32m 159\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[0;32m 160\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[0;32m 161\u001b[0m )\n\u001b[0;32m 163\u001b[0m \u001b[39mif\u001b[39;00m stream:\n\u001b[0;32m 164\u001b[0m \u001b[39m# must be an iterator\u001b[39;00m\n\u001b[0;32m 165\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mnot\u001b[39;00m 
\u001b[39misinstance\u001b[39m(response, OpenAIResponse)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_requestor.py:226\u001b[0m, in \u001b[0;36mAPIRequestor.request\u001b[1;34m(self, method, url, params, headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[0;32m 205\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrequest\u001b[39m(\n\u001b[0;32m 206\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 207\u001b[0m method,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 214\u001b[0m request_timeout: Optional[Union[\u001b[39mfloat\u001b[39m, Tuple[\u001b[39mfloat\u001b[39m, \u001b[39mfloat\u001b[39m]]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 215\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], \u001b[39mbool\u001b[39m, \u001b[39mstr\u001b[39m]:\n\u001b[0;32m 216\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrequest_raw(\n\u001b[0;32m 217\u001b[0m method\u001b[39m.\u001b[39mlower(),\n\u001b[0;32m 218\u001b[0m url,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 224\u001b[0m request_timeout\u001b[39m=\u001b[39mrequest_timeout,\n\u001b[0;32m 225\u001b[0m )\n\u001b[1;32m--> 226\u001b[0m resp, got_stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_interpret_response(result, stream)\n\u001b[0;32m 227\u001b[0m \u001b[39mreturn\u001b[39;00m resp, got_stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapi_key\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_requestor.py:619\u001b[0m, in \u001b[0;36mAPIRequestor._interpret_response\u001b[1;34m(self, result, stream)\u001b[0m\n\u001b[0;32m 611\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m 612\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_interpret_response_line(\n\u001b[0;32m 613\u001b[0m line, result\u001b[39m.\u001b[39mstatus_code, result\u001b[39m.\u001b[39mheaders, 
stream\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m\n\u001b[0;32m 614\u001b[0m )\n\u001b[0;32m 615\u001b[0m \u001b[39mfor\u001b[39;00m line \u001b[39min\u001b[39;00m parse_stream(result\u001b[39m.\u001b[39miter_lines())\n\u001b[0;32m 616\u001b[0m ), \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m 617\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 618\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[1;32m--> 619\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_interpret_response_line(\n\u001b[0;32m 620\u001b[0m result\u001b[39m.\u001b[39;49mcontent\u001b[39m.\u001b[39;49mdecode(\u001b[39m\"\u001b[39;49m\u001b[39mutf-8\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m 621\u001b[0m result\u001b[39m.\u001b[39;49mstatus_code,\n\u001b[0;32m 622\u001b[0m result\u001b[39m.\u001b[39;49mheaders,\n\u001b[0;32m 623\u001b[0m stream\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[0;32m 624\u001b[0m ),\n\u001b[0;32m 625\u001b[0m \u001b[39mFalse\u001b[39;00m,\n\u001b[0;32m 626\u001b[0m )\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_requestor.py:682\u001b[0m, in \u001b[0;36mAPIRequestor._interpret_response_line\u001b[1;34m(self, rbody, rcode, rheaders, stream)\u001b[0m\n\u001b[0;32m 680\u001b[0m stream_error \u001b[39m=\u001b[39m stream \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39merror\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m resp\u001b[39m.\u001b[39mdata\n\u001b[0;32m 681\u001b[0m \u001b[39mif\u001b[39;00m stream_error \u001b[39mor\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39m200\u001b[39m \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m rcode \u001b[39m<\u001b[39m \u001b[39m300\u001b[39m:\n\u001b[1;32m--> 682\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandle_error_response(\n\u001b[0;32m 683\u001b[0m rbody, rcode, resp\u001b[39m.\u001b[39mdata, rheaders, stream_error\u001b[39m=\u001b[39mstream_error\n\u001b[0;32m 684\u001b[0m 
)\n\u001b[0;32m 685\u001b[0m \u001b[39mreturn\u001b[39;00m resp\n", + "\u001b[1;31mRateLimitError\u001b[0m: Rate limit reached for gpt-3.5-turbo in organization org-x1evDRl7Z91oUYDUdl96xewe on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method." + ] + } + ], + "source": [ + "summaries = chain.batch(docs, {\"max_concurrency\": 5})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ensemble Retriever" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting rank_bm25\n", + " Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)\n", + "Requirement already satisfied: numpy in d:\\anaconda3\\envs\\nlp\\lib\\site-packages (from rank_bm25) (1.23.5)\n", + "Installing collected packages: rank_bm25\n", + "Successfully installed rank_bm25-0.2.2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n" + ] + } + ], + "source": [ + "# !pip install rank_bm25" + ] + }, + { + "cell_type": "code", +
"execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers import BM25Retriever, EnsembleRetriever\n", + "from langchain.vectorstores import FAISS, Chroma" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# docs = DirectoryLoader(self.DATA_DIRECTORY).load()\n", + "# text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=10)\n", + "# all_splits = text_splitter.split_documents(docs)\n", + "\n", + "bm25_retriever = BM25Retriever.from_documents(docs)\n", + "\n", + "# initialize the bm25 retriever and faiss retriever\n", + "# bm25_retriever = BM25Retriever.from_texts(doc_list)\n", + "# bm25_retriever = BM25Retriever.from_documents(docs)\n", + "bm25_retriever.k = 2\n", + "\n", + "embedding = OpenAIEmbeddings()\n", + "faiss_vectorstore = Chroma.from_documents(docs, embedding)\n", + "faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={\"k\": 2})\n", + "\n", + "# initialize the ensemble retriever\n", + "ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Features\\n\\nAt Jazz, we ensure a thorough and sustained focus on employee wellness which goes beyond the\\n\\ntraditional work norms, aiming at enhancing your experience and well-being both at work and beyond.\\n\\nLeaves Categorization\\n\\nYour leaves’ entitlement includes the following:\\n\\n2.8.1. Annual Time-Off', metadata={'source': 'Data\\\\Policies\\\\2.8. Leaves Policy V7-06Jun2022.pdf'}),\n", + " Document(page_content='maximum capping of twenty (20) working days.\\n\\n2.8.14. In case of your resignation, your annual leaves balance cannot be adjusted against your notice\\n\\nperiod.\\n\\n2.8.15. Maternity leave can be combined with annual time-off.\\n\\n2.8.16. 
For adopting parents, leaves can be availed on the arrival of the new family member.', metadata={'source': 'Data\\\\Policies\\\\2.8. Leaves Policy V7-06Jun2022.pdf'}),\n", + " Document(page_content='2.8.11. In case of un-availed leaves, remaining balance will lapse at respective year end with no carry\\n\\novers.\\n\\n2.8.12. In case of availed annual leaves being more than earned ones, an adjustment is made in the\\n\\nfinal settlement, in case you have to leave Jazz.\\n\\n2.8.13. Any un-availed annual leaves balance will be en-cashed in the final settlement with a', metadata={'source': 'Data\\\\Policies\\\\2.8. Leaves Policy V7-06Jun2022.pdf'}),\n", + " Document(page_content='2.8.9.\\n\\nIf you have exhausted your leaves’ balance and still face a time off requirement, you are\\n\\nadvised to discuss the possibility of unpaid leave with your manager and people operations\\n\\nrepresentatives.\\n\\n2.8.10. The maximum limit for unpaid leaves is three (03) months within one (01) year.', metadata={'source': 'Data\\\\Policies\\\\2.8. Leaves Policy V7-06Jun2022.pdf'})]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sub_docs = ensemble_retriever.get_relevant_documents(\"leaves\")\n", + "sub_docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Using LLM to Answer query" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQA\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.chat_models import ChatOpenAI\n", + "template = \"\"\"\n", + "You are an Expert Policy Advisor.These Below are the Documents that are extracted from the different Policies.Your Job \n", + " is to Provide the Answer to below question based on the text below. 
\n", + " Here are few instructions for you to follow when answering a question.\n", + " - When you didnt find the relevant answers from below text Just Say \"I dont know this,Please contact your HRBP for more details.\"\n", + " - These are policy Documents, When answering a question Do Not return in response that \"This information is At Annex A/B\".Provide a Complete response to request.\n", + " - Try to answer the questions in bullet format if possible.\n", + " - Use three sentences maximum to Answer the question in very concise manner\n", + " \n", + " \n", + " {context}\n", + " Question: {question}\n", + " Helpful Answer:\n", + " \"\"\"\n", + "QA_CHAIN_PROMPT = PromptTemplate.from_template(template)\n", + "\n", + "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo-16k\", temperature=0)\n", + "\n", + "qa_chain = RetrievalQA.from_chain_type(\n", + " llm,\n", + " retriever=ensemble_retriever,\n", + " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT}\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The fuel limit for the Sr. Legal Counsel in the south region is 200 liters.\n" + ] + } + ], + "source": [ + "question = \"what is fuel limit for Sr. 
Legal Counsel in south region?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The CMA (Car Maintenance Allowance) for an Area Sales Manager is PKR 15,000 per month.\n" + ] + } + ], + "source": [ + "question = \"what is cma for area sales manager?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The hard area allowance is provided for specific roles deployed in remote areas. The specific areas covered in the hard area allowance are Quetta, Gwadar, Gilgit, DI Khan, and Zhob. The roles that qualify for the hard area allowance are Regional Sales Head, Area Sales Manager, Zonal Manager, and MBU Lead.\n" + ] + } + ], + "source": [ + "question = \"what areas are coverd in hard area allowance along with what roles are in it?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No, under the Recognition Policy, line managers are not allowed to award lavish or extravagant gifts to employees. The policy specifically states that any form of lavish or extravagant item above PKR 25,000 cannot be awarded by a line manager or HOD. 
Any exception to this rule would require approval from the CPO and CEO.\n" + ] + } + ], + "source": [ + "question = \"can a line manger gift an employee a lavish thing?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The BYOD renewal period is three years. After using a company-owned device for three years or more, employees become eligible for the next BYOD allowance and their BYOD device will be replaced.\n" + ] + } + ], + "source": [ + "\n", + "question = \"what is byod renewal period?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The minimum distance required to raise TAF for intercity travels is 80 kilometers.\n" + ] + } + ], + "source": [ + "question = \"What is minimum distance is required to raise TAF for intercity travels?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The leave policy at Jazz includes annual time-off, medical time-off, parental leave, sabbatical leave, and gazette/public holidays. Employees are entitled to up to 30 working days of annual leave, 15 working days of medical leave, 6 months of maternity leave, and 30 working days of paternity leave. 
The policy also allows for flexible working options during pregnancy and returning mothers can avail of a phased back support program.\n" + ] + } + ], + "source": [ + "question = \"What is leave policy?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "ename": "InvalidRequestError", + "evalue": "This model's maximum context length is 4097 tokens. However, your messages resulted in 4252 tokens. Please reduce the length of the messages.", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mInvalidRequestError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32md:\\Work\\Jazz\\GPT\\JIN-e\\Langchain_bot.ipynb Cell 77\u001b[0m line \u001b[0;36m2\n\u001b[0;32m 1\u001b[0m question \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mWhat cars each grade is entitled what is their buy back policy?\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m----> 2\u001b[0m result \u001b[39m=\u001b[39m qa_chain({\u001b[39m\"\u001b[39;49m\u001b[39mquery\u001b[39;49m\u001b[39m\"\u001b[39;49m: question})\n\u001b[0;32m 3\u001b[0m \u001b[39mprint\u001b[39m(result[\u001b[39m'\u001b[39m\u001b[39mresult\u001b[39m\u001b[39m'\u001b[39m])\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\base.py:292\u001b[0m, in \u001b[0;36mChain.__call__\u001b[1;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)\u001b[0m\n\u001b[0;32m 290\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 291\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n\u001b[1;32m--> 292\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[0;32m 293\u001b[0m 
run_manager\u001b[39m.\u001b[39mon_chain_end(outputs)\n\u001b[0;32m 294\u001b[0m final_outputs: Dict[\u001b[39mstr\u001b[39m, Any] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprep_outputs(\n\u001b[0;32m 295\u001b[0m inputs, outputs, return_only_outputs\n\u001b[0;32m 296\u001b[0m )\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\base.py:286\u001b[0m, in \u001b[0;36mChain.__call__\u001b[1;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)\u001b[0m\n\u001b[0;32m 279\u001b[0m run_manager \u001b[39m=\u001b[39m callback_manager\u001b[39m.\u001b[39mon_chain_start(\n\u001b[0;32m 280\u001b[0m dumpd(\u001b[39mself\u001b[39m),\n\u001b[0;32m 281\u001b[0m inputs,\n\u001b[0;32m 282\u001b[0m name\u001b[39m=\u001b[39mrun_name,\n\u001b[0;32m 283\u001b[0m )\n\u001b[0;32m 284\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 285\u001b[0m outputs \u001b[39m=\u001b[39m (\n\u001b[1;32m--> 286\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(inputs, run_manager\u001b[39m=\u001b[39;49mrun_manager)\n\u001b[0;32m 287\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported\n\u001b[0;32m 288\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_call(inputs)\n\u001b[0;32m 289\u001b[0m )\n\u001b[0;32m 290\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 291\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\retrieval_qa\\base.py:139\u001b[0m, in \u001b[0;36mBaseRetrievalQA._call\u001b[1;34m(self, inputs, run_manager)\u001b[0m\n\u001b[0;32m 137\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 138\u001b[0m docs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_docs(question) \u001b[39m# type: 
ignore[call-arg]\u001b[39;00m\n\u001b[1;32m--> 139\u001b[0m answer \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcombine_documents_chain\u001b[39m.\u001b[39;49mrun(\n\u001b[0;32m 140\u001b[0m input_documents\u001b[39m=\u001b[39;49mdocs, question\u001b[39m=\u001b[39;49mquestion, callbacks\u001b[39m=\u001b[39;49m_run_manager\u001b[39m.\u001b[39;49mget_child()\n\u001b[0;32m 141\u001b[0m )\n\u001b[0;32m 143\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mreturn_source_documents:\n\u001b[0;32m 144\u001b[0m \u001b[39mreturn\u001b[39;00m {\u001b[39mself\u001b[39m\u001b[39m.\u001b[39moutput_key: answer, \u001b[39m\"\u001b[39m\u001b[39msource_documents\u001b[39m\u001b[39m\"\u001b[39m: docs}\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\base.py:492\u001b[0m, in \u001b[0;36mChain.run\u001b[1;34m(self, callbacks, tags, metadata, *args, **kwargs)\u001b[0m\n\u001b[0;32m 487\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m(args[\u001b[39m0\u001b[39m], callbacks\u001b[39m=\u001b[39mcallbacks, tags\u001b[39m=\u001b[39mtags, metadata\u001b[39m=\u001b[39mmetadata)[\n\u001b[0;32m 488\u001b[0m _output_key\n\u001b[0;32m 489\u001b[0m ]\n\u001b[0;32m 491\u001b[0m \u001b[39mif\u001b[39;00m kwargs \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m args:\n\u001b[1;32m--> 492\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m(kwargs, callbacks\u001b[39m=\u001b[39;49mcallbacks, tags\u001b[39m=\u001b[39;49mtags, metadata\u001b[39m=\u001b[39;49mmetadata)[\n\u001b[0;32m 493\u001b[0m _output_key\n\u001b[0;32m 494\u001b[0m ]\n\u001b[0;32m 496\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m kwargs \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m args:\n\u001b[0;32m 497\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 498\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m`run` supported with either positional arguments 
or keyword arguments,\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 499\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m but none were provided.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 500\u001b[0m )\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\base.py:292\u001b[0m, in \u001b[0;36mChain.__call__\u001b[1;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)\u001b[0m\n\u001b[0;32m 290\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 291\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n\u001b[1;32m--> 292\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[0;32m 293\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_end(outputs)\n\u001b[0;32m 294\u001b[0m final_outputs: Dict[\u001b[39mstr\u001b[39m, Any] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprep_outputs(\n\u001b[0;32m 295\u001b[0m inputs, outputs, return_only_outputs\n\u001b[0;32m 296\u001b[0m )\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\base.py:286\u001b[0m, in \u001b[0;36mChain.__call__\u001b[1;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)\u001b[0m\n\u001b[0;32m 279\u001b[0m run_manager \u001b[39m=\u001b[39m callback_manager\u001b[39m.\u001b[39mon_chain_start(\n\u001b[0;32m 280\u001b[0m dumpd(\u001b[39mself\u001b[39m),\n\u001b[0;32m 281\u001b[0m inputs,\n\u001b[0;32m 282\u001b[0m name\u001b[39m=\u001b[39mrun_name,\n\u001b[0;32m 283\u001b[0m )\n\u001b[0;32m 284\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 285\u001b[0m outputs \u001b[39m=\u001b[39m (\n\u001b[1;32m--> 286\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(inputs, run_manager\u001b[39m=\u001b[39;49mrun_manager)\n\u001b[0;32m 287\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported\n\u001b[0;32m 288\u001b[0m 
\u001b[39melse\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_call(inputs)\n\u001b[0;32m 289\u001b[0m )\n\u001b[0;32m 290\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 291\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\combine_documents\\base.py:105\u001b[0m, in \u001b[0;36mBaseCombineDocumentsChain._call\u001b[1;34m(self, inputs, run_manager)\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[39m# Other keys are assumed to be needed for LLM prediction\u001b[39;00m\n\u001b[0;32m 104\u001b[0m other_keys \u001b[39m=\u001b[39m {k: v \u001b[39mfor\u001b[39;00m k, v \u001b[39min\u001b[39;00m inputs\u001b[39m.\u001b[39mitems() \u001b[39mif\u001b[39;00m k \u001b[39m!=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39minput_key}\n\u001b[1;32m--> 105\u001b[0m output, extra_return_dict \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcombine_docs(\n\u001b[0;32m 106\u001b[0m docs, callbacks\u001b[39m=\u001b[39m_run_manager\u001b[39m.\u001b[39mget_child(), \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mother_keys\n\u001b[0;32m 107\u001b[0m )\n\u001b[0;32m 108\u001b[0m extra_return_dict[\u001b[39mself\u001b[39m\u001b[39m.\u001b[39moutput_key] \u001b[39m=\u001b[39m output\n\u001b[0;32m 109\u001b[0m \u001b[39mreturn\u001b[39;00m extra_return_dict\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\combine_documents\\stuff.py:171\u001b[0m, in \u001b[0;36mStuffDocumentsChain.combine_docs\u001b[1;34m(self, docs, callbacks, **kwargs)\u001b[0m\n\u001b[0;32m 169\u001b[0m inputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_inputs(docs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 170\u001b[0m \u001b[39m# Call predict on the LLM.\u001b[39;00m\n\u001b[1;32m--> 171\u001b[0m 
\u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mllm_chain\u001b[39m.\u001b[39mpredict(callbacks\u001b[39m=\u001b[39mcallbacks, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39minputs), {}\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\llm.py:255\u001b[0m, in \u001b[0;36mLLMChain.predict\u001b[1;34m(self, callbacks, **kwargs)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mpredict\u001b[39m(\u001b[39mself\u001b[39m, callbacks: Callbacks \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[0;32m 241\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Format prompt with kwargs and pass to LLM.\u001b[39;00m\n\u001b[0;32m 242\u001b[0m \n\u001b[0;32m 243\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 253\u001b[0m \u001b[39m completion = llm.predict(adjective=\"funny\")\u001b[39;00m\n\u001b[0;32m 254\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 255\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m(kwargs, callbacks\u001b[39m=\u001b[39;49mcallbacks)[\u001b[39mself\u001b[39m\u001b[39m.\u001b[39moutput_key]\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\base.py:292\u001b[0m, in \u001b[0;36mChain.__call__\u001b[1;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)\u001b[0m\n\u001b[0;32m 290\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 291\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n\u001b[1;32m--> 292\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[0;32m 293\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_end(outputs)\n\u001b[0;32m 294\u001b[0m final_outputs: Dict[\u001b[39mstr\u001b[39m, Any] 
\u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprep_outputs(\n\u001b[0;32m 295\u001b[0m inputs, outputs, return_only_outputs\n\u001b[0;32m 296\u001b[0m )\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\base.py:286\u001b[0m, in \u001b[0;36mChain.__call__\u001b[1;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)\u001b[0m\n\u001b[0;32m 279\u001b[0m run_manager \u001b[39m=\u001b[39m callback_manager\u001b[39m.\u001b[39mon_chain_start(\n\u001b[0;32m 280\u001b[0m dumpd(\u001b[39mself\u001b[39m),\n\u001b[0;32m 281\u001b[0m inputs,\n\u001b[0;32m 282\u001b[0m name\u001b[39m=\u001b[39mrun_name,\n\u001b[0;32m 283\u001b[0m )\n\u001b[0;32m 284\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 285\u001b[0m outputs \u001b[39m=\u001b[39m (\n\u001b[1;32m--> 286\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call(inputs, run_manager\u001b[39m=\u001b[39;49mrun_manager)\n\u001b[0;32m 287\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported\n\u001b[0;32m 288\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_call(inputs)\n\u001b[0;32m 289\u001b[0m )\n\u001b[0;32m 290\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 291\u001b[0m run_manager\u001b[39m.\u001b[39mon_chain_error(e)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\llm.py:91\u001b[0m, in \u001b[0;36mLLMChain._call\u001b[1;34m(self, inputs, run_manager)\u001b[0m\n\u001b[0;32m 86\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_call\u001b[39m(\n\u001b[0;32m 87\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 88\u001b[0m inputs: Dict[\u001b[39mstr\u001b[39m, Any],\n\u001b[0;32m 89\u001b[0m run_manager: Optional[CallbackManagerForChainRun] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 90\u001b[0m ) 
\u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Dict[\u001b[39mstr\u001b[39m, \u001b[39mstr\u001b[39m]:\n\u001b[1;32m---> 91\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgenerate([inputs], run_manager\u001b[39m=\u001b[39;49mrun_manager)\n\u001b[0;32m 92\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcreate_outputs(response)[\u001b[39m0\u001b[39m]\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chains\\llm.py:101\u001b[0m, in \u001b[0;36mLLMChain.generate\u001b[1;34m(self, input_list, run_manager)\u001b[0m\n\u001b[0;32m 99\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Generate LLM result from inputs.\"\"\"\u001b[39;00m\n\u001b[0;32m 100\u001b[0m prompts, stop \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mprep_prompts(input_list, run_manager\u001b[39m=\u001b[39mrun_manager)\n\u001b[1;32m--> 101\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mllm\u001b[39m.\u001b[39mgenerate_prompt(\n\u001b[0;32m 102\u001b[0m prompts,\n\u001b[0;32m 103\u001b[0m stop,\n\u001b[0;32m 104\u001b[0m callbacks\u001b[39m=\u001b[39mrun_manager\u001b[39m.\u001b[39mget_child() \u001b[39mif\u001b[39;00m run_manager \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 105\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mllm_kwargs,\n\u001b[0;32m 106\u001b[0m )\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\base.py:414\u001b[0m, in \u001b[0;36mBaseChatModel.generate_prompt\u001b[1;34m(self, prompts, stop, callbacks, **kwargs)\u001b[0m\n\u001b[0;32m 406\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mgenerate_prompt\u001b[39m(\n\u001b[0;32m 407\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 408\u001b[0m prompts: List[PromptValue],\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 411\u001b[0m 
\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any,\n\u001b[0;32m 412\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m LLMResult:\n\u001b[0;32m 413\u001b[0m prompt_messages \u001b[39m=\u001b[39m [p\u001b[39m.\u001b[39mto_messages() \u001b[39mfor\u001b[39;00m p \u001b[39min\u001b[39;00m prompts]\n\u001b[1;32m--> 414\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mgenerate(prompt_messages, stop\u001b[39m=\u001b[39mstop, callbacks\u001b[39m=\u001b[39mcallbacks, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\base.py:309\u001b[0m, in \u001b[0;36mBaseChatModel.generate\u001b[1;34m(self, messages, stop, callbacks, tags, metadata, **kwargs)\u001b[0m\n\u001b[0;32m 307\u001b[0m \u001b[39mif\u001b[39;00m run_managers:\n\u001b[0;32m 308\u001b[0m run_managers[i]\u001b[39m.\u001b[39mon_llm_error(e)\n\u001b[1;32m--> 309\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[0;32m 310\u001b[0m flattened_outputs \u001b[39m=\u001b[39m [\n\u001b[0;32m 311\u001b[0m LLMResult(generations\u001b[39m=\u001b[39m[res\u001b[39m.\u001b[39mgenerations], llm_output\u001b[39m=\u001b[39mres\u001b[39m.\u001b[39mllm_output)\n\u001b[0;32m 312\u001b[0m \u001b[39mfor\u001b[39;00m res \u001b[39min\u001b[39;00m results\n\u001b[0;32m 313\u001b[0m ]\n\u001b[0;32m 314\u001b[0m llm_output \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_combine_llm_outputs([res\u001b[39m.\u001b[39mllm_output \u001b[39mfor\u001b[39;00m res \u001b[39min\u001b[39;00m results])\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\base.py:299\u001b[0m, in \u001b[0;36mBaseChatModel.generate\u001b[1;34m(self, messages, stop, callbacks, tags, metadata, **kwargs)\u001b[0m\n\u001b[0;32m 296\u001b[0m \u001b[39mfor\u001b[39;00m i, m \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(messages):\n\u001b[0;32m 297\u001b[0m 
\u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 298\u001b[0m results\u001b[39m.\u001b[39mappend(\n\u001b[1;32m--> 299\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_generate_with_cache(\n\u001b[0;32m 300\u001b[0m m,\n\u001b[0;32m 301\u001b[0m stop\u001b[39m=\u001b[39mstop,\n\u001b[0;32m 302\u001b[0m run_manager\u001b[39m=\u001b[39mrun_managers[i] \u001b[39mif\u001b[39;00m run_managers \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 303\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs,\n\u001b[0;32m 304\u001b[0m )\n\u001b[0;32m 305\u001b[0m )\n\u001b[0;32m 306\u001b[0m \u001b[39mexcept\u001b[39;00m (\u001b[39mKeyboardInterrupt\u001b[39;00m, \u001b[39mException\u001b[39;00m) \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 307\u001b[0m \u001b[39mif\u001b[39;00m run_managers:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\base.py:446\u001b[0m, in \u001b[0;36mBaseChatModel._generate_with_cache\u001b[1;34m(self, messages, stop, run_manager, **kwargs)\u001b[0m\n\u001b[0;32m 442\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[0;32m 443\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mAsked to cache, but no cache found at `langchain.cache`.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 444\u001b[0m )\n\u001b[0;32m 445\u001b[0m \u001b[39mif\u001b[39;00m new_arg_supported:\n\u001b[1;32m--> 446\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_generate(\n\u001b[0;32m 447\u001b[0m messages, stop\u001b[39m=\u001b[39mstop, run_manager\u001b[39m=\u001b[39mrun_manager, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs\n\u001b[0;32m 448\u001b[0m )\n\u001b[0;32m 449\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 450\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_generate(messages, stop\u001b[39m=\u001b[39mstop, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File 
\u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\openai.py:345\u001b[0m, in \u001b[0;36mChatOpenAI._generate\u001b[1;34m(self, messages, stop, run_manager, stream, **kwargs)\u001b[0m\n\u001b[0;32m 343\u001b[0m message_dicts, params \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_create_message_dicts(messages, stop)\n\u001b[0;32m 344\u001b[0m params \u001b[39m=\u001b[39m {\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs}\n\u001b[1;32m--> 345\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcompletion_with_retry(\n\u001b[0;32m 346\u001b[0m messages\u001b[39m=\u001b[39mmessage_dicts, run_manager\u001b[39m=\u001b[39mrun_manager, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[0;32m 347\u001b[0m )\n\u001b[0;32m 348\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_create_chat_result(response)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\openai.py:278\u001b[0m, in \u001b[0;36mChatOpenAI.completion_with_retry\u001b[1;34m(self, run_manager, **kwargs)\u001b[0m\n\u001b[0;32m 274\u001b[0m \u001b[39m@retry_decorator\u001b[39m\n\u001b[0;32m 275\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_completion_with_retry\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Any:\n\u001b[0;32m 276\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclient\u001b[39m.\u001b[39mcreate(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m--> 278\u001b[0m \u001b[39mreturn\u001b[39;00m _completion_with_retry(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\tenacity\\__init__.py:289\u001b[0m, in \u001b[0;36mBaseRetrying.wraps..wrapped_f\u001b[1;34m(*args, **kw)\u001b[0m\n\u001b[0;32m 287\u001b[0m 
\u001b[39m@functools\u001b[39m\u001b[39m.\u001b[39mwraps(f)\n\u001b[0;32m 288\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mwrapped_f\u001b[39m(\u001b[39m*\u001b[39margs: t\u001b[39m.\u001b[39mAny, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkw: t\u001b[39m.\u001b[39mAny) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m t\u001b[39m.\u001b[39mAny:\n\u001b[1;32m--> 289\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m(f, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkw)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\tenacity\\__init__.py:379\u001b[0m, in \u001b[0;36mRetrying.__call__\u001b[1;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[0;32m 377\u001b[0m retry_state \u001b[39m=\u001b[39m RetryCallState(retry_object\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m, fn\u001b[39m=\u001b[39mfn, args\u001b[39m=\u001b[39margs, kwargs\u001b[39m=\u001b[39mkwargs)\n\u001b[0;32m 378\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m--> 379\u001b[0m do \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49miter(retry_state\u001b[39m=\u001b[39;49mretry_state)\n\u001b[0;32m 380\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(do, DoAttempt):\n\u001b[0;32m 381\u001b[0m \u001b[39mtry\u001b[39;00m:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\tenacity\\__init__.py:314\u001b[0m, in \u001b[0;36mBaseRetrying.iter\u001b[1;34m(self, retry_state)\u001b[0m\n\u001b[0;32m 312\u001b[0m is_explicit_retry \u001b[39m=\u001b[39m fut\u001b[39m.\u001b[39mfailed \u001b[39mand\u001b[39;00m \u001b[39misinstance\u001b[39m(fut\u001b[39m.\u001b[39mexception(), TryAgain)\n\u001b[0;32m 313\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (is_explicit_retry \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mretry(retry_state)):\n\u001b[1;32m--> 314\u001b[0m \u001b[39mreturn\u001b[39;00m fut\u001b[39m.\u001b[39;49mresult()\n\u001b[0;32m 
316\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mafter \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 317\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mafter(retry_state)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\concurrent\\futures\\_base.py:451\u001b[0m, in \u001b[0;36mFuture.result\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 449\u001b[0m \u001b[39mraise\u001b[39;00m CancelledError()\n\u001b[0;32m 450\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_state \u001b[39m==\u001b[39m FINISHED:\n\u001b[1;32m--> 451\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m__get_result()\n\u001b[0;32m 453\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_condition\u001b[39m.\u001b[39mwait(timeout)\n\u001b[0;32m 455\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_state \u001b[39min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\concurrent\\futures\\_base.py:403\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 401\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception:\n\u001b[0;32m 402\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 403\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_exception\n\u001b[0;32m 404\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[0;32m 405\u001b[0m \u001b[39m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[0;32m 406\u001b[0m \u001b[39mself\u001b[39m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\tenacity\\__init__.py:382\u001b[0m, in \u001b[0;36mRetrying.__call__\u001b[1;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[0;32m 380\u001b[0m 
\u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(do, DoAttempt):\n\u001b[0;32m 381\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m--> 382\u001b[0m result \u001b[39m=\u001b[39m fn(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 383\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m: \u001b[39m# noqa: B902\u001b[39;00m\n\u001b[0;32m 384\u001b[0m retry_state\u001b[39m.\u001b[39mset_exception(sys\u001b[39m.\u001b[39mexc_info()) \u001b[39m# type: ignore[arg-type]\u001b[39;00m\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\langchain\\chat_models\\openai.py:276\u001b[0m, in \u001b[0;36mChatOpenAI.completion_with_retry.._completion_with_retry\u001b[1;34m(**kwargs)\u001b[0m\n\u001b[0;32m 274\u001b[0m \u001b[39m@retry_decorator\u001b[39m\n\u001b[0;32m 275\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_completion_with_retry\u001b[39m(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Any:\n\u001b[1;32m--> 276\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclient\u001b[39m.\u001b[39mcreate(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_resources\\chat_completion.py:25\u001b[0m, in \u001b[0;36mChatCompletion.create\u001b[1;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m 24\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 25\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mcreate(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m 26\u001b[0m \u001b[39mexcept\u001b[39;00m TryAgain \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m 27\u001b[0m \u001b[39mif\u001b[39;00m timeout \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m 
\u001b[39mand\u001b[39;00m time\u001b[39m.\u001b[39mtime() \u001b[39m>\u001b[39m start \u001b[39m+\u001b[39m timeout:\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_resources\\abstract\\engine_api_resource.py:153\u001b[0m, in \u001b[0;36mEngineAPIResource.create\u001b[1;34m(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)\u001b[0m\n\u001b[0;32m 127\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[0;32m 128\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcreate\u001b[39m(\n\u001b[0;32m 129\u001b[0m \u001b[39mcls\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams,\n\u001b[0;32m 137\u001b[0m ):\n\u001b[0;32m 138\u001b[0m (\n\u001b[0;32m 139\u001b[0m deployment_id,\n\u001b[0;32m 140\u001b[0m engine,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 150\u001b[0m api_key, api_base, api_type, api_version, organization, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[0;32m 151\u001b[0m )\n\u001b[1;32m--> 153\u001b[0m response, _, api_key \u001b[39m=\u001b[39m requestor\u001b[39m.\u001b[39;49mrequest(\n\u001b[0;32m 154\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mpost\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m 155\u001b[0m url,\n\u001b[0;32m 156\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[0;32m 157\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[0;32m 158\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[0;32m 159\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[0;32m 160\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[0;32m 161\u001b[0m )\n\u001b[0;32m 163\u001b[0m \u001b[39mif\u001b[39;00m stream:\n\u001b[0;32m 164\u001b[0m \u001b[39m# must be an iterator\u001b[39;00m\n\u001b[0;32m 165\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(response, OpenAIResponse)\n", + "File 
\u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_requestor.py:226\u001b[0m, in \u001b[0;36mAPIRequestor.request\u001b[1;34m(self, method, url, params, headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[0;32m 205\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrequest\u001b[39m(\n\u001b[0;32m 206\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[0;32m 207\u001b[0m method,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 214\u001b[0m request_timeout: Optional[Union[\u001b[39mfloat\u001b[39m, Tuple[\u001b[39mfloat\u001b[39m, \u001b[39mfloat\u001b[39m]]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[0;32m 215\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], \u001b[39mbool\u001b[39m, \u001b[39mstr\u001b[39m]:\n\u001b[0;32m 216\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrequest_raw(\n\u001b[0;32m 217\u001b[0m method\u001b[39m.\u001b[39mlower(),\n\u001b[0;32m 218\u001b[0m url,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 224\u001b[0m request_timeout\u001b[39m=\u001b[39mrequest_timeout,\n\u001b[0;32m 225\u001b[0m )\n\u001b[1;32m--> 226\u001b[0m resp, got_stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_interpret_response(result, stream)\n\u001b[0;32m 227\u001b[0m \u001b[39mreturn\u001b[39;00m resp, got_stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapi_key\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_requestor.py:619\u001b[0m, in \u001b[0;36mAPIRequestor._interpret_response\u001b[1;34m(self, result, stream)\u001b[0m\n\u001b[0;32m 611\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m 612\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_interpret_response_line(\n\u001b[0;32m 613\u001b[0m line, result\u001b[39m.\u001b[39mstatus_code, result\u001b[39m.\u001b[39mheaders, stream\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m\n\u001b[0;32m 614\u001b[0m 
)\n\u001b[0;32m 615\u001b[0m \u001b[39mfor\u001b[39;00m line \u001b[39min\u001b[39;00m parse_stream(result\u001b[39m.\u001b[39miter_lines())\n\u001b[0;32m 616\u001b[0m ), \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m 617\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 618\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[1;32m--> 619\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_interpret_response_line(\n\u001b[0;32m 620\u001b[0m result\u001b[39m.\u001b[39;49mcontent\u001b[39m.\u001b[39;49mdecode(\u001b[39m\"\u001b[39;49m\u001b[39mutf-8\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m 621\u001b[0m result\u001b[39m.\u001b[39;49mstatus_code,\n\u001b[0;32m 622\u001b[0m result\u001b[39m.\u001b[39;49mheaders,\n\u001b[0;32m 623\u001b[0m stream\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[0;32m 624\u001b[0m ),\n\u001b[0;32m 625\u001b[0m \u001b[39mFalse\u001b[39;00m,\n\u001b[0;32m 626\u001b[0m )\n", + "File \u001b[1;32md:\\anaconda3\\envs\\nlp\\lib\\site-packages\\openai\\api_requestor.py:682\u001b[0m, in \u001b[0;36mAPIRequestor._interpret_response_line\u001b[1;34m(self, rbody, rcode, rheaders, stream)\u001b[0m\n\u001b[0;32m 680\u001b[0m stream_error \u001b[39m=\u001b[39m stream \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39merror\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m resp\u001b[39m.\u001b[39mdata\n\u001b[0;32m 681\u001b[0m \u001b[39mif\u001b[39;00m stream_error \u001b[39mor\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39m200\u001b[39m \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m rcode \u001b[39m<\u001b[39m \u001b[39m300\u001b[39m:\n\u001b[1;32m--> 682\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandle_error_response(\n\u001b[0;32m 683\u001b[0m rbody, rcode, resp\u001b[39m.\u001b[39mdata, rheaders, stream_error\u001b[39m=\u001b[39mstream_error\n\u001b[0;32m 684\u001b[0m )\n\u001b[0;32m 685\u001b[0m \u001b[39mreturn\u001b[39;00m resp\n", + 
"\u001b[1;31mInvalidRequestError\u001b[0m: This model's maximum context length is 4097 tokens. However, your messages resulted in 4252 tokens. Please reduce the length of the messages." + ] + } + ], + "source": [ + "question = \"What cars each grade is entitled what is their buy back policy?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Car Maintenance Allowance (CMA) is a benefit provided to field-based roles in Jazz to support their travel needs within their allocated territories. It is meant to cover the expenses related to car maintenance.\n", + "\n", + "CMA entitlement for different roles are as follows:\n", + "- Regional Sales Head: CMA of PKR 25,000\n", + "- Area Manager: CMA of PKR 15,000\n", + "- Territory Sales Supervisor: CMA of PKR 15,000\n", + "- Franchise Services Executive: CMA of PKR 5,000\n", + "- Trade Marketing Officer/Executive: CMA of PKR 5,000\n", + "- Head of B2G/LA/SME: CMA of PKR 25,000\n", + "- Business Development Manager: CMA of PKR 15,000\n", + "- Manager Corporate Solutions: CMA of PKR 15,000\n", + "- Manager M2M Solutions: CMA of PKR 25,000\n", + "- Manager MFS & Agri: CMA of PKR 25,000\n", + "- Business Development Manager/Team Lead: CMA of PKR 25,000\n", + "- CAM/Business Consultants: CMA of PKR 25,000\n", + "- Collection Executives/Supervisors: CMA of PKR 5,000\n", + "\n", + "Please note that these are just a few examples and the complete list of CMA entitlement for different roles can be found in Annexure A of the policy document.\n" + ] + } + ], + "source": [ + "question = \"What is car mantinance Allowance? 
and list cma entitlement for different roles?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CMA stands for Car Maintenance Allowance. It is a benefit provided to field-based roles to cover their car maintenance expenses. \n", + "\n", + "CMA and Fuel entitlement for different roles are as follows:\n", + "\n", + "- Regional Sales Head: CMA - PKR 25,000, Fuel - 300 liters\n", + "- Area Manager: CMA - PKR 15,000, Fuel - 200 liters\n", + "- Territory Sales Supervisor: CMA - PKR 15,000, Fuel - 100 liters\n", + "- Franchise Services Executive: CMA - PKR 5,000, Fuel - 150 liters\n", + "- Trade Marketing Officer/Executive: CMA - PKR 5,000, Fuel - 150 liters\n", + "- Head of B2G/LA/SME: CMA - PKR 25,000, Fuel - 300 liters\n", + "- Business Development Manager: CMA - PKR 15,000, Fuel - 200 liters\n", + "- Manager Corporate Solutions: CMA - PKR 15,000, Fuel - 100 liters\n", + "- Enterprise Sales Manager: CMA - PKR 25,000, Fuel - 200 liters\n", + "- Manager M2M Solutions: CMA - PKR 5,000, Fuel - 150 liters\n", + "- Manager MFS & Agri: CMA - PKR 5,000, Fuel - 150 liters\n", + "\n", + "Please note that these entitlements are subject to change based on business requirements.\n" + ] + } + ], + "source": [ + "question = \"What is cma? 
and provide cma entitlement and Fuel entitlement for different roles?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The policy does not mention any specific areas that are covered in hard area allowance.\n" + ] + } + ], + "source": [ + "question = \"What areas are covered in hard area allowance?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The areas covered in the hard area allowance are Quetta, Gwadar, Gilgit, DI Khan, Zhob, and MBU Lead. The roles covered in the hard area allowance are Regional Sales Head, Area Sales Manager, Zonal Manager, and MBU Lead.\n" + ] + } + ], + "source": [ + "question = \"What areas and roles are covered in hard area allowance? what are these roles and what areas as covered in it? 
\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "- L1 grade: 75 liters of fuel entitlement\n", + "- L2 grade: 100 liters of fuel entitlement\n", + "- L3 grade: 150 liters of fuel entitlement\n", + "- L4 grade: 250 liters of fuel entitlement\n", + "- L5 grade: 350 liters of fuel entitlement\n", + "- L6 grade: 450 liters of fuel entitlement\n" + ] + } + ], + "source": [ + "\n", + "question = \"What is fuel entitlement for each grade?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "L3 grade is entitled to a Suzuki Swift AT with a buy back policy of 73% of invoice value for a 2-year-old vehicle, 55% for a 3-year-old vehicle, 35% for a 4-year-old vehicle, and 18% for a 5-year-old vehicle.\n", + "L4 grade is entitled to a Honda City 1.3 AT with a buy back policy of 73% of invoice value for a 2-year-old vehicle, 55% for a 3-year-old vehicle, 35% for a 4-year-old vehicle, and 18% for a 5-year-old vehicle.\n", + "L5 grade is entitled to a Toyota Corolla 1.3 AT with a buy back policy of 73% of invoice value for a 2-year-old vehicle, 55% for a 3-year-old vehicle, 35% for a 4-year-old vehicle, and 18% for a 5-year-old vehicle.\n", + "L6 grade is entitled to a Toyota Corolla Grande 1.8 AT with a buy back policy of 73% of invoice value for a 2-year-old vehicle, 55% for a 3-year-old vehicle, 35% for a 4-year-old vehicle, and 18% for a 5-year-old vehicle.\n", + "Luxury category vehicles such as Toyota Fortuner and Toyota Land Cruiser have different buy back policies.\n" + ] + } + ], + "source": [ + "question = \"What cars each grade is entitled what is thier buy back policy?\"\n", + "result = 
qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for gpt-3.5-turbo in organization org-x1evDRl7Z91oUYDUdl96xewe on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I don't know this, please contact your HRBP for more details.\n" + ] + } + ], + "source": [ + "question = \"What cars each grade is entitled what is thier buy back policy?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result['result'])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Figuring the Cause of Difference between Jine and This Code" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='2. Benefits\\n\\n2.3. Car Policy\\n\\nOwner\\n\\nApproved By\\n\\nVersion 1.0\\n\\nVersion 2.0\\n\\nVersion 3.0\\n\\nVersion 4.0\\n\\nVersion 5.0\\n\\nVersion 6.0\\n\\nVersion 7.0\\n\\nPeople & Organization\\n\\nCPO, CFO, CEO\\n\\n22/06/2016\\n\\n01/04/2017\\n\\n01/03/2018\\n\\n01/08/2018\\n\\n01/11/2018\\n\\n01/05/2019\\n\\n01/07/2019\\n\\n2.3. Car Policy\\n\\nFeatures\\n\\n2.3.1. Fuel\\n\\n2.3.1.1. You can avail your fuel entitlement through company provided fuel card carrying a pre-defined\\n\\nmonthly limit (in liters).\\n\\n2.3.1.2. 
In case your job qualifies for role based fuel, your monthly fuel entitlement will be higher of the\\n\\ntwo limits i.e. role based and grade based fuel limit.\\n\\n2.3.2. Car Allowance\\n\\n2.3.2.1. In case of entitlement, you can opt for a defined car allowance, which reaches you through\\n\\nmonthly payroll.\\n\\n2.3.3. Operating Lease\\n\\n2.3.3.1. In case of eligibility, you have the choice to opt for a vehicle through vendor facilitated operating\\n\\nlease. Vehicle options, maintenance, buyback, contract terms and further details are available at\\n\\nthe Operating Lease Vehicle Portal.\\n\\nUseful Notes\\n\\nUpon resignation, an employee may continue the vehicle lease contract on personal capacity.\\n\\nWhere a resigned employee does not want to continue with the operating lease facility, the\\n\\ncompany may continue the lease & offer the specific lease to any interested applicant who\\n\\nwishes to avail the facility. The buyback period will continue as per the effective date in the\\n\\nlease contract initially signed-off with the resigned employee.\\n\\nIn case an applicant wishes to utilize & continue the car operating lease specified in above\\n\\nclause, the lease would continue with deductions from the Salary as per the standard procedure\\n\\n& without any deduction from the resigned employee’s settlement.\\n\\nIn case no applicant avails the existing lease, the respective dues as per the lease contract would\\n\\nbe deducted from the resigned employee’s settlement.\\n\\n\\n\\n\\n\\nThe Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice,\\n\\nin which case your employment shall be governed by such revised rules and regulations\\n\\n1\\n\\nAdministration would facilitate an employee for renting a vehicle from the vendor during the\\n\\ntime leased vehicle delivery is in process. 
The rental amount will be deducted from employee’s\\n\\nsalary and accordingly paid to the vendor.\\n\\nFuel, Monthly Car Allowance, Driver Allowance\\n\\nLevels\\n\\nFuel\\n\\nL1\\n\\nL2\\n\\nL3\\n\\nL4\\n\\nL5\\n\\nL6\\n\\n75 Litres\\n\\n100 Litres\\n\\n150 Litres\\n\\n250 Litres\\n\\n350 Litres\\n\\n450 Litres\\n\\nMonthly Car Allowance\\n\\n(Incl of lube)\\n\\n\\n\\n\\n\\nPKR 75,000\\n\\nPKR 115,000\\n\\nDriver/Driver Allowance*\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPKR 310,000\\n\\nAllowance of PKR 20,000\\n\\nPKR 485,000\\n\\nCompany Driver\\n\\nExpats are entitled to a company driver regardless of grade level\\n\\nCar Entitlements as per Categories and Buy Back Life. The life of the car will be determined from the\\n\\ndate of vehicle registration.\\n\\nLevels\\n\\nVehicle Entitlement*\\n\\nBuy Back\\n\\nL3\\n\\nL4\\n\\nL5\\n\\nL6\\n\\nSuzuki Swift AT\\n\\nHonda City 1.3 AT\\n\\nToyota Corolla 1.3 AT\\n\\nToyota Corolla Grande 1.8 AT\\n\\nHonda Civic 1.8 AT\\n\\nCategory I (Non-Luxury)\\n\\n2 years old vehicle: 73% of invoice value\\n\\n3 years old vehicle: 55% of invoice value\\n\\n4 years old vehicle: 35% of invoice value\\n\\n5 years old vehicle: 18% of Invoice Value\\n\\nToyota Fortuner\\n\\nCategory II (Luxury)\\n\\nToyota Land Cruiser\\n\\n03 Years Old Vehicle: 60% of invoice value\\n\\n04 Years Old Vehicle: 50% of invoice value\\n\\n05 Years Old Vehicle: 25% of invoice\\n\\nvalue\\n\\nVehicle options are subject to availability; available options will be updated on the operating lease\\n\\nportal.\\n\\nBuy back of Jazz owned / lease vehicles currently in possession of entitled employees will be dealt\\n\\nwith as per previous policies until the useful life is completed and the said asset is disposed from\\n\\nFixed Asset Register\\n\\nThe Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice,\\n\\nin which case your employment shall be governed by such revised rules and 
regulations\\n\\n2', metadata={'source': 'Data\\\\Policies\\\\2.3. Car Policy V7 Nov 2019.pdf'}),\n", + " Document(page_content='1.25 Company Fleet Policy\\n\\nEligibility & Applicability \\n\\nThe policy covers business needs of employees who need to undertake inter/intra city travel through company fleet and/or third party vehicles (Careem), the latter being available in major cities (Karachi, Lahore, Rawalpindi and Islamabad).\\n\\nBusiness Needs:\\n\\nThe business needs for which this facility shall be applicable include: \\n\\nAirport pick and drop facility from home/office to airport and back \\n\\nPick and drop for female employees will be managed as per Female Pick & Drop policy through Pick & Drop vans. In case of unavailability of vans, pool vehicles may be used.\\n\\nGroup employees (delegates/guests) arrangements in line with security requirements\\n\\nCommute to/from office on a public holiday/weekend for L1 and L2 employees \\n\\nCommute to/from office for physically challenged employees \\n\\nCommute back home for females in case of late-sitting i.e. 
after 19:30 hrs \\n\\nFor all business related out of city movements, arrangements would be made by Admin as per Travel Policy\\n\\nThe company fleet also includes 4x4 vehicles in all 04 regions and HQ to cater to business needs requiring travel to rough terrains and/or remote locations/long distance rides only with the following guidelines: \\n\\nPriority to be accorded to business needs of Regional Business Heads official visits, followed by commercial division employees and any other function .\\n\\nVehicles will be parked in office premises (Islamabad, Lahore, Karachi and Multan ) as part of admin pool and can be availed through regular TAF process for intercity travel followed by a verification signature after each ride by the user.\\n\\nIntra-city commute requirements will not qualify for 4x4 vehicles utilization unless they are made for a business/market visit.\\n\\nUseful Notes\\n\\nFor hardship areas to address safety and security concerns (exceptions to be covered on a separate note), RBHs may use the vehicles to commute to their office destinations and pick and drop from Home \\n\\nAdmin team will ensure maintenance of pool vehicles and drivers’ fitness \\n\\nThe policy only caters to travel for business purposes; any violation would be dealt with as per code of conduct policy\\n\\nPool cars cannot be used for Genset fueling or any heavy items transportation\\n\\nAll employees are required to follow the safety guidelines as instructed by HSSE including the use of seat belt.\\n\\nField staff with Fuel / CMA role based entitlements are not eligible to avail admin / third party vehicles (Careem) within their assigned territories and for movements within city\\n\\nTechnology Pool (228 -vehicle service) to be specifically used for technology division. In case of unavailability of 228 pool vehicles, employees can request for third party pool vehicles (Careem) as well. 
These include movements within city, between cities/outstation visits, and pick & drop from home and office on holidays / weekends\\n\\nWhile making request through Careem App, employees should follow communicated guidelines as per attached Annexure – A, and mention official purpose clearly\\n\\nFor Intra city travel Careem services should be availed as first travel priority, however pool cars to be utilized during late hours, after 09:00 PM airport pick and drop.\\n\\nAny exception to this policy would need approval from the respective CXO and CPO.\\n\\nAnnexure - A\\n\\nThird Party Vehicle (Careem) Utilization Guidelines \\n\\nEmployees can only avail “Go+” or “Business” option available in Careem App for official travel\\n\\nThe facility may only be utilized for official purpose and by Jazz employees only.\\n\\nEmployees can also the avail the service while coming to office on weekends / holidays for official purpose\\n\\nIn case of late sitting (after 1930Hrs) for business requirement, female employees can avail this service, however within peripheries of Islamabad, Rawalpindi, Lahore & Karachi', metadata={'source': 'Data\\\\Policies\\\\1.25 Company Fleet Policy 11092018.docx'}),\n", + " Document(page_content='1. Organization & Workplace\\n\\n1.29 Talent Policy\\n\\nOwner\\n\\nApproved By\\n\\nVersion 1.0\\n\\nVersion 2.0\\n\\nVersion 3.0\\n\\nVersion 4.0\\n\\nVersion 5.0\\n\\nPeople & Organization\\n\\nCPO, CFO & CEO\\n\\n01/09/2018\\n\\n01/05/2019\\n\\n05/11/2020\\n\\n15/01/2021\\n\\n30/09/2022\\n\\n1.29 Talent Policy\\n\\nThis policy allows identification of talent in line with requirements of different roles within Jazz. 
It provides\\n\\nfor a transparent process for selection/placement of resources to fill in vacant positions.\\n\\n1.29.1 Talent is acquired/moved in response to vacancies created in the following scenarios in line with\\n\\nbusiness needs.\\n\\n1.29.1.1 Replacement hiring: Vacancy created due to attrition and/or internal mobility;\\n\\nacquisition process to be initiated upon CXO approval\\n\\n1.29.1.2 New headcount and/or New role: Vacancy created due to organizational restructuring\\n\\nand/or new streams/roles introduction/enhancement; acquisition process to be initiated\\n\\nupon ELT approval for L4 and above roles, and CXO and CPO approval for L3 and below.\\n\\nJob grades’ determination for such roles will follow an evaluation exercise along with\\n\\ninternal data points.\\n\\n1.29.2 Talent acquisition process will not be applicable in the following scenarios, in line with Jazz Talent\\n\\nManagement Framework (every such placement to be aligned with CPO, respective CXO,\\n\\nrespective Head of HR BP and Head of Talent)\\n\\n1.29.2.1 Where there is an identified successor (ready now) after weighing the business impact of\\n\\nthis placement; placement/non-placement both to be discussed and aligned\\n\\n1.29.2.2 An agreed talent mobility/placement (on same grade)\\n\\n1.29.2.3 Placement of an employee (not in the above two categories) who seems to best suit the\\n\\nneeds under given business circumstances\\n\\n1.29.2.4 Where current role enhancement qualifies for grade uplift/off-cycle promotion and\\n\\ninternal talent for placement is available. 
(Job grade determination for such roles will\\n\\nfollow an evaluation exercise)\\n\\n1.29.2.4.1\\n\\nIn such cases, talent placement/promotion at L4 and above, to be tabled at\\n\\nELT by respective CXO, supported by respective HOD whereas for L3 and\\n\\nbelow, CXO and CPO approval to follow.\\n\\n1.29.2.4.2 Promotion criteria as covered in Rewards policy will be applicable.\\n\\n1.29.2.4.3 All off-cycle role evaluations post 1st working day of October will be\\n\\nconsidered in next year Annual Salary Review cycle.\\n\\n1.29.3 Decisions on identification of business-critical roles, talent and succession are made during annual\\n\\ntalent review and subsequent talent health checks for N-2 and N-3 tiers; all L3 and above\\n\\nemployees are calibrated. N-2 discussions take place at the ELT level while those for N-3 tier (and\\n\\nL3 employees) are chaired by respective CXOs, facilitated by respective Head of HRBP.\\n\\n1.29.4 For talent acquisition needs being met internally, following conditions will apply:\\n\\n1.29.4.1\\n\\n1.29.4.2\\n\\n1.29.4.3\\n\\nLine Managers of Internal applicants shortlisted for assessments will be notified about the\\n\\napplication;\\n\\nInternal candidate, if selected, will serve a notice period in the current role as agreed\\n\\nbetween the new and current line manager\\n\\nInternal candidates will be eligible to apply for an open vacancy after having served at\\n\\nleast one (01) year in the current role and grade;\\n\\nUseful Notes\\n\\nExternal Talent Acquisition services can be acquired on a need basis\\n\\nThe minimum job posting time is three (03) working days\\n\\nMinimum educational qualification for Sales force and front-line workforce and Customer\\n\\nExperience Officers, Riggers and Technicians is Intermediate or equivalent, while for all other roles\\n\\nit is a Bachelors/equivalent degree\\n\\nThe sales force and the front-line workforce will not be eligible for other roles until they meet the\\n\\nminimum qualification 
requirements\\n\\nEx-employees are eligible to apply for open vacancies in line with role specifications with a check\\n\\n\\n\\non past performance and misconduct (if any), in case of latter there will be no hiring\\n\\nIf an ex-employee is being considered for employment on the same role within six months of\\n\\nvoluntary separation, his/her grade and compensation will remain unchanged\\n\\nEmployees separated because of business decisions (such as org restructuring, role reduction, role\\n\\nredundancy) may be reconsidered for rehiring post six (06) months of separation\\n\\nAll employees moving to a new location for work are eligible for relocation benefit as per the\\n\\n\\n\\nrelocation policy\\n\\nIt is strongly recommended to have inclusion of at least one female in the shortlisted pool for\\n\\nevery vacancy\\n\\nA talent acquisition decision along with compensation details should be finalized after mutual\\n\\nconsent of line (hiring manager and one level above) and P&O (Talent and BP)\\n\\nA candidate offer may include a joining allowance (one time) if necessitated by business needs\\n\\nand approved by Head of Rewards, CPO and respective CXO.\\n\\nNotice period buy out can be offered to facilitate early joining of incumbent, if required by\\n\\nbusiness and approved by respective HOD and Head of Rewards for L1 to L3 level roles, whereas\\n\\nrespective CXO and CPO approval is mandatory for L4 and above roles.\\n\\nA shortlisted candidate pool can be re-considered for another vacancy within 03 months if the\\n\\nrole requirements are the same\\n\\nFinalized candidates need to clear the conflict-of-interest checks before being offered\\n\\nConflict of interest disclosure includes sharing details about the following:\\n\\nI.\\n\\nII.\\n\\nParents, spouse, children, other close relatives (working at Jazz)\\n\\nIf the candidate has been an employee or official of a government agency or state-\\n\\nowned enterprise within the last five (05) years\\n\\nHiring 
at CEO-I positions require approval of People Committee and OpCo Board.', metadata={'source': 'Data\\\\Policies\\\\1.29 Talent Policy V5 13.10.2022.pdf'})]" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sub_docs = ensemble_retriever.get_relevant_documents(\"What cars each grade is entitled what is thier buy back policy?\")\n", + "sub_docs" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2. Benefits\n", + "\n", + "2.3. Car Policy\n", + "\n", + "Owner\n", + "\n", + "Approved By\n", + "\n", + "Version 1.0\n", + "\n", + "Version 2.0\n", + "\n", + "Version 3.0\n", + "\n", + "Version 4.0\n", + "\n", + "Version 5.0\n", + "\n", + "Version 6.0\n", + "\n", + "Version 7.0\n", + "\n", + "People & Organization\n", + "\n", + "CPO, CFO, CEO\n", + "\n", + "22/06/2016\n", + "\n", + "01/04/2017\n", + "\n", + "01/03/2018\n", + "\n", + "01/08/2018\n", + "\n", + "01/11/2018\n", + "\n", + "01/05/2019\n", + "\n", + "01/07/2019\n", + "\n", + "2.3. Car Policy\n", + "\n", + "Features\n", + "\n", + "2.3.1. Fuel\n", + "\n", + "2.3.1.1. You can avail your fuel entitlement through company provided fuel card carrying a pre-defined\n", + "\n", + "monthly limit (in liters).\n", + "\n", + "2.3.1.2. In case your job qualifies for role based fuel, your monthly fuel entitlement will be higher of the\n", + "\n", + "two limits i.e. role based and grade based fuel limit.\n", + "\n", + "2.3.2. Car Allowance\n", + "\n", + "2.3.2.1. In case of entitlement, you can opt for a defined car allowance, which reaches you through\n", + "\n", + "monthly payroll.\n", + "\n", + "2.3.3. Operating Lease\n", + "\n", + "2.3.3.1. In case of eligibility, you have the choice to opt for a vehicle through vendor facilitated operating\n", + "\n", + "lease. 
Vehicle options, maintenance, buyback, contract terms and further details are available at\n", + "\n", + "the Operating Lease Vehicle Portal.\n", + "\n", + "Useful Notes\n", + "\n", + "Upon resignation, an employee may continue the vehicle lease contract on personal capacity.\n", + "\n", + "Where a resigned employee does not want to continue with the operating lease facility, the\n", + "\n", + "company may continue the lease & offer the specific lease to any interested applicant who\n", + "\n", + "wishes to avail the facility. The buyback period will continue as per the effective date in the\n", + "\n", + "lease contract initially signed-off with the resigned employee.\n", + "\n", + "In case an applicant wishes to utilize & continue the car operating lease specified in above\n", + "\n", + "clause, the lease would continue with deductions from the Salary as per the standard procedure\n", + "\n", + "& without any deduction from the resigned employee’s settlement.\n", + "\n", + "In case no applicant avails the existing lease, the respective dues as per the lease contract would\n", + "\n", + "be deducted from the resigned employee’s settlement.\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice,\n", + "\n", + "in which case your employment shall be governed by such revised rules and regulations\n", + "\n", + "1\n", + "\n", + "Administration would facilitate an employee for renting a vehicle from the vendor during the\n", + "\n", + "time leased vehicle delivery is in process. 
The rental amount will be deducted from employee’s\n", + "\n", + "salary and accordingly paid to the vendor.\n", + "\n", + "Fuel, Monthly Car Allowance, Driver Allowance\n", + "\n", + "Levels\n", + "\n", + "Fuel\n", + "\n", + "L1\n", + "\n", + "L2\n", + "\n", + "L3\n", + "\n", + "L4\n", + "\n", + "L5\n", + "\n", + "L6\n", + "\n", + "75 Litres\n", + "\n", + "100 Litres\n", + "\n", + "150 Litres\n", + "\n", + "250 Litres\n", + "\n", + "350 Litres\n", + "\n", + "450 Litres\n", + "\n", + "Monthly Car Allowance\n", + "\n", + "(Incl of lube)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "PKR 75,000\n", + "\n", + "PKR 115,000\n", + "\n", + "Driver/Driver Allowance*\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "PKR 310,000\n", + "\n", + "Allowance of PKR 20,000\n", + "\n", + "PKR 485,000\n", + "\n", + "Company Driver\n", + "\n", + "Expats are entitled to a company driver regardless of grade level\n", + "\n", + "Car Entitlements as per Categories and Buy Back Life. 
The life of the car will be determined from the\n", + "\n", + "date of vehicle registration.\n", + "\n", + "Levels\n", + "\n", + "Vehicle Entitlement*\n", + "\n", + "Buy Back\n", + "\n", + "L3\n", + "\n", + "L4\n", + "\n", + "L5\n", + "\n", + "L6\n", + "\n", + "Suzuki Swift AT\n", + "\n", + "Honda City 1.3 AT\n", + "\n", + "Toyota Corolla 1.3 AT\n", + "\n", + "Toyota Corolla Grande 1.8 AT\n", + "\n", + "Honda Civic 1.8 AT\n", + "\n", + "Category I (Non-Luxury)\n", + "\n", + "2 years old vehicle: 73% of invoice value\n", + "\n", + "3 years old vehicle: 55% of invoice value\n", + "\n", + "4 years old vehicle: 35% of invoice value\n", + "\n", + "5 years old vehicle: 18% of Invoice Value\n", + "\n", + "Toyota Fortuner\n", + "\n", + "Category II (Luxury)\n", + "\n", + "Toyota Land Cruiser\n", + "\n", + "03 Years Old Vehicle: 60% of invoice value\n", + "\n", + "04 Years Old Vehicle: 50% of invoice value\n", + "\n", + "05 Years Old Vehicle: 25% of invoice\n", + "\n", + "value\n", + "\n", + "Vehicle options are subject to availability; available options will be updated on the operating lease\n", + "\n", + "portal.\n", + "\n", + "Buy back of Jazz owned / lease vehicles currently in possession of entitled employees will be dealt\n", + "\n", + "with as per previous policies until the useful life is completed and the said asset is disposed from\n", + "\n", + "Fixed Asset Register\n", + "\n", + "The Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice,\n", + "\n", + "in which case your employment shall be governed by such revised rules and regulations\n", + "\n", + "2\n", + "====================================================================================================================================================================================\n", + "1.25 Company Fleet Policy\n", + "\n", + "Eligibility & Applicability \n", + "\n", + "The policy covers business needs of employees 
who need to undertake inter/intra city travel through company fleet and/or third party vehicles (Careem), the latter being available in major cities (Karachi, Lahore, Rawalpindi and Islamabad).\n", + "\n", + "Business Needs:\n", + "\n", + "The business needs for which this facility shall be applicable include: \n", + "\n", + "Airport pick and drop facility from home/office to airport and back \n", + "\n", + "Pick and drop for female employees will be managed as per Female Pick & Drop policy through Pick & Drop vans. In case of unavailability of vans, pool vehicles may be used.\n", + "\n", + "Group employees (delegates/guests) arrangements in line with security requirements\n", + "\n", + "Commute to/from office on a public holiday/weekend for L1 and L2 employees \n", + "\n", + "Commute to/from office for physically challenged employees \n", + "\n", + "Commute back home for females in case of late-sitting i.e. after 19:30 hrs \n", + "\n", + "For all business related out of city movements, arrangements would be made by Admin as per Travel Policy\n", + "\n", + "The company fleet also includes 4x4 vehicles in all 04 regions and HQ to cater to business needs requiring travel to rough terrains and/or remote locations/long distance rides only with the following guidelines: \n", + "\n", + "Priority to be accorded to business needs of Regional Business Heads official visits, followed by commercial division employees and any other function .\n", + "\n", + "Vehicles will be parked in office premises (Islamabad, Lahore, Karachi and Multan ) as part of admin pool and can be availed through regular TAF process for intercity travel followed by a verification signature after each ride by the user.\n", + "\n", + "Intra-city commute requirements will not qualify for 4x4 vehicles utilization unless they are made for a business/market visit.\n", + "\n", + "Useful Notes\n", + "\n", + "For hardship areas to address safety and security concerns (exceptions to be covered on a separate 
note), RBHs may use the vehicles to commute to their office destinations and pick and drop from Home \n", + "\n", + "Admin team will ensure maintenance of pool vehicles and drivers’ fitness \n", + "\n", + "The policy only caters to travel for business purposes; any violation would be dealt with as per code of conduct policy\n", + "\n", + "Pool cars cannot be used for Genset fueling or any heavy items transportation\n", + "\n", + "All employees are required to follow the safety guidelines as instructed by HSSE including the use of seat belt.\n", + "\n", + "Field staff with Fuel / CMA role based entitlements are not eligible to avail admin / third party vehicles (Careem) within their assigned territories and for movements within city\n", + "\n", + "Technology Pool (228 -vehicle service) to be specifically used for technology division. In case of unavailability of 228 pool vehicles, employees can request for third party pool vehicles (Careem) as well. These include movements within city, between cities/outstation visits, and pick & drop from home and office on holidays / weekends\n", + "\n", + "While making request through Careem App, employees should follow communicated guidelines as per attached Annexure – A, and mention official purpose clearly\n", + "\n", + "For Intra city travel Careem services should be availed as first travel priority, however pool cars to be utilized during late hours, after 09:00 PM airport pick and drop.\n", + "\n", + "Any exception to this policy would need approval from the respective CXO and CPO.\n", + "\n", + "Annexure - A\n", + "\n", + "Third Party Vehicle (Careem) Utilization Guidelines \n", + "\n", + "Employees can only avail “Go+” or “Business” option available in Careem App for official travel\n", + "\n", + "The facility may only be utilized for official purpose and by Jazz employees only.\n", + "\n", + "Employees can also the avail the service while coming to office on weekends / holidays for official purpose\n", + "\n", + "In case 
of late sitting (after 1930Hrs) for business requirement, female employees can avail this service, however within peripheries of Islamabad, Rawalpindi, Lahore & Karachi\n", + "====================================================================================================================================================================================\n", + "1. Organization & Workplace\n", + "\n", + "1.29 Talent Policy\n", + "\n", + "Owner\n", + "\n", + "Approved By\n", + "\n", + "Version 1.0\n", + "\n", + "Version 2.0\n", + "\n", + "Version 3.0\n", + "\n", + "Version 4.0\n", + "\n", + "Version 5.0\n", + "\n", + "People & Organization\n", + "\n", + "CPO, CFO & CEO\n", + "\n", + "01/09/2018\n", + "\n", + "01/05/2019\n", + "\n", + "05/11/2020\n", + "\n", + "15/01/2021\n", + "\n", + "30/09/2022\n", + "\n", + "1.29 Talent Policy\n", + "\n", + "This policy allows identification of talent in line with requirements of different roles within Jazz. It provides\n", + "\n", + "for a transparent process for selection/placement of resources to fill in vacant positions.\n", + "\n", + "1.29.1 Talent is acquired/moved in response to vacancies created in the following scenarios in line with\n", + "\n", + "business needs.\n", + "\n", + "1.29.1.1 Replacement hiring: Vacancy created due to attrition and/or internal mobility;\n", + "\n", + "acquisition process to be initiated upon CXO approval\n", + "\n", + "1.29.1.2 New headcount and/or New role: Vacancy created due to organizational restructuring\n", + "\n", + "and/or new streams/roles introduction/enhancement; acquisition process to be initiated\n", + "\n", + "upon ELT approval for L4 and above roles, and CXO and CPO approval for L3 and below.\n", + "\n", + "Job grades’ determination for such roles will follow an evaluation exercise along with\n", + "\n", + "internal data points.\n", + "\n", + "1.29.2 Talent acquisition process will not be applicable in the following scenarios, in line with Jazz Talent\n", + "\n", + "Management 
Framework (every such placement to be aligned with CPO, respective CXO,\n", + "\n", + "respective Head of HR BP and Head of Talent)\n", + "\n", + "1.29.2.1 Where there is an identified successor (ready now) after weighing the business impact of\n", + "\n", + "this placement; placement/non-placement both to be discussed and aligned\n", + "\n", + "1.29.2.2 An agreed talent mobility/placement (on same grade)\n", + "\n", + "1.29.2.3 Placement of an employee (not in the above two categories) who seems to best suit the\n", + "\n", + "needs under given business circumstances\n", + "\n", + "1.29.2.4 Where current role enhancement qualifies for grade uplift/off-cycle promotion and\n", + "\n", + "internal talent for placement is available. (Job grade determination for such roles will\n", + "\n", + "follow an evaluation exercise)\n", + "\n", + "1.29.2.4.1\n", + "\n", + "In such cases, talent placement/promotion at L4 and above, to be tabled at\n", + "\n", + "ELT by respective CXO, supported by respective HOD whereas for L3 and\n", + "\n", + "below, CXO and CPO approval to follow.\n", + "\n", + "1.29.2.4.2 Promotion criteria as covered in Rewards policy will be applicable.\n", + "\n", + "1.29.2.4.3 All off-cycle role evaluations post 1st working day of October will be\n", + "\n", + "considered in next year Annual Salary Review cycle.\n", + "\n", + "1.29.3 Decisions on identification of business-critical roles, talent and succession are made during annual\n", + "\n", + "talent review and subsequent talent health checks for N-2 and N-3 tiers; all L3 and above\n", + "\n", + "employees are calibrated. 
N-2 discussions take place at the ELT level while those for N-3 tier (and\n", + "\n", + "L3 employees) are chaired by respective CXOs, facilitated by respective Head of HRBP.\n", + "\n", + "1.29.4 For talent acquisition needs being met internally, following conditions will apply:\n", + "\n", + "1.29.4.1\n", + "\n", + "1.29.4.2\n", + "\n", + "1.29.4.3\n", + "\n", + "Line Managers of Internal applicants shortlisted for assessments will be notified about the\n", + "\n", + "application;\n", + "\n", + "Internal candidate, if selected, will serve a notice period in the current role as agreed\n", + "\n", + "between the new and current line manager\n", + "\n", + "Internal candidates will be eligible to apply for an open vacancy after having served at\n", + "\n", + "least one (01) year in the current role and grade;\n", + "\n", + "Useful Notes\n", + "\n", + "External Talent Acquisition services can be acquired on a need basis\n", + "\n", + "The minimum job posting time is three (03) working days\n", + "\n", + "Minimum educational qualification for Sales force and front-line workforce and Customer\n", + "\n", + "Experience Officers, Riggers and Technicians is Intermediate or equivalent, while for all other roles\n", + "\n", + "it is a Bachelors/equivalent degree\n", + "\n", + "The sales force and the front-line workforce will not be eligible for other roles until they meet the\n", + "\n", + "minimum qualification requirements\n", + "\n", + "Ex-employees are eligible to apply for open vacancies in line with role specifications with a check\n", + "\n", + "\n", + "\n", + "on past performance and misconduct (if any), in case of latter there will be no hiring\n", + "\n", + "If an ex-employee is being considered for employment on the same role within six months of\n", + "\n", + "voluntary separation, his/her grade and compensation will remain unchanged\n", + "\n", + "Employees separated because of business decisions (such as org restructuring, role reduction, role\n", + "\n", + 
"redundancy) may be reconsidered for rehiring post six (06) months of separation\n", + "\n", + "All employees moving to a new location for work are eligible for relocation benefit as per the\n", + "\n", + "\n", + "\n", + "relocation policy\n", + "\n", + "It is strongly recommended to have inclusion of at least one female in the shortlisted pool for\n", + "\n", + "every vacancy\n", + "\n", + "A talent acquisition decision along with compensation details should be finalized after mutual\n", + "\n", + "consent of line (hiring manager and one level above) and P&O (Talent and BP)\n", + "\n", + "A candidate offer may include a joining allowance (one time) if necessitated by business needs\n", + "\n", + "and approved by Head of Rewards, CPO and respective CXO.\n", + "\n", + "Notice period buy out can be offered to facilitate early joining of incumbent, if required by\n", + "\n", + "business and approved by respective HOD and Head of Rewards for L1 to L3 level roles, whereas\n", + "\n", + "respective CXO and CPO approval is mandatory for L4 and above roles.\n", + "\n", + "A shortlisted candidate pool can be re-considered for another vacancy within 03 months if the\n", + "\n", + "role requirements are the same\n", + "\n", + "Finalized candidates need to clear the conflict-of-interest checks before being offered\n", + "\n", + "Conflict of interest disclosure includes sharing details about the following:\n", + "\n", + "I.\n", + "\n", + "II.\n", + "\n", + "Parents, spouse, children, other close relatives (working at Jazz)\n", + "\n", + "If the candidate has been an employee or official of a government agency or state-\n", + "\n", + "owned enterprise within the last five (05) years\n", + "\n", + "Hiring at CEO-I positions require approval of People Committee and OpCo Board.\n", + "====================================================================================================================================================================================\n" + ] + } + ], + 
class Jine:
    """Question-answering chatbot over the policy documents.

    Documents are embedded into a Chroma vector store, retrieved through a
    ``MultiQueryRetriever`` (the LLM rephrases the user's question into several
    variants before searching) and answered by a ``RetrievalQA`` chain.

    Constructor arguments are stored verbatim on the instance:

    * ``OPENAI_API_KEY`` -- forwarded to the OpenAI embedding / chat clients
      when non-empty; otherwise the clients are built with their defaults.
    * ``VECTOR_STORE_DIRCTORY`` -- directory Chroma persists the index in.
    * ``VECTOR_STORE_CHECK`` -- truthy when a persisted index should be loaded
      instead of rebuilt. Accepts a bool or an environment-style string
      (``"true"``, ``"False"``, ``"1"``, ...).
    * ``DATA_DIRECTORY`` -- folder holding the source documents.
    * ``DEBUG`` -- stored only; this class does not read it.
    """

    def __init__(self, OPENAI_API_KEY, VECTOR_STORE_DIRCTORY, VECTOR_STORE_CHECK, DATA_DIRECTORY, DEBUG):
        self.OPENAI_API_KEY = OPENAI_API_KEY
        self.DATA_DIRECTORY = DATA_DIRECTORY
        self.VECTOR_STORE_DIRCTORY = VECTOR_STORE_DIRCTORY
        self.VECTOR_STORE_CHECK = VECTOR_STORE_CHECK
        self.DEBUG = DEBUG
        # Built lazily on the first chat() call and reused afterwards.
        self.vectorstore = None
        self.bot = None

    @staticmethod
    def _is_enabled(flag):
        """Interpret a bool or an environment-style string as a boolean.

        ``os.getenv`` returns strings, and ``bool("False")`` is ``True``, so
        string flags are compared against a list of accepted "on" spellings.
        """
        if isinstance(flag, str):
            return flag.strip().lower() in ("1", "true", "yes", "y", "on")
        return bool(flag)

    def _openai_kwargs(self):
        """Keyword arguments that hand the configured API key to OpenAI clients."""
        if self.OPENAI_API_KEY:
            return {"openai_api_key": self.OPENAI_API_KEY}
        return {}

    def _ensure_vectorstore(self):
        """Load the persisted index, or build it from the documents, exactly once."""
        if getattr(self, "vectorstore", None) is not None:
            return
        if self._is_enabled(self.VECTOR_STORE_CHECK):
            self.load_vectorstore()
        else:
            self.create_vectorstore()

    def create_vectorstore(self):
        """Load every document under DATA_DIRECTORY, split it into 500-character
        chunks and index the chunks in a Chroma store at VECTOR_STORE_DIRCTORY."""
        docs = DirectoryLoader(self.DATA_DIRECTORY).load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
        all_splits = text_splitter.split_documents(docs)
        self.vectorstore = Chroma.from_documents(
            documents=all_splits,
            embedding=OpenAIEmbeddings(**self._openai_kwargs()),
            persist_directory=self.VECTOR_STORE_DIRCTORY,
        )

    def multi_query_retriver(self):
        """Build the QA chain and store it on ``self.bot``.

        The multi-query retriever asks the LLM for several rewordings of the
        user's question so that retrieval covers more relevant content.
        """
        self._ensure_vectorstore()
        client_kwargs = self._openai_kwargs()

        retriever_from_llm = MultiQueryRetriever.from_llm(
            retriever=self.vectorstore.as_retriever(),
            llm=ChatOpenAI(temperature=0, **client_kwargs),
        )

        # The prompt text (including its whitespace) is kept identical to the
        # previous triple-quoted literal; it is spelled out line by line so the
        # trailing spaces and indentation are visible.
        template = (
            "Use the following pieces of context to answer the question at the end. \n"
            "        If you don't know the answer, just say that you don't know, don't try to make up an answer. \n"
            "        Use three sentences maximum and keep the answer as concise as possible.\n"
            "        {context}\n"
            "        Question: {question}\n"
            "        Helpful Answer:"
        )
        QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, **client_kwargs)

        self.bot = RetrievalQA.from_chain_type(
            llm,
            retriever=retriever_from_llm,
            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
        )

    def load_vectorstore(self):
        """Open the Chroma index already persisted at VECTOR_STORE_DIRCTORY."""
        self.vectorstore = Chroma(
            persist_directory=self.VECTOR_STORE_DIRCTORY,
            embedding_function=OpenAIEmbeddings(**self._openai_kwargs()),
        )

    def log(self, user_question, chatbot_reply):
        """Write one question/answer exchange to the configured log."""
        # Log the user's question
        logging.info(f"User: {user_question}")
        # Log the chatbot's reply
        logging.info(f"JIN-e: {chatbot_reply}")

    def chat(self, user_question):
        """Answer ``user_question`` and return the reply text.

        The vector store and chain are created on the first call only. The
        question passed in is the one sent to the chain; no notebook-level
        global is consulted.
        """
        if getattr(self, "bot", None) is None:
            self._ensure_vectorstore()
            self.multi_query_retriver()

        result = self.bot({"query": user_question})
        response = result["result"]
        self.log(user_question, response)
        return response
os.getenv(\"DATA_DIRECTORY\")\n", + "VECTOR_STORE_DIRCTORY = os.getenv(\"VECTOR_STORE_DIRCTORY\")\n", + "VECTOR_STORE_CHECK = os.getenv(\"VECTOR_STORE_CHECK\")\n", + "DEBUG = os.getenv(\"DEBUG\")\n", + "# debug = os.getenv(\"DEBUG\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bool('False') # Have to set a values for this " + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "bot =Jine(OPENAI_API_KEY,VECTOR_STORE_DIRCTORY,VECTOR_STORE_CHECK,DATA_DIRECTORY,DEBUG)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Car policy refers to the guidelines and rules set by a company regarding the use of company-provided cars or car allowances for employees. 
It includes details about fuel entitlement, car allowances, and operating leases.'" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "question = \"What is car policy?\"\n", + "bot.chat(question)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bot.chat(question)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing Class for JIN-e" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from jine import Jine\n", + "from dotenv import load_dotenv\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv()\n", + "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n", + "DATA_DIRECTORY = os.getenv(\"DATA_DIRECTORY\")\n", + "VECTOR_STORE_DIRECTORY = os.getenv(\"VECTOR_STORE_DIRCTORY\")\n", + "VECTOR_STORE_CHECK = os.getenv(\"VECTOR_STORE_CHECK\")\n", + "DEBUG = os.getenv(\"DEBUG\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading Vectorstore\n", + "Using HYDE embeddings vectorstore\n" + ] + } + ], + "source": [ + "jine = Jine(OPENAI_API_KEY, VECTOR_STORE_DIRECTORY, VECTOR_STORE_CHECK, DATA_DIRECTORY, DEBUG)\n", + "jine.load_model()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "JIN-e: Hello! 
import requests
import json

# Base URL where the FastAPI application is running (adjust if the server lives elsewhere)
base_url = "http://localhost:8000"

# Upper bound, in seconds, on how long to wait for the server; without it
# requests.post() can block forever when the server never answers.
REQUEST_TIMEOUT_SECONDS = 60

# Question sent to the chatbot
user_question = "How does photosynthesis work?"  # Replace with the user's question

# JSON payload carrying the user's question
payload = {
    "user_question": user_question
}

# Send a POST request to the /chatbot/ endpoint
url = f"{base_url}/chatbot/"
headers = {"Content-Type": "application/json"}

try:
    response = requests.post(
        url,
        data=json.dumps(payload),
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
except requests.exceptions.RequestException as exc:
    # Server down, refused connection, or timeout: report it instead of a raw traceback.
    print(f"Request to {url} failed: {exc}")
else:
    # Check if the response status code is 200 (OK)
    if response.status_code == 200:
        chatbot_response = response.json()["chatbot_response"]
        print("Chatbot Response:")
        print(chatbot_response)
    else:
        print(f"Request failed with status code {response.status_code}")
        print(response.text)
Which authority is responsible for approving notice period exceptions?']\n" + ] + } + ], + "source": [ + "unique_docs = retriever_from_llm.get_relevant_documents(query=question)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='notice, in which case your employment shall be governed by such revised rules and regulations\\n\\n1', metadata={'source': 'Data\\\\Policies\\\\2.6.Day Care Policy V5.pdf'}),\n", + " Document(page_content='notice, in which case your employment shall be governed by such revised rules and regulations\\n\\n2', metadata={'source': 'Data\\\\Policies\\\\2.2. Life Insurance V5.pdf'}),\n", + " Document(page_content='notice, in which case your employment shall be governed by such revised rules and regulations\\n\\n2', metadata={'source': 'Data\\\\Policies\\\\2.7. Relocation Policy V8 2021.pdf'}),\n", + " Document(page_content='notices, regulator responses, agreements, cheque deliveries, etc. will be processed as\\n\\nurgent cases.\\n\\n2\\n\\nThe Company, reserves the right to change the said applicable policy (ies), rules and regulations at its entire discretion, without advance notice, in\\n\\nwhich case your employment shall be governed by such revised rules and regulations', metadata={'source': 'Data\\\\Policies\\\\1.21 Courier Policy.pdf'}),\n", + " Document(page_content='employees at the time of exit. 
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

# Answering prompt: the retrieved chunks fill {context}, the user's query fills
# {question}. Written line by line so the trailing spaces that are part of the
# prompt text stay visible.
template = (
    "Use the following pieces of context to answer the question at the end. \n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer. \n"
    "Use three sentences maximum and keep the answer as concise as possible.\n"
    "{context}\n"
    "Question: {question}\n"
    "Helpful Answer:"
)
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Chat model that writes the final answer (temperature 0).
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

# Retrieval + answering chain driven by the multi-query retriever built above.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    retriever=retriever_from_llm,
)
What kind of qualifications are required from interns?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The minimum qualification for an intern is completion of A levels or equivalent. The intern is required to have a minimum educational qualification of completion of A levels or equivalent.\n" + ] + } + ], + "source": [ + "question = \"what is the minimum qualification for an intern? and what kind of qualification is required from intern to have?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. Can you provide information on the concept of parental leaves?', '2. What is the meaning of parental leaves?', '3. Could you explain the purpose and benefits of parental leaves?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The parental leaves include time off for both birthing and adopting parents, with the option to take additional leave in case of unfortunate events such as miscarriage or stillbirth. Adopting parents can also avail leaves upon the arrival of the new family member. Returning mothers may have the option to work from home or have flexible working arrangements, depending on the nature of their job and with prior alignment with their line manager.\n" + ] + } + ], + "source": [ + "question = \"what are the parental leaves?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. Whose approval is necessary for an exception to the notice period?', '2. 
Who needs to give approval for a notice period exception?', '3. Which authority is responsible for approving notice period exceptions?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The answer is not provided in the given context.\n" + ] + } + ], + "source": [ + "question = \"for notice period exception whose approval is required?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What is the eligibility criteria for an internal employee to apply for new roles within Jazz?', '2. How does an internal employee become eligible to apply for new roles within Jazz?', '3. Are there any specific requirements for an internal employee to apply for new roles within Jazz?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Internal employees can apply for new roles within Jazz after serving at least one year in their current role and grade.\n" + ] + } + ], + "source": [ + "question = \"when can an internal employee apply for new roles within jazz?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What is the maximum amount of PF that can be withdrawn from the account? Is it possible to return the withdrawn amount?', '2. Is there a limit on the PF withdrawal amount? Can the withdrawn PF be refunded?', '3. How much PF can I withdraw from my account? 
If I withdraw it, can I later return the withdrawn amount?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The maximum amount of PF that can be drawn is 2 times the individual PF balance, with a minimum capping at PKR 1 million. It is not mentioned whether the PF can be returned back.\n" + ] + } + ], + "source": [ + "question = \"how much PF can be drawn and can it be returned back?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the fuel limits for each grade?', '2. Can you provide me with the fuel limits for each grade?', '3. I would like to know the fuel limits for each grade. Could you please provide that information?']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I don't know the fuel limit for each grade.\n" + ] + } + ], + "source": [ + "question = \"Give me fuel limit for each grade?\"\n", + "result = qa_chain({\"query\": question})\n", + "print(result[\"result\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using FAISS as vector store." 
# Embedding model applied to the text produced by the HyDE step, and the
# completion model that produces that text.
base_embeddings = OpenAIEmbeddings()
llm = OpenAI()

# Build the HyDE embedder with the built-in `web_search` prompt
embeddings_hyde = HypotheticalDocumentEmbedder.from_llm(
    llm=llm,
    base_embeddings=base_embeddings,
    prompt_key="web_search",
)

# Sanity check: embed one sample query through the HyDE embedder
result = embeddings_hyde.embed_query(text="Where is the Taj Mahal?")
] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. 
Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. Falling back to partitioning with the fast strategy.\n", + "detectron2 is not installed. Cannot use the hi_res partitioning strategy. 
Falling back to partitioning with the fast strategy.\n" + ] + } + ], + "source": [ + "docs = DirectoryLoader('Data/Policies/').load()\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap = 0)\n", + "all_splits = text_splitter.split_documents(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# vectorstore_hyde = Chroma.from_documents(all_splits, embeddings_hyde)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "## Storing the vector store in the database\n", + "# vectorstore_hyde = Chroma.from_documents(documents=all_splits, embedding=embeddings_hyde,persist_directory=presist_dictroy)\n", + "## Loading the vector store from the database.\n", + "vectorstore_hyde = Chroma(persist_directory=presist_dictroy, embedding_function=embeddings_hyde)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Comparing HyDE and a simple vector store for search" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"what cars are offered at each grade?\"\n", + "docs = vectorstore_hyde.similarity_search(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='1.4.1.1 Jazz’s grading architecture is structured around six job levels beneath CEO i.e. L1 to L6.\\n\\n1.4.1.2 Leadership roles are defined at job grade Level L3. For business reasons some regional roles\\n\\nmay be required to assume a supervisory role at job grade Level L2.\\n\\n1.4.1.3 In order to allow enriched career experience and growth, grade levels sufficiently enable\\n\\nparallel expert and leadership paths.', metadata={'source': 'Data\\\\Policies\\\\1.4. 
Grade Title Policy (2021).pdf'}),\n", + " Document(page_content='1.4.1.1 Jazz’s grading architecture is structured around six job levels beneath CEO i.e. L1 to L6.\\n\\n1.4.1.2 Leadership roles are defined at job grade Level L3. For business reasons some regional roles\\n\\nmay be required to assume a supervisory role at job grade Level L2.\\n\\n1.4.1.3 In order to allow enriched career experience and growth, grade levels sufficiently enable\\n\\nparallel expert and leadership paths.', metadata={'source': 'Data\\\\Policies\\\\1.4. Grade Title Policy (2021).pdf'}),\n", + " Document(page_content='1.4.1.1 Jazz’s grading architecture is structured around six job levels beneath CEO i.e. L1 to L6.\\n\\n1.4.1.2 Leadership roles are defined at job grade Level L3. For business reasons some regional roles\\n\\nmay be required to assume a supervisory role at job grade Level L2.\\n\\n1.4.1.3 In order to allow enriched career experience and growth, grade levels sufficiently enable\\n\\nparallel expert and leadership paths.', metadata={'source': 'Data\\\\Policies\\\\1.4. Grade Title Policy (2021).pdf'}),\n", + " Document(page_content='1.4.1.1 Jazz’s grading architecture is structured around six job levels beneath CEO i.e. L1 to L6.\\n\\n1.4.1.2 Leadership roles are defined at job grade Level L3. For business reasons some regional roles\\n\\nmay be required to assume a supervisory role at job grade Level L2.\\n\\n1.4.1.3 In order to allow enriched career experience and growth, grade levels sufficiently enable\\n\\nparallel expert and leadership paths.', metadata={'source': 'Data\\\\Policies\\\\1.4. 
Grade Title Policy (2021).pdf'})]" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "result = embeddings_hyde.embed_query(\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.0037597023800767667,\n", + " -0.010492491101680304,\n", + " 0.008803675368335866,\n", + " -0.046002572719429806,\n", + " -0.04633648540586376,\n", + " 0.016490035540383755,\n", + " -0.01691384507391458,\n", + " -0.004218829064294286,\n", + " 0.009381597374848569,\n", + " -0.007390977699335301,\n", + " 0.016721204715517884,\n", + " 0.005143503902185566,\n", + " -0.013908651385106616,\n", + " 0.0038624440287313533,\n", + " -0.022988445283447664,\n", + " -0.01729912579070797,\n", + " 0.03685856785181685,\n", + " -0.004568792630400981,\n", + " -0.013292201493179097,\n", + " -0.01762019367558421,\n", + " -0.03673014218798254,\n", + " 0.011982245007171809,\n", + " -0.031053663771510087,\n", + " 0.01738902450045008,\n", + " 0.015090180344634361,\n", + " 0.002115836001603382,\n", + " 0.006902954868226015,\n", + " -0.020381375250345567,\n", + " 0.018210957689686773,\n", + " -0.004527054010258046,\n", + " 0.005076079869878984,\n", + " 0.010884193287929934,\n", + " -0.013793066797539551,\n", + " -0.023450781771070683,\n", + " -0.018737508871872182,\n", + " -0.013471998912663314,\n", + " 0.00816153959858339,\n", + " -0.04505221386635881,\n", + " 0.02628902097930691,\n", + " 0.005776007002092373,\n", + " 0.008315652071565269,\n", + " 0.018095373102119706,\n", + " -0.005654001410730379,\n", + " -0.0024802476411559596,\n", + " -0.008636719956441507,\n", + " 0.003926657326309815,\n", + " -0.011558435473640985,\n", + " -0.01755597898102182,\n", + " -0.022076611521823625,\n", + " 0.02722653689346543,\n", + " 0.018930149230268877,\n", + " 
-0.0019023258674739105,\n", + " -0.032748901905633385,\n", + " 0.017042270737748885,\n", + " -0.0035991684376386482,\n", + " -0.010242058449500076,\n", + " -0.019777768297330526,\n", + " 0.02126752127149941,\n", + " -0.01928974593188255,\n", + " -0.018377912170258512,\n", + " -0.0361393744485895,\n", + " 0.001460857875015066,\n", + " -0.002163996091202556,\n", + " 0.0005879551450844317,\n", + " 0.034033173445138336,\n", + " -0.023373726000241055,\n", + " -0.019109946649753094,\n", + " 0.013677482209972487,\n", + " 0.01296471213884662,\n", + " 0.010556704864920075,\n", + " 0.020085993243294284,\n", + " 0.020869397615793542,\n", + " 0.003197833814374006,\n", + " 0.019508070305458963,\n", + " 0.019713553602768136,\n", + " -0.0027740245136738365,\n", + " -0.026199120406919567,\n", + " -0.0032347567980860704,\n", + " 0.006902954868226015,\n", + " 0.007416663111498947,\n", + " 0.006485566804151429,\n", + " -0.010254901388412554,\n", + " -0.00610028562169673,\n", + " 0.008277124186150455,\n", + " -0.010595232750334891,\n", + " 0.01593779848037339,\n", + " -0.009349490027567374,\n", + " -0.0007091581691797233,\n", + " -0.0062640302988629685,\n", + " -0.018056846148027512,\n", + " 0.018788878764876858,\n", + " 0.018698980055134748,\n", + " 0.03156737387742826,\n", + " 0.010556704864920075,\n", + " 0.0070185389901317695,\n", + " 0.012939027192344282,\n", + " 0.006039282593185078,\n", + " 0.017440396256099992,\n", + " 0.019957565716814743,\n", + " -0.0208950834936185,\n", + " 0.009554973324876547,\n", + " 0.020008937472464655,\n", + " -0.025582670514992047,\n", + " -0.01013289533138925,\n", + " -0.043151492434926345,\n", + " -0.04492378820252451,\n", + " -0.004459629977951465,\n", + " 0.011885924827973461,\n", + " 0.02753476183942919,\n", + " -0.007821208236661055,\n", + " -0.03349377746139521,\n", + " 0.02211514033856106,\n", + " 0.007185494402026127,\n", + " -0.03955553659466105,\n", + " 0.0016599198192833273,\n", + " 0.0024352982862849053,\n", + " 0.012316155830960525,\n", 
+ " -0.0031833859737587776,\n", + " 0.022397679406699865,\n", + " -0.0011269475168876597,\n", + " -0.0008203278800802961,\n", + " -0.004286253096600868,\n", + " 0.013536212675903084,\n", + " 0.010492491101680304,\n", + " -0.00205804370781985,\n", + " -0.002062859809912029,\n", + " -0.023874591304601508,\n", + " -0.050060870635253836,\n", + " 0.005548049027347673,\n", + " -0.026635772879362866,\n", + " 0.008283545655606694,\n", + " 0.0003904985390763982,\n", + " 0.0015924955541460912,\n", + " 0.027175167000460757,\n", + " -0.007917528415859403,\n", + " 0.007814786767204816,\n", + " -0.014833326222997894,\n", + " 0.014370988804052255,\n", + " -0.022179353170478214,\n", + " -0.037192476812960326,\n", + " 0.0031448578555133074,\n", + " 0.02592942427769324,\n", + " -0.0008484212850823688,\n", + " 0.022603162704009038,\n", + " -0.03369926075870438,\n", + " -0.0072047583447335355,\n", + " 0.03113071954233972,\n", + " 0.002358242049793966,\n", + " 0.00010675499430515632,\n", + " -0.003631275319258534,\n", + " 0.011076834577649247,\n", + " -0.022911387649972796,\n", + " -0.00686442651714989,\n", + " 0.0008797253666048972,\n", + " 0.018698980055134748,\n", + " 0.012502374719900981,\n", + " 0.0387079175275994,\n", + " 0.012746385902624969,\n", + " -0.02452957001326646,\n", + " -0.011776761709862636,\n", + " -0.004449997773767107,\n", + " 0.016888159196089624,\n", + " 0.006607572395513423,\n", + " 0.007911106946403164,\n", + " -0.00976045662218572,\n", + " 0.02103635395901052,\n", + " -0.028022786067522405,\n", + " 0.0070249604595880085,\n", + " -0.023155399764019403,\n", + " -0.015218606939791284,\n", + " -0.0019392486183553207,\n", + " 0.0035959577029105287,\n", + " -0.04351109099918525,\n", + " 0.014435201635969407,\n", + " -0.03218382376935578,\n", + " 0.019341115824887224,\n", + " -0.006203027270351317,\n", + " 0.014306775040812484,\n", + " 0.005942962413986731,\n", + " -0.015154393176551513,\n", + " 0.0360109487847552,\n", + " 0.005692529761806503,\n", + " 
-0.01082640192546902,\n", + " 0.00822575336182316,\n", + " -0.003109540239165302,\n", + " 0.008797253898879627,\n", + " 0.016053383067940456,\n", + " 0.011622649236880755,\n", + " 0.02546708779007022,\n", + " -0.03839969071899041,\n", + " -0.00042099996602072855,\n", + " 0.028176897609181666,\n", + " 0.0043312029171332315,\n", + " -0.014756269520845645,\n", + " -0.6304228008480286,\n", + " -0.013330729378593911,\n", + " -0.00031865965920715696,\n", + " -0.01615612471659504,\n", + " 0.01822380062859925,\n", + " 0.0070313819290442475,\n", + " 0.021306050088236845,\n", + " -0.032235191799715214,\n", + " -0.006980011104716954,\n", + " 0.025839525567951133,\n", + " 0.005673265353437786,\n", + " 0.00982467038542549,\n", + " 9.290895544866954e-05,\n", + " -0.041148031217484525,\n", + " 0.0007713650485914288,\n", + " -0.028074155960527078,\n", + " 0.005053605192443457,\n", + " -0.07089174129695518,\n", + " -0.017260596973970536,\n", + " 0.004022977971169473,\n", + " -0.017440396256099992,\n", + " -0.003202649916466185,\n", + " -0.023206769657024076,\n", + " -0.0073588708177154145,\n", + " -0.012406054540702633,\n", + " 0.0073845562298790616,\n", + " 0.021473004568808584,\n", + " -0.016990900844744212,\n", + " 0.012386790132333916,\n", + " 0.006607572395513423,\n", + " -0.014101291743503311,\n", + " 0.0070378029328391776,\n", + " 0.02256463574991684,\n", + " -0.003933078795766054,\n", + " 0.037860298460537754,\n", + " 0.011481379702811354,\n", + " -0.004132140623618988,\n", + " -0.001661525186647387,\n", + " -0.03716679093513537,\n", + " 0.03174717129691248,\n", + " -0.017735778263151272,\n", + " -0.03303144283641742,\n", + " 0.009850356263250447,\n", + " 0.01136579511524429,\n", + " -0.014576472101361428,\n", + " 0.006890111929313537,\n", + " 0.007923949885315642,\n", + " 0.02621196334583204,\n", + " 0.025197389798198657,\n", + " -0.023450781771070683,\n", + " -0.0070313819290442475,\n", + " -0.021922499980164364,\n", + " -0.009330226550521277,\n", + " 
-0.03328829416408603,\n", + " 0.014987438695979774,\n", + " -0.016592777189038344,\n", + " 0.01577084399980165,\n", + " -0.02333519718350362,\n", + " 0.007044224402295417,\n", + " 0.01182171106473369,\n", + " -0.003705120821021354,\n", + " -0.00016896185916494597,\n", + " -0.0175302949658421,\n", + " -0.029769394094650375,\n", + " -0.020484116899000152,\n", + " 0.025595513453904525,\n", + " -0.0009054107787685438,\n", + " 0.019636497831938503,\n", + " 0.01988050994598511,\n", + " 0.008020270064513989,\n", + " -0.022898544711060318,\n", + " -0.003181780606394718,\n", + " -0.01363895432455767,\n", + " -0.015025966581394589,\n", + " 0.017581664858846775,\n", + " 0.007281814581224475,\n", + " -0.0032443887694397744,\n", + " -0.015526831885755044,\n", + " -0.003897761412248704,\n", + " -0.006935061284184591,\n", + " 0.030103303055793854,\n", + " -0.016656990020955494,\n", + " -0.0009832696989415135,\n", + " -0.021460161629896106,\n", + " 0.028587864203800013,\n", + " -0.0018413230717929134,\n", + " 0.0021912868707302624,\n", + " -0.02242336528452482,\n", + " 0.0025958319958923697,\n", + " 0.010428277338440532,\n", + " 0.005271930963003798,\n", + " -0.00012210604323757525,\n", + " 0.017491766149104665,\n", + " -0.03048858563523248,\n", + " 0.0033262608751922387,\n", + " 0.010787873108731586,\n", + " -0.0007516996476276789,\n", + " -0.007981742179099173,\n", + " -0.010691552929533239,\n", + " -0.022063770445556387,\n", + " -0.006935061284184591,\n", + " 0.024593782845183616,\n", + " -0.00045190273480607385,\n", + " 0.012688593608841437,\n", + " 0.005824167557352856,\n", + " -0.007493719347989887,\n", + " 0.01966218370976346,\n", + " 0.001818848277942059,\n", + " 0.06339159908252513,\n", + " -0.014974595757067296,\n", + " -0.0001684601818636773,\n", + " 0.005830588561147786,\n", + " 0.017055113676661363,\n", + " -0.005220560138676505,\n", + " -0.0160276971901155,\n", + " -0.02637891968904902,\n", + " 0.011860238950148505,\n", + " 0.008065219419385043,\n", + " 
0.004889860515277219,\n", + " -0.007256129169060829,\n", + " 0.028844719256759098,\n", + " 0.0035349549072295314,\n", + " 0.03434139652845686,\n", + " 0.002944190427465659,\n", + " -0.018352228155078792,\n", + " 0.00853397830778692,\n", + " 0.004103244942388532,\n", + " 0.004613742450933345,\n", + " -0.027868672663217905,\n", + " 0.0077313090612576366,\n", + " -0.013433471027248498,\n", + " -0.0048641751031135725,\n", + " -0.008180804006952107,\n", + " -0.01783851991180586,\n", + " 0.02607069474308526,\n", + " 0.022025241628818953,\n", + " 0.0007998597954345164,\n", + " -0.022474737040174733,\n", + " -0.01013931680084549,\n", + " -0.021306050088236845,\n", + " 0.004767854923915225,\n", + " -0.022474737040174733,\n", + " 0.003149673724774832,\n", + " -0.00845692253695729,\n", + " -0.027920044418867817,\n", + " -0.03285164169164274,\n", + " -0.014216876331070376,\n", + " 0.003766123616702351,\n", + " -0.013561897622405422,\n", + " 0.014396673750554593,\n", + " -0.025454244851157742,\n", + " 0.012624380776924285,\n", + " -0.02372047790029701,\n", + " 0.040582953081206914,\n", + " 0.020188734891948872,\n", + " -0.026558717108533237,\n", + " -0.01326651561535414,\n", + " -0.01044112027735301,\n", + " -0.011757497301493919,\n", + " -0.036755824340517024,\n", + " -0.03321124025590164,\n", + " 0.017979788514552644,\n", + " -0.01257943142205323,\n", + " -0.012874813429104511,\n", + " -0.017363338622625124,\n", + " -0.007795522824497408,\n", + " -0.0036922781149395308,\n", + " -0.004186722182674401,\n", + " -0.03583115136527098,\n", + " -0.030385843986577895,\n", + " 0.0011983850840197178,\n", + " 0.027200852878285713,\n", + " 0.015963484358198346,\n", + " 0.03408454147549777,\n", + " 0.01372885303429978,\n", + " 0.007480876409077409,\n", + " 0.011995087946084287,\n", + " -0.01814674485776962,\n", + " -0.013793066797539551,\n", + " -0.022025241628818953,\n", + " 0.009895305618121502,\n", + " 0.0044178908921472205,\n", + " -0.017799991095068426,\n", + " 
-0.019084262634573378,\n", + " 0.03834832268863097,\n", + " -0.008180804006952107,\n", + " -0.01288765636801699,\n", + " -0.0053907258196376736,\n", + " -0.014216876331070376,\n", + " 0.01654140543338843,\n", + " 0.002920110382666072,\n", + " 0.023283827290498944,\n", + " 0.002974691941721485,\n", + " 0.025582670514992047,\n", + " 0.008655984364810224,\n", + " 0.026712828650192498,\n", + " -0.015462618122515273,\n", + " -0.015475461061427751,\n", + " -0.0031721484022103593,\n", + " 0.0099145690951676,\n", + " -0.02259031976509656,\n", + " 0.002570146583728723,\n", + " 0.014396673750554593,\n", + " 0.010203530564085262,\n", + " 0.004706851895403573,\n", + " -0.020188734891948872,\n", + " 0.012528060597725937,\n", + " -0.010370485044657,\n", + " 0.024966220623064528,\n", + " -0.007224022287440943,\n", + " -0.012444582426117448,\n", + " -0.014422359628379549,\n", + " 0.004026188705897591,\n", + " -0.01823664356751173,\n", + " -0.015038809520307067,\n", + " 0.015359876473860687,\n", + " -0.016708361776605406,\n", + " 0.00033712103464786207,\n", + " 0.00945223167622196,\n", + " -0.0004635414444686842,\n", + " 0.02282148894023069,\n", + " 0.013972864217023769,\n", + " 0.02645597545987865,\n", + " -0.006761684868495303,\n", + " 0.0035221122011477083,\n", + " 0.018930149230268877,\n", + " 0.008585349132114214,\n", + " 0.015128708230049175,\n", + " -0.024259872021394898,\n", + " 0.004995812898659925,\n", + " -0.0037789663227841743,\n", + " 0.012072143716913918,\n", + " 0.007114859169330117,\n", + " 0.02547992886633746,\n", + " 0.03241499294448991,\n", + " 0.017311968729620448,\n", + " 0.012650065723426621,\n", + " -0.016926688012827058,\n", + " 0.04369088841866947,\n", + " -0.009972361388951132,\n", + " -0.015693786366326783,\n", + " 0.010839243933058879,\n", + " 0.012849127551279555,\n", + " -0.001977776736600791,\n", + " 0.015526831885755044,\n", + " 0.00244171952291049,\n", + " -0.0008885547706918985,\n", + " 0.02452957001326646,\n", + " 0.019122789588665572,\n", + " 
0.03285164169164274,\n", + " -0.031772857174737434,\n", + " 0.004588057038769698,\n", + " -0.011526329057682408,\n", + " -0.004074348795496765,\n", + " 0.006992853577968123,\n", + " -0.018801721703789336,\n", + " 0.020548329730917306,\n", + " 0.002292423152020789,\n", + " 0.02750907782424947,\n", + " 0.017954104499372923,\n", + " 0.008822939776704583,\n", + " -0.004835278956221806,\n", + " -0.033853376025654115,\n", + " -0.03819420742168123,\n", + " 0.024902007791147374,\n", + " 0.007442348523662594,\n", + " 0.015693786366326783,\n", + " -0.02142163467580391,\n", + " 0.0180054743923776,\n", + " -0.038630859894124535,\n", + " -0.007968899240186695,\n", + " -0.014448044574881885,\n", + " 0.0060778109442612025,\n", + " 0.01228404848367933,\n", + " 0.0070121175206755305,\n", + " 0.0009519656174189851,\n", + " 0.009728350206227145,\n", + " -0.008848624723206919,\n", + " -0.0073781347604228225,\n", + " 0.017478923210192187,\n", + " -0.008341337949390225,\n", + " -0.050600262893706484,\n", + " 0.024362613670049486,\n", + " -0.007153387520406242,\n", + " -0.014974595757067296,\n", + " -0.0027948940565759584,\n", + " 0.00045190273480607385,\n", + " -0.008745883074552332,\n", + " -0.009343068558111135,\n", + " 0.015578202710082338,\n", + " -0.024760737325755355,\n", + " 0.007866157591532108,\n", + " -0.03906751236656784,\n", + " -0.010267743396002414,\n", + " 0.0013292200794687132,\n", + " 0.015513988946842566,\n", + " 0.014666370811103537,\n", + " 0.00800100658746789,\n", + " -0.01884025052052677,\n", + " 0.0037243847637287615,\n", + " 0.004645848866891921,\n", + " -0.020419904067083,\n", + " -0.0013260094611559211,\n", + " -0.016746888730697605,\n", + " 0.00762214640880812,\n", + " 0.02523591861493609,\n", + " -0.030617011299066785,\n", + " -0.005602630586403086,\n", + " 0.0001366544230398802,\n", + " -0.01760735073667173,\n", + " 0.007583618057731996,\n", + " -0.03218382376935578,\n", + " -0.00930454067269632,\n", + " -0.0018975098817970587,\n", + " 
0.03690993960746676,\n", + " -0.0011510275616872466,\n", + " 0.00648235606942331,\n", + " -0.0059782800303347355,\n", + " 0.01964934077085098,\n", + " 0.012990398016671576,\n", + " -0.009330226550521277,\n", + " -0.02578815381230122,\n", + " 0.00061925922660696,\n", + " -0.000253844116036518,\n", + " 0.07746720681084872,\n", + " -0.004783908131894513,\n", + " -0.02546708779007022,\n", + " -0.0035349549072295314,\n", + " -0.03220950964718073,\n", + " -0.02302697223753986,\n", + " -0.027406336175594886,\n", + " -0.036242117959889325,\n", + " 0.019341115824887224,\n", + " 0.007872579060988347,\n", + " -0.012136357480153688,\n", + " 0.02065107137957189,\n", + " 0.04209839007055552,\n", + " 0.01593779848037339,\n", + " 0.012027194362042862,\n", + " 0.016271709304162104,\n", + " -0.010466806155177966,\n", + " 0.00518845325705662,\n", + " -0.00541641123180132,\n", + " 0.029307055744382117,\n", + " 0.026687144635012778,\n", + " -0.011654755652839332,\n", + " 0.014756269520845645,\n", + " 0.023656265068379857,\n", + " 0.010852086871971357,\n", + " 0.01839075510917099,\n", + " 0.019790611236243004,\n", + " 0.013054610848588728,\n", + " -0.012367526655287817,\n", + " -0.04004355709746379,\n", + " 0.0014423964995743607,\n", + " -0.011012620814409476,\n", + " -0.01678541754743504,\n", + " 0.0005690924278027117,\n", + " 0.006735999456331657,\n", + " 0.038245579177331145,\n", + " 0.002661650893665546,\n", + " 0.02938411151521175,\n", + " 0.0003038102730240908,\n", + " 0.014358145865139777,\n", + " 0.00769920264529906,\n", + " -0.0030950921657194193,\n", + " 3.915520395811885e-05,\n", + " -0.012791335257496024,\n", + " -0.004308728239697705,\n", + " -0.011038305760911813,\n", + " 0.02011167912111924,\n", + " 0.0024192448454749627,\n", + " 0.004706851895403573,\n", + " -0.011019042283865715,\n", + " 0.018429283925908424,\n", + " 0.02452957001326646,\n", + " -0.03577977960962107,\n", + " -0.011070413108193008,\n", + " 0.03102797789368513,\n", + " -0.017543137904754577,\n", + " 
-0.026789886283667366,\n", + " 0.01273354389503511,\n", + " 0.0034578986707385915,\n", + " -0.00999162579731985,\n", + " -0.023874591304601508,\n", + " 0.0005586577145593142,\n", + " -0.014370988804052255,\n", + " -0.020484116899000152,\n", + " -0.005981490765062855,\n", + " 0.007872579060988347,\n", + " -0.02745770606859956,\n", + " 0.005348987199494739,\n", + " -0.023617736251642422,\n", + " 0.01922553123732016,\n", + " 0.0053907258196376736,\n", + " -0.0328002736612833,\n", + " 0.008745883074552332,\n", + " -0.013998550094848725,\n", + " 0.02071528607413428,\n", + " 0.0027098109832647196,\n", + " 0.03305712498895191,\n", + " -0.004706851895403573,\n", + " -0.018583395467567685,\n", + " 0.02181975833150978,\n", + " 0.004132140623618988,\n", + " -0.007339606875008007,\n", + " -0.01723491295879082,\n", + " -0.037269534446435194,\n", + " -0.01774862120206375,\n", + " 0.026327547933399108,\n", + " -0.007577196588275757,\n", + " -0.008296388594519172,\n", + " -0.03298007108076751,\n", + " 0.0014905565891735345,\n", + " -0.004459629977951465,\n", + " -0.009330226550521277,\n", + " 0.01479479740626046,\n", + " 0.012823442604777219,\n", + " -0.013330729378593911,\n", + " -0.007975320709642934,\n", + " -0.010685131460077,\n", + " 0.0006429379877731957,\n", + " 0.020907926432530977,\n", + " -0.02789435854104286,\n", + " 0.004443576304310868,\n", + " -0.013240830668851802,\n", + " -0.018827407581614292,\n", + " 0.003987660354821467,\n", + " 0.040531581325557,\n", + " 0.012213414182305938,\n", + " 0.009946676442448794,\n", + " 0.0016968425701647375,\n", + " -0.00045952808426619847,\n", + " -0.004665113275260638,\n", + " 0.015077337405721883,\n", + " -0.007577196588275757,\n", + " 0.018557711452387965,\n", + " -0.01677257460852256,\n", + " 0.021408791736891433,\n", + " 0.032877327569467686,\n", + " 0.010113630923020533,\n", + " 0.0237846925948594,\n", + " 0.02523591861493609,\n", + " -0.0059911225035859045,\n", + " 0.012007930884996765,\n", + " 0.0013765776018011845,\n", + " 
0.02050980277682511,\n", + " 0.023887434243513986,\n", + " -0.030154674811443766,\n", + " 0.024439671303524355,\n", + " 0.012457425365029926,\n", + " 0.022230724926128126,\n", + " -0.043665202540844515,\n", + " 0.005088922343130153,\n", + " 0.013253672676441662,\n", + " 0.019931879838989787,\n", + " -0.025364346141415632,\n", + " -0.028408066784315796,\n", + " -0.024645154600833528,\n", + " -0.010710817337901956,\n", + " 0.004299096035513346,\n", + " 0.03213245201370587,\n", + " -0.01257943142205323,\n", + " -0.013523369736990606,\n", + " -0.003181780606394718,\n", + " 0.013690325148884965,\n", + " 0.014974595757067296,\n", + " -0.0038945506775205844,\n", + " 0.04394773974633808,\n", + " -0.03634485774589868,\n", + " -0.009497181031093016,\n", + " 0.006485566804151429,\n", + " 0.025518457683074893,\n", + " 0.03308281086677686,\n", + " 0.01388296550728166,\n", + " 0.008649562895353985,\n", + " -0.029795078109830092,\n", + " -0.002729075158802782,\n", + " 0.022898544711060318,\n", + " -0.02171701668285519,\n", + " -0.0009431362715396476,\n", + " 0.015308505649533392,\n", + " 0.013086718195869923,\n", + " 0.011680441530664288,\n", + " 0.01964934077085098,\n", + " -0.008277124186150455,\n", + " 0.01875035181078466,\n", + " -0.005573734439511319,\n", + " -0.006761684868495303,\n", + " -0.004385784476188644,\n", + " 0.008367022895892563,\n", + " 0.004559160891877932,\n", + " -0.02053548679200483,\n", + " 0.02050980277682511,\n", + " 0.011956560060669471,\n", + " 0.012688593608841437,\n", + " 0.013202301852114368,\n", + " -0.006915797341477183,\n", + " 0.015000280703569633,\n", + " -0.010620918628159847,\n", + " -0.0028703449257028385,\n", + " -0.026353233811224064,\n", + " -0.008771568952377289,\n", + " -0.028639235959449925,\n", + " -0.0066974715709168416,\n", + " -0.01036406450652338,\n", + " 0.0028109472645552465,\n", + " 0.016143281777682563,\n", + " -0.036884253729641804,\n", + " -0.013780223858627073,\n", + " 0.03110503552716,\n", + " 0.03482942075655007,\n", + " 
0.011089676585239107,\n", + " 0.011783183179318875,\n", + " 0.013093139665326162,\n", + " -0.01363895432455767,\n", + " 0.0164643496625588,\n", + " -0.021216151378494738,\n", + " 0.0049508635437888705,\n", + " -0.013138089020197216,\n", + " -0.018121058979944663,\n", + " -0.044641247271740465,\n", + " 0.02547992886633746,\n", + " 0.0060778109442612025,\n", + " 0.004588057038769698,\n", + " 0.02431124377704481,\n", + " 0.01411413468241579,\n", + " -0.023386568939153533,\n", + " -0.015295663641943534,\n", + " -0.009420125260263385,\n", + " -0.014422359628379549,\n", + " -0.004443576304310868,\n", + " 0.009709085797858428,\n", + " 0.002896030337866485,\n", + " -0.020175891953036394,\n", + " -0.02753476183942919,\n", + " -0.01342062808833602,\n", + " 0.005692529761806503,\n", + " 0.01930258700814979,\n", + " 0.023681950946204813,\n", + " -0.008418393720219856,\n", + " 0.018352228155078792,\n", + " 0.004636217128368872,\n", + " -0.004205986591043118,\n", + " 0.048391318378955495,\n", + " -0.007224022287440943,\n", + " 0.014075605865678355,\n", + " -0.0024529568616282536,\n", + " 0.011019042283865715,\n", + " -0.005442096643964967,\n", + " 0.021511533385546018,\n", + " -0.0036794354088577072,\n", + " 0.013844437621866844,\n", + " 0.0014889512218094747,\n", + " 0.007744152000170115,\n", + " 0.029332741622207073,\n", + " 0.028253953380011295,\n", + " -0.003287732756946769,\n", + " -0.025364346141415632,\n", + " 0.030385843986577895,\n", + " 0.030154674811443766,\n", + " -0.02075381302822648,\n", + " -0.00915042819971444,\n", + " 0.024657995677100766,\n", + " 0.002915294280573893,\n", + " -0.003499637523712181,\n", + " 0.020805184783876392,\n", + " 0.004530264744986166,\n", + " -0.01326651561535414,\n", + " 0.028331011013486163,\n", + " -0.0031849911082921824,\n", + " 0.0028478702482673114,\n", + " -0.005008655371911093,\n", + " -0.028485122555145424,\n", + " -0.012483110311532264,\n", + " 0.032748901905633385,\n", + " -0.012316155830960525,\n", + " 
0.015693786366326783,\n", + " 0.008521135368874442,\n", + " -0.025120334027369025,\n", + " -0.008110168774256098,\n", + " 0.002701784379275076,\n", + " -0.03577977960962107,\n", + " 0.018326542277253836,\n", + " 0.007448769527457523,\n", + " 0.007435927054206355,\n", + " -0.008983472787820083,\n", + " 0.023836062487864074,\n", + " -0.002374295257773254,\n", + " 0.031516002121778346,\n", + " -0.015950641419285868,\n", + " -0.00015401213752162606,\n", + " 0.009953097911905033,\n", + " 0.009895305618121502,\n", + " -0.007281814581224475,\n", + " -0.008816518307248344,\n", + " 0.015192921993288947,\n", + " 0.014281089162987528,\n", + " -0.018044003209115034,\n", + " 0.01326651561535414,\n", + " 0.0014135003526825948,\n", + " -0.02424702908248242,\n", + " -0.007403820172586469,\n", + " -0.01615612471659504,\n", + " 0.014383830811642115,\n", + " 0.010755766692773009,\n", + " -0.032235191799715214,\n", + " 0.01669551883769293,\n", + " -0.0030533535455764846,\n", + " 0.005503099672476619,\n", + " -0.01419119045324542,\n", + " 0.015115865291136697,\n", + " -0.014229718338660234,\n", + " 0.010081524507061958,\n", + " -0.006161288650208382,\n", + " 0.006922218810933422,\n", + " 0.005560891966260151,\n", + " -0.0013083507693972458,\n", + " 0.0038463905879214107,\n", + " 0.007789101355041169,\n", + " -0.010890614757386173,\n", + " -0.014088448804590833,\n", + " -0.0003246796704070537,\n", + " 0.02592942427769324,\n", + " -0.026353233811224064,\n", + " -0.008315652071565269,\n", + " -0.015205764000878806,\n", + " -1.1851911374338901e-06,\n", + " -0.0018381123370647939,\n", + " -0.029307055744382117,\n", + " 0.034290024772806946,\n", + " -0.017504609088017143,\n", + " -0.02188397116342693,\n", + " -0.025492771805249937,\n", + " 0.026134907575002413,\n", + " 0.01586074270954376,\n", + " -0.02058685854765474,\n", + " -0.00010249081727991015,\n", + " 0.0008997921094096621,\n", + " 0.00427019988862158,\n", + " 0.00017959722877693488,\n", + " -0.003217097989912068,\n", + " 
-0.015680945290059544,\n", + " 0.006061757270620605,\n", + " -0.008354180888302703,\n", + " -0.01173181235499158,\n", + " 0.03498353229820933,\n", + " 0.02021442076977383,\n", + " 0.009651293504074895,\n", + " -0.021139095607665106,\n", + " 0.004437155300515938,\n", + " 0.0035863257315568247,\n", + " -0.006909375872020944,\n", + " -0.021550062202283452,\n", + " -0.002120651870864907,\n", + " 0.024003018831081052,\n", + " 0.0021752334299203198,\n", + " -0.01288765636801699,\n", + " -0.018339385216166314,\n", + " -0.013818751744041888,\n", + " 0.0034354239933030643,\n", + " 0.023964490014343618,\n", + " -0.028485122555145424,\n", + " 0.011956560060669471,\n", + " 0.008835781784294441,\n", + " -0.02409291754082316,\n", + " 0.010004468736232328,\n", + " 0.028331011013486163,\n", + " -0.008874310601031875,\n", + " 0.016438663784733843,\n", + " -0.022230724926128126,\n", + " -0.024812109081405267,\n", + " -0.033005756958592466,\n", + " 0.01281059966586474,\n", + " -0.041918592651071306,\n", + " 0.00891925995590293,\n", + " -0.0041578260357826346,\n", + " -0.023681950946204813,\n", + " -0.03780892670488784,\n", + " -0.013215144791026846,\n", + " -0.060668944461855966,\n", + " -0.0199832515946397,\n", + " -0.018801721703789336,\n", + " 0.039863759677979574,\n", + " 0.011982245007171809,\n", + " 0.04479535881339973,\n", + " 0.025441401912245264,\n", + " 0.023322354244591142,\n", + " -0.006055336266825676,\n", + " -0.004353677594568759,\n", + " -0.025659728148466916,\n", + " 0.024439671303524355,\n", + " 0.0034803733481741186,\n", + " -0.0035156909645221238,\n", + " 0.014602157047863766,\n", + " -0.009420125260263385,\n", + " 0.00525908848975263,\n", + " -0.014075605865678355,\n", + " 0.011417165939571582,\n", + " 0.004459629977951465,\n", + " 0.018313699338341358,\n", + " 0.012457425365029926,\n", + " -0.005560891966260151,\n", + " -0.036036634662580155,\n", + " 0.02531297438576572,\n", + " -0.002006672883492557,\n", + " 0.0190714196956609,\n", + " 0.013433471027248498,\n", 
+ " 0.005589788113151917,\n", + " -0.02784298678539295,\n", + " 0.008951366371861506,\n", + " 0.0008010638209575612,\n", + " -0.01493606787165248,\n", + " -0.009426546729719624,\n", + " 0.01518007905437647,\n", + " -0.0027049951140031953,\n", + " -0.010620918628159847,\n", + " 0.02750907782424947,\n", + " -0.0016647358049601791,\n", + " -0.00518845325705662,\n", + " -0.010319114220329707,\n", + " 0.011866660419604744,\n", + " 0.0031223829452471257,\n", + " 0.024889164852234896,\n", + " 0.0039106041183305275,\n", + " 0.015077337405721883,\n", + " 0.01817242887294934,\n", + " 0.0010073497437411004,\n", + " -0.02027863360169098,\n", + " 0.006183763327643909,\n", + " -0.01555251776358,\n", + " -0.019790611236243004,\n", + " -0.006800213219571428,\n", + " -0.022795803062405733,\n", + " -0.0020098836182206765,\n", + " -0.019931879838989787,\n", + " 0.0010932353843190416,\n", + " -0.002920110382666072,\n", + " 0.003766123616702351,\n", + " -0.010691552929533239,\n", + " -0.004600899512020867,\n", + " -0.013651797263470149,\n", + " 0.005069658400422745,\n", + " -0.004835278956221806,\n", + " 0.007660674294222935,\n", + " -0.0021993134747199067,\n", + " -0.01204645877041158,\n", + " -0.020574015608742263,\n", + " -0.00762214640880812,\n", + " -0.026199120406919567,\n", + " -0.022140826216386016,\n", + " -0.023039815176452337,\n", + " -0.004899492719461577,\n", + " 0.02172985962176767,\n", + " -0.00571500443924203,\n", + " 0.012168464827434883,\n", + " -0.017825676972893383,\n", + " -0.00010374498870520796,\n", + " 0.0033037861977567116,\n", + " 0.006068178740076844,\n", + " 0.1924351097926858,\n", + " -0.00762214640880812,\n", + " -0.015578202710082338,\n", + " 0.013330729378593911,\n", + " 0.008334916479933986,\n", + " 0.0008612639911641921,\n", + " 0.019187004283227962,\n", + " 0.018570554391300443,\n", + " -0.0010458778619865704,\n", + " 0.0006288912561683276,\n", + " -0.00709559522662271,\n", + " 0.021306050088236845,\n", + " -0.004472472451202633,\n", + " 
-0.0014496205362973023,\n", + " -0.004504579332822519,\n", + " -0.02501759237871444,\n", + " -0.028819033378934142,\n", + " -0.024503884135441505,\n", + " -0.013934336331608952,\n", + " 0.007532247233404702,\n", + " 0.020792341844963914,\n", + " 0.03315986850025173,\n", + " -0.01290049837560685,\n", + " 0.00029839224548188463,\n", + " 0.006341086535353908,\n", + " -0.006045704062641317,\n", + " -0.007615724939351881,\n", + " 0.03667877043233263,\n", + " 0.03167011366343761,\n", + " -0.00011829336850751292,\n", + " -0.028793347501109186,\n", + " 0.01556535977116986,\n", + " -0.00968340085135609,\n", + " -0.015064494466809405,\n", + " -0.019097103710840616,\n", + " 0.026558717108533237,\n", + " -0.031695799541262566,\n", + " -0.00869451225022504,\n", + " -0.002393559433311316,\n", + " -0.010537440456551358,\n", + " 0.007416663111498947,\n", + " 0.027354964419944974,\n", + " 0.0090027371961888,\n", + " -0.026789886283667366,\n", + " 0.019508070305458963,\n", + " 0.03110503552716,\n", + " ...]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='functional areas or\\n\\nconcentrated knowledge of a\\n\\nparticular discipline,\\n\\nconsidered the organization’s\\n\\nexpert within a particular\\n\\ndiscipline\\n\\n15+ Years of Experience\\n\\nManaging complex\\n\\ncommunications through\\n\\ndiscussions and compromise.\\n\\nIssues are of medium-term\\n\\ntactical or limited strategic\\n\\nnature\\n\\nBroad and substantive\\n\\nmanagement experiences\\n\\nacross several line and\\n\\nsupport functions or\\n\\nbusinesses, or recognized\\n\\noutside the organization as\\n\\nhaving paramount capability', metadata={'source': 'Data\\\\Policies\\\\1.4. 
Grade Title Policy (2021).pdf'}),\n", + " Document(page_content='functional areas or\\n\\nconcentrated knowledge of a\\n\\nparticular discipline,\\n\\nconsidered the organization’s\\n\\nexpert within a particular\\n\\ndiscipline\\n\\n15+ Years of Experience\\n\\nManaging complex\\n\\ncommunications through\\n\\ndiscussions and compromise.\\n\\nIssues are of medium-term\\n\\ntactical or limited strategic\\n\\nnature\\n\\nBroad and substantive\\n\\nmanagement experiences\\n\\nacross several line and\\n\\nsupport functions or\\n\\nbusinesses, or recognized\\n\\noutside the organization as\\n\\nhaving paramount capability', metadata={'source': 'Data\\\\Policies\\\\1.4. Grade Title Policy (2021).pdf'}),\n", + " Document(page_content='functional areas or\\n\\nconcentrated knowledge of a\\n\\nparticular discipline,\\n\\nconsidered the organization’s\\n\\nexpert within a particular\\n\\ndiscipline\\n\\n15+ Years of Experience\\n\\nManaging complex\\n\\ncommunications through\\n\\ndiscussions and compromise.\\n\\nIssues are of medium-term\\n\\ntactical or limited strategic\\n\\nnature\\n\\nBroad and substantive\\n\\nmanagement experiences\\n\\nacross several line and\\n\\nsupport functions or\\n\\nbusinesses, or recognized\\n\\noutside the organization as\\n\\nhaving paramount capability', metadata={'source': 'Data\\\\Policies\\\\1.4. 
Grade Title Policy (2021).pdf'}),\n", + " Document(page_content='functional areas or\\n\\nconcentrated knowledge of a\\n\\nparticular discipline,\\n\\nconsidered the organization’s\\n\\nexpert within a particular\\n\\ndiscipline\\n\\n15+ Years of Experience\\n\\nManaging complex\\n\\ncommunications through\\n\\ndiscussions and compromise.\\n\\nIssues are of medium-term\\n\\ntactical or limited strategic\\n\\nnature\\n\\nBroad and substantive\\n\\nmanagement experiences\\n\\nacross several line and\\n\\nsupport functions or\\n\\nbusinesses, or recognized\\n\\noutside the organization as\\n\\nhaving paramount capability', metadata={'source': 'Data\\\\Policies\\\\1.4. Grade Title Policy (2021).pdf'})]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"what cars are offered at each grade?\"\n", + "docs = vectorstore.similarity_search(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='1.4.1.1 Jazz’s grading architecture is structured around six job levels beneath CEO i.e. L1 to L6.\\n\\n1.4.1.2 Leadership roles are defined at job grade Level L3. For business reasons some regional roles\\n\\nmay be required to assume a supervisory role at job grade Level L2.\\n\\n1.4.1.3 In order to allow enriched career experience and growth, grade levels sufficiently enable\\n\\nparallel expert and leadership paths.', metadata={'source': 'Data\\\\Policies\\\\1.4. Grade Title Policy (2021).pdf'}),\n", + " Document(page_content='1.4.1.1 Jazz’s grading architecture is structured around six job levels beneath CEO i.e. L1 to L6.\\n\\n1.4.1.2 Leadership roles are defined at job grade Level L3. 
For business reasons some regional roles\\n\\nmay be required to assume a supervisory role at job grade Level L2.\\n\\n1.4.1.3 In order to allow enriched career experience and growth, grade levels sufficiently enable\\n\\nparallel expert and leadership paths.', metadata={'source': 'Data\\\\Policies\\\\1.4. Grade Title Policy (2021).pdf'}),\n", + " Document(page_content='1.4.1.1 Jazz’s grading architecture is structured around six job levels beneath CEO i.e. L1 to L6.\\n\\n1.4.1.2 Leadership roles are defined at job grade Level L3. For business reasons some regional roles\\n\\nmay be required to assume a supervisory role at job grade Level L2.\\n\\n1.4.1.3 In order to allow enriched career experience and growth, grade levels sufficiently enable\\n\\nparallel expert and leadership paths.', metadata={'source': 'Data\\\\Policies\\\\1.4. Grade Title Policy (2021).pdf'}),\n", + " Document(page_content='1.4.1.1 Jazz’s grading architecture is structured around six job levels beneath CEO i.e. L1 to L6.\\n\\n1.4.1.2 Leadership roles are defined at job grade Level L3. For business reasons some regional roles\\n\\nmay be required to assume a supervisory role at job grade Level L2.\\n\\n1.4.1.3 In order to allow enriched career experience and growth, grade levels sufficiently enable\\n\\nparallel expert and leadership paths.', metadata={'source': 'Data\\\\Policies\\\\1.4. Grade Title Policy (2021).pdf'})]" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQA\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.chat_models import ChatOpenAI\n", + "template = \"\"\"Use the following pieces of context to answer the question at the end. 
\n", + "If you don't know the answer, just say that you don't know, don't try to make up an answer. \n", + "Use three sentences maximum and keep the answer as concise as possible.\n", + "{context}\n", + "Question: {question}\n", + "Helpful Answer:\"\"\"\n", + "QA_CHAIN_PROMPT = PromptTemplate.from_template(template)\n", + "\n", + "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", + "\n", + "qa_chain = RetrievalQA.from_chain_type(\n", + " llm,\n", + " retriever=vectorstore_hyde.as_retriever(),\n", + " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT}\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"what cars are offered to each grade ?\"" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "result = qa_chain({\"query\": query})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The provided context does not mention anything about the specific cars offered to each grade.\n" + ] + } + ], + "source": [ + "print(result['result'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating my cover letter from my Resume" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os \n", + "os.environ[\"OPENAI_API_KEY\"] ='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'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import PyPDFLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting pypdf\n", + " Downloading pypdf-3.16.2-py3-none-any.whl (276 kB)\n", + " -------------------------------------- 
276.3/276.3 kB 2.1 MB/s eta 0:00:00\n", + "Installing collected packages: pypdf\n", + "Successfully installed pypdf-3.16.2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n", + "WARNING: Ignoring invalid distribution -yodbc (d:\\anaconda3\\envs\\nlp\\lib\\site-packages)\n" + ] + } + ], + "source": [ + "! pip install pypdf" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# doc = \"D:\\Hamza\\Hamza's Resumes\\Hamza's Resume.pdf\"\n", + "\n", + "loader = PyPDFLoader(\"D:\\Hamza\\Hamza's Resumes\\Hamza's Resume.pdf\")\n", + "pages = loader.load_and_split()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Page 1 of 4 \\nHamza Shabbir \\nData Scientist \\nI am fueled by a genuine passion for Artificial Intelligence and its contemporary implications. My expertise lies in distilling valuable insights from\\nextensive datasets, employing statistical, mathematical, and visualization methodologies to illuminate inherent patterns. In essence, I possess the\\nacumen to transform data into compelling narratives. Complementing this, my proficiency as a Content Writer enables me to articulate diverse\\nsubjects within the realm of data, offering a well-informed and perceptive outlook. 
\\nhamxahbhatti@gmail.com \\n+923208575392 \\nRawalpindi, Pakistan \\nlinkedin.com/in/hamxahbhatti \\nhamxahbhatti \\ngithub.com/hamxahbhatti \\nmedium.com/@hamxahbhatti \\nWORK EXPERIENCE \\nData Scientist \\nWhisper Tube \\n08/2023 - Present\\n, \\n \\nPakistan \\nDeveloped a project leveraging advanced Machine Learning Models, including\\nWhisper, LLMs, GPT-3.5, Bark, and seamlessM4T. \\nDesigned the project to extract knowledge from YouTube videos, involving \\nvideo\\ndownloading and transcription using seamlessM4T to generate \\nword-level\\nsubtitles. \\nUtilized LLMs to establish connections between various topics and words present\\nin the video content. For better content Consumption for different types of\\naudience. \\nEmployed LLMs for the generation of topic-wise and speaker-wise \\nsummaries, as\\nwell as speaker diarization, tailored for different target \\naudiences. \\nUtilized LLMs to create narrations from the video transcripts. \\nImplemented Bark, a state-of-the-art text-to-speech model, to produce audio\\nbooks based on the video content. \\nContact \\n:\\nwhispertube.ai \\nData Scientist \\nOpus Analytics \\n10/2022 - Present\\n, \\n \\nUSA \\nAs a Data Scientist (Consultant) at Opus, I lead a team of developers in \\ncrafting a\\nData Product tailored for People Analytics. This role \\nhighlights my dedication to\\nutilizing data insights for informed decision-making. \\nHarnessing the potential of People Analytics, I am engaged in developing \\na tool\\nthat generates personalized analytics from data, exemplifying my \\ncommitment to\\ndata-driven innovation. \\nUsing Natural Language Processing tools and techniques, I process \\ndiverse data\\ntypes to enhance organizational insights and refine People \\nProcesses, contributing\\nto informed decision-making and continuous \\nimprovement \\nResearch on adopting best ML practices including Data Version Control (DVC),\\nFeature Stores, MLOps using Azure DevOps & AzureML. 
\\nContact \\n:\\nsomayael@rightfoot.org \\nACHIEVEMENTS \\nHigh Frequency Algorithmic Trading\\n (02/2020)\\n \\nAlhamdulillah ! In my Second Attempt, I was able to write an\\nalpha that was accepted and is now being used for trading. This\\nis a huge achievement as nobody \\nin Radix has written Alpha in\\nhis second attempt that was accepted for trading. \\nInvoice Reader\\n (07/2021)\\n \\nDesigned Computer Vision based \\ninvoice reader that can extract\\nall the relevant information from given invoices.This product is\\ncurrently being used by AutoSphere.ai. \\nCONFERENCES & COURSES \\n2022 GLOBAL DIGITAL HUMAN RESOURCES\\nTRANSFORMATION & ANALYTICS\\n (03/2022 - 03/2022)\\n \\nCorporate World Intelligence \\nMachine Learning for Trading\\n (02/2020 - Present)\\n \\nCoursera \\nAI Institute \"Geometry of Deep Learning\" 2019\\n[Workshop] \\nMicrosoft/youtube.com \\nMIT 6.S191 \\nMIT/youtube.com \\nMaking Sense of Data\\n (10/2019 - 12/2020)\\n \\nCoursera \\nMachine Learning A-Z™ Hands-On Python In\\nNeural Networks \\nSuperDatascience.com \\nData Analysis with Excel \\nUdemy \\nCareer in DataScience A-Z \\nSuperDatascience.com \\nMathematical Foundation For Machine Learning\\nand AI \\nUdemy \\nData Science A-Z \\nSuperDatascience.com \\nReinforcement Learning Course by David Silver \\nyoutube.com \\nMachine Learning with Python \\nCognitive Class (An IBM Initiative) \\nData Visualization with R \\nCognitive Class (An IBM Initiative) \\nAchievements/Tasks \\nAchievements/Tasks', metadata={'source': \"D:\\\\Hamza\\\\Hamza's Resumes\\\\Hamza's Resume.pdf\", 'page': 0}),\n", + " Document(page_content='Page 2 of 4 \\nWORK EXPERIENCE \\nExpert People Analytics (Data Scientist) \\nJazz (VEON) \\n11/2021 - Present\\n, \\n \\nIslamabad, Pakistan \\nIntroduced \\nbest practices\\n \\nand robust \\ndata architectures\\n , elevating data \\nquality\\nand \\nscalability\\n \\nwithin the \\nPeople Analytics\\n \\nteam \\nUsing data science life cycles standards such as 
\\nCRISP-DM\\n to organize & manage\\nprojects. \\nLed \\ndata-driven\\n \\ninitiatives, utilizing advanced analytics to extract \\nactionable\\ninsights\\n \\nfrom intricate \\nworkforce datasets\\n . \\nCollaborated across \\ndepartments\\n, aligning \\ndata strategies\\n \\nwith \\norganizational\\ngoals\\n \\nand facilitating informed \\ntalent decisions.\\n \\nDesigned and implemented a groundbreaking project utilizing \\nLarge Language\\nModels (LLMs)\\n \\nto create an interactive system that efficiently answered user\\nqueries regarding \\npolicies\\n \\nand \\nday-to-day employee inquiries\\n. This innovative\\nsolution significantly improved information \\naccessibility\\n \\nand streamlined \\ninternal\\ncommunication processes\\n. \\nLed the \\n\"VEON Data Insights Enhancement\"\\n \\ninitiative, collaborating with \\n8\\nglobal sister companies\\n \\nto \\noptimize data collection\\n, establish \\nbest practices\\n,\\nand \\nrevamp the Insights portal\\n, thereby \\nelevating cross-company insights\\n \\nand\\ninformed decision-making\\n \\nwithin the organization. \\nData Scientist \\nMercurial Minds \\n06/2019 - 11/2021\\n, \\n \\nIslamabad, Pakistan \\nDesigned \\na Deep Learning and OpenCV based Product that is used to \\nreplace\\nAmazon Textract\\n for invoice processing by our RPA team at \\nMercurial Minds. \\nDesigned a Deep learning based model that was used for \\nMask Detection\\n in a\\nproject Affiliated with Uber. \\nAnalyzed kushal Radio\\'s dataset of Telenor \\nto Study customer\\'s behavior and\\nContent Consumption\\n, visualized its results using different Visualizing\\nTechniques. \\nDesigned a \\nChat bot\\n by using different Machine Learning Models and \\ndifferent\\nprepossessing techniques that is currently being used at Human \\nResource\\nDepartment of HBL. \\nImproved Biocare Lab\\'s \\nmarketing campaigns\\n by identifying potential \\npatients for\\ndifferent lab tests through analyzing Biocare lab test data. 
\\nContact \\n:\\nhr@mercurialminds.com \\nQuantitative Researcher \\nRadix Trading LLC \\n01/2020 - 09/2020\\n, \\n \\nChicago,USA \\nWorked as Quantitative Researcher (Quant) to Capture Price movement in \\nHigh\\nfrequency Algorithmic trading\\n. \\nDesigned an Alpha in my Second Attempt that is currently being used for trading\\nin \\nlive Markets\\n at Radix Trading LLC. \\nWorked as a Consultant as well to train new Hiring in KDS (Radix\\'s Affiliated\\nCompany) to guide them in new field of Quantitative Research. \\nHands on experience on AWS Athena, RDS, EC2, S3, Glue. \\nEDUCATION \\nSoftware Engineering \\nCOMSATS University \\n07/2015 - 07/2019\\n, \\n \\nPark Road, Islamabad \\nPre-Engineering \\nPunjab College of Information and Technology \\n2012 - 2014\\n, \\n \\nCommercial Market, Rawalpindi \\nCONFERENCES & COURSES \\nPython for Data Science \\nCognitive Class (An IBM Initiative) \\nCERTIFICATES \\nPython 101 for Data Science\\n (05/2019 - 05/2019)\\n \\nReceived Certificate for Python for Data Science from Cognitive\\nClass (An IBM Initiative) \\nInternational ECO-Internship Program 2017\\n (05/2017 - 07/2017)\\n \\nSOFT SKILLS \\nAdaptability \\nProblem Solving \\nTeam Work \\nInterpersonal Skills \\nIntegrity \\nTECHNICAL SKILLS \\nDeep Learning \\nComputer Vision \\nData Analysis & Pattern mining \\nNatural Language Processing \\nLLama Index \\nData Visualization \\nMachine Learning \\nR \\nPower BI \\nPython \\nLangchain \\nTableau \\nLANGUAGES \\nEnglish \\nProfessional Working Proficiency \\nUrdu \\nNative or Bilingual Proficiency \\nPunjabi \\nNative or Bilingual Proficiency \\nAchievements \\nAchievements \\nAchievements/Tasks', metadata={'source': \"D:\\\\Hamza\\\\Hamza's Resumes\\\\Hamza's Resume.pdf\", 'page': 1}),\n", + " Document(page_content='Page 3 of 4 \\nPROJECTS \\nPeople\\'s Bot\\n (02/2023 - 04/2023)\\n \\nImplemented the \"People\\'s Bot\" by leveraging a robust technology stack \\nthat incorporated\\nLangchain\\n, \\nLarge 
Language Models\\n (LLMs), and Natural \\nLanguage Processing. \\nEngineered a dynamic solution that expedited and enhanced employee query \\nresolution,\\nfacilitated by the power of Langchain and LLMs. \\nSeamlessly integrated Natural Language Processing techniques, enabling \\nthe system to\\ncomprehensively understand and respond to a diverse range \\nof employee inquiries. \\nDelivered a transformative impact, optimizing productivity and fostering \\nemployee\\nsatisfaction by providing immediate and accurate responses. \\nStreamlined internal communication workflow\\ns, optimizing resource allocation for HR\\nteams. \\nVEON Data Insights Enhancement\\n (05/2023 - 08/2023)\\n \\nCollaborated with \\nVEON\\n, the parent company, to optimize data collection across \\n8 sister\\ncompanies\\n \\nin diverse global locations. \\nEmployed a strategic approach to enhance data \\nquality\\n \\nand \\nconsistency\\n \\nby introducing\\nstandardized \\nbest practices\\n \\nwithin each sister company\\'s data collection process. \\nLed workshops and provided guidance on \\nPeople Analytics methodologies\\n , ensuring each\\nsister company leveraged data effectively for \\nstrategic decision-making\\n. \\nRevamped the \\nInsights portal\\n, enabling seamless visualization of \\ndata points\\n \\ncollected from\\nall 8 countries. \\nAchieved streamlined data processes, fostering enhanced cross-company \\ninsights\\n \\nand\\nsupporting \\ninformed decision-making\\n \\nwithin VEON. \\nLearning & Development Insights\\n (11/2021 - 01/2021)\\n \\nDesigned \\nLearning \\n& Development insights\\n that is helping out L&D to make better\\nunderstand and design well suited courses for each Team. \\nCombined different Data Streams from different L&D Platforms \\nthat are currently being\\nused at Jazz. ETL was performed on the source data before creating the Data warehouse. 
\\nMined Hidden Gems by doing search Analysis and \\nHuman Behavior Analysis \\nto highlight all\\nthe important areas that require improvement. \\nThese insights are helping our L&D to better plan, keep Track of \\nOrganization Learning KPIs\\nalong with Content \\nConsumption/Pathway Creation \\nand Training \\nneeds of the Employees. \\nRPA Intelligent Invoice Reader\\n (02/2021 - 07/2021)\\n \\nDesigned \\na \\nDeep Learning and OpenCV\\n based Product that is used to replace Amazon\\nTextract for invoice processing by our RPA team at Mercurial Minds. \\nI have used \\nCascadeTabnet for \\nTable Detection\\n in invoice and OpenCV was used to detect\\nand preserve Table Structure in any invoice Image. \\nDifferent OCRs Frameworks were used for data extraction from table like AWS Textract ,\\nGoogle Vision API. \\nTesseract OCR\\n was fine-tuned on our sample data for \\nbetter results. \\nTransfer Learning\\n was used on Pre-Trained InceptionResNetV2 to classify different Types of\\nTemplates by using visual layout features and extracting relevant data from each type of\\ninvoice. \\nNatural Language Processing Techniques\\n were also used \\nfor using text that is present in\\ndifferent Templates to get \\nMore Accurate Template Classification. \\nKushal Radio Telenor Churn Prediction\\n (09/2020 - 12/2020)\\n \\nKushal Radio service is used by the \\nTelenor\\n to provide Entertainment and News Service to\\nusers \\nthrough IVR. \\nDue to the streaming nature of this service, 2 to 3 GB data was generated on a daily basis. I\\nhad to use \\nBig Data\\n Tools like to \\nApache Spark and Apache Superset\\n. \\nPySpark\\n was used to create end to end ETL pipeline. \\nA complete end to end pipeline was designed for processing raw data, Features Engineering\\nand Data visualization. \\nSpark MLlib\\n, \\nScikit-learn\\n and \\nTensorflow \\nwere used to create different ML Models for User\\nChurn Prediction. 
\\nMlflow\\n was used to track each machine learning model\\'s Performance and later to deploy\\nthese Machine Learning models. \\nPharm Evo Data Analysis\\n (04/2020 - 04/2020)\\n \\nPharm Evo (Pvt.) limited\\n, is a healthcare company, which is engaged in \\nthe creation,', metadata={'source': \"D:\\\\Hamza\\\\Hamza's Resumes\\\\Hamza's Resume.pdf\", 'page': 2}),\n", + " Document(page_content='these Machine Learning models. \\nPharm Evo Data Analysis\\n (04/2020 - 04/2020)\\n \\nPharm Evo (Pvt.) limited\\n, is a healthcare company, which is engaged in \\nthe creation,\\ndevelopment, manufacture and marketing of pharmaceutical \\nproducts, including over-the-\\ncounter (OTC) medicines, medical equipment \\nand infant formulas. \\nThe Data set that was provided by PharmEvo contained \\ntime series data \\nabout the usage\\nquantity of different Raw Material that they were using \\nto create different Pharmaceutical\\nProducts and their forecast according \\nto their\\n \\nForecasting System\\n so that they can order\\nRaw Material ahead \\nof time. \\nThe purpose of this Analysis was to find out \\nPatterns\\n in usage of Raw \\nMaterial in\\npharmaceutical products. While analyzing data, I found out \\nthat current forecasting system\\nthey are using to forecast expected \\nquantity of each Raw Material is not capturing trend of\\nthe data and \\nforecast was way \\noff then actual usage of raw material. \\nI highlighted this issue in my \\nData Analysis Report\\n, along with different \\nsuggestions about\\nusing different time series models to improve their \\ncurrent forecasting system. 
\\nINTERESTS \\nHiking \\nArtificial Intelligence \\nChess \\nFootball \\nVideo Games \\nAnimal Lover \\nWriting', metadata={'source': \"D:\\\\Hamza\\\\Hamza's Resumes\\\\Hamza's Resume.pdf\", 'page': 2}),\n", + " Document(page_content=\"Page 4 of 4 \\nPROJECTS \\nHigh Frequency Algorithmic Trading\\n (01/2020 - 09/2020)\\n \\nAim of this project is to come up with new Alpha(\\nAlgorithm that can capture different\\nphenomenons happening in Market that can predict price movement\\n) Weekly. Most of\\nthe techniques that are being used are Statistical in nature. \\nEvery Week we submit our new \\nAlphas\\n. Since this is \\nHigh Frequency Trading\\n,\\nDecision/Prediction that an alpha make should be in very low in \\nlatency(under 2\\nmicroseconds). For this purpose, C++ was used for writing \\nsignals(alphas) \\nStreaming Stock Market\\n data was used to create different Signals that can capture Price\\nmovement in \\nLive Market\\n. \\nWhen an alpha is written it is then tested against all the other alphas that Radix has(They\\ncould be Thousands in number, even more than that). For an alpha to be accepted it has to\\ncapture a behavior of markets in a way that nobody has captured it before That Makes it very\\nchallenging, and it should also add relative improvement in prediction of all the other written\\nAlphas. If Alpha is accepted by Radix, then it will be used for \\nTrading\\n. \\nMask Detection\\n (05/2020 - 06/2020)\\n \\nFor this Computer Vision problem, I designed a model based on Deep learning that was using\\nmobile net as pre-trained model and used Transfer learning to train this model on mask\\ndetection data set \\n(that was converted in to classification dateset). \\nThis model was classifying \\nwhether a person is wearing a mask or not.in the end of this\\npipeline, \\nviola Jones Algorithm \\nwas to detect face in Video Stream. \\nThis model is being used in one of the projects with \\nUber \\nat Mercurial minds. 
\\nKushal Radio Telenor\\n (10/2019 - 01/2020)\\n \\nKushal Radio service is used by the Telenor to provide Entertainment and News Service to\\nusers \\nthrough IVR. Currently, I am working on \\nContent Based Analysis\\n to determine which\\ncontent is mostly listened by the users. User's Content choice with respect to their Geo\\nlocation. \\nBehavior and Community based Analysis\\n were the main POIS in this project. \\nBiocare Labs Data Analysis\\n (07/2019 - 10/2019)\\n \\nData Quality Report and Exploratory Data Analysis was performed on dataset that was\\nprovided by Bio Care Labs. Community Analysis, Behavior Analysis of the patients of Specific\\nDiseases along with many Analysis techniques that were used to make Bio Care lab's\\nMarketing campaigns better and Effective. \\nHBL Human Resource \\nChatbot\\n (06/2019 - 01/2020)\\n \\nHBL is one of the Biggest Banks of Pakistan. Their Human resource Department receives\\nmore than 5000 request daily from their employees. \\nThe idea was to design a Chatbot that process all the requirement of Employees through\\ninteractive session with Chatbot.\\n \\nCurrently, this Chatbot Handle all the queries of 2.5 Million Employees of the HBL related to\\nHuman Resource Department. 
\\nHuman Resource Chatbot is Designed Using \\nState-of-The-art Transformers Models.\", metadata={'source': \"D:\\\\Hamza\\\\Hamza's Resumes\\\\Hamza's Resume.pdf\", 'page': 3})]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pages" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def doc_summary(docs):\n", + " print(f'You have {len(docs)} documents')\n", + " num_words = sum([len(doc.page_content.split(\" \")) for doc in docs])\n", + " print(f\"You have {num_words} words in documents\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You have 5 documents\n", + "You have 2084 words in documents\n" + ] + } + ], + "source": [ + "doc_summary(pages)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains.llm import LLMChain\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.chains.combine_documents.stuff import StuffDocumentsChain\n", + "from langchain.chains.mapreduce import MapReduceChain\n", + "from langchain.chains import RefineDocumentsChain\n", + "\n", + "from langchain.chat_models import ChatOpenAI \n", + "# Define prompt\n", + "prompt_template = \"\"\"This text below represent the a resume of Data Scientist with all the details\n", + " prensent in the text below. 
Your job is to create a professional resume which is verbose yet cover all the important\n", + " projects , jobs and skills and technologies that are mentioned in the text.Dont select few projects\n", + " and provide complete details about these projects..\n", + " \"{text}\"\n", + " \"\"\"\n", + "\n", + "prompt = PromptTemplate.from_template(prompt_template)\n", + "\n", + "# Define LLM chain\n", + "llm = ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo-16k\")\n", + "llm_chain = LLMChain(llm=llm, prompt=prompt)\n", + "\n", + "# Define StuffDocumentsChain\n", + "stuff_chain = StuffDocumentsChain(\n", + " llm_chain=llm_chain, document_variable_name=\"text\"\n", + ")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import display, Markdown\n", + "\n", + "# display(Markdown(f\"Text: {prompt.format(text=pages)}\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "output =stuff_chain.run(pages)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "Text: Hamza Shabbir\n", + "Data Scientist\n", + "Email: hamxahbhatti@gmail.com\n", + "Phone: +923208575392\n", + "Location: Rawalpindi, Pakistan\n", + "LinkedIn: linkedin.com/in/hamxahbhatti\n", + "GitHub: github.com/hamxahbhatti\n", + "Medium: medium.com/@hamxahbhatti\n", + "\n", + "Summary:\n", + "Passionate Data Scientist with expertise in Artificial Intelligence and a strong ability to distill valuable insights from extensive datasets. Skilled in statistical analysis, mathematical modeling, and data visualization. 
Proficient in content writing, providing a well-informed and perceptive outlook on diverse data subjects.\n", + "\n", + "Work Experience:\n", + "Data Scientist, Whisper Tube, Pakistan\n", + "08/2023 - Present\n", + "- Developed a project leveraging advanced Machine Learning Models, including Whisper, LLMs, GPT-3.5, Bark, and seamlessM4T.\n", + "- Designed a project to extract knowledge from YouTube videos, involving video downloading and transcription using seamlessM4T to generate word-level subtitles.\n", + "- Utilized LLMs to establish connections between various topics and words present in the video content for better content consumption for different types of audiences.\n", + "- Employed LLMs for the generation of topic-wise and speaker-wise summaries, as well as speaker diarization, tailored for different target audiences.\n", + "- Utilized LLMs to create narrations from the video transcripts.\n", + "- Implemented Bark, a state-of-the-art text-to-speech model, to produce audio books based on the video content.\n", + "\n", + "Data Scientist, Opus Analytics, USA\n", + "10/2022 - Present\n", + "- Led a team of developers in crafting a Data Product tailored for People Analytics, utilizing data insights for informed decision-making.\n", + "- Developed a tool that generates personalized analytics from data using Natural Language Processing tools and techniques to enhance organizational insights and refine People Processes.\n", + "- Conducted research on adopting best ML practices including Data Version Control (DVC), Feature Stores, MLOps using Azure DevOps & AzureML.\n", + "\n", + "Expert People Analytics (Data Scientist), Jazz (VEON), Islamabad, Pakistan\n", + "11/2021 - Present\n", + "- Introduced best practices and robust data architectures within the People Analytics team, elevating data quality and scalability.\n", + "- Led data-driven initiatives, utilizing advanced analytics to extract actionable insights from intricate workforce datasets.\n", + "- 
Collaborated across departments, aligning data strategies with organizational goals and facilitating informed talent decisions.\n", + "- Designed and implemented a groundbreaking project utilizing Large Language Models (LLMs) to create an interactive system that efficiently answered user queries regarding policies and day-to-day employee inquiries.\n", + "- Led the \"VEON Data Insights Enhancement\" initiative, collaborating with 8 global sister companies to optimize data collection, establish best practices, and revamp the Insights portal, thereby elevating cross-company insights and informed decision-making within the organization.\n", + "\n", + "Data Scientist, Mercurial Minds, Islamabad, Pakistan\n", + "06/2019 - 11/2021\n", + "- Designed a Deep Learning and OpenCV based Product to replace Amazon Textract for invoice processing by the RPA team.\n", + "- Developed a Deep learning based model for Mask Detection in a project affiliated with Uber.\n", + "- Analyzed Kushal Radio's dataset of Telenor to study customer's behavior and content consumption, visualized results using different visualization techniques.\n", + "- Designed a Chatbot using different Machine Learning Models and preprocessing techniques, currently being used at the Human Resource Department of HBL.\n", + "- Improved Biocare Lab's marketing campaigns by identifying potential patients for different lab tests through analyzing Biocare lab test data.\n", + "\n", + "Quantitative Researcher, Radix Trading LLC, Chicago, USA\n", + "01/2020 - 09/2020\n", + "- Worked as a Quantitative Researcher to capture price movement in High-frequency Algorithmic trading.\n", + "- Designed an Alpha in my Second Attempt that is currently being used for trading in live markets at Radix Trading LLC.\n", + "- Worked as a Consultant to train new hires in KDS (Radix's Affiliated Company) in the field of Quantitative Research.\n", + "- Hands-on experience with AWS Athena, RDS, EC2, S3, Glue.\n", + "\n", + "Education:\n", + 
"Software Engineering, COMSATS University, Islamabad, Pakistan\n", + "07/2015 - 07/2019\n", + "\n", + "Pre-Engineering, Punjab College of Information and Technology, Rawalpindi, Pakistan\n", + "2012 - 2014\n", + "\n", + "Certificates:\n", + "Python 101 for Data Science, Cognitive Class (An IBM Initiative), 05/2019\n", + "International ECO-Internship Program 2017, 05/2017 - 07/2017\n", + "\n", + "Skills:\n", + "- Deep Learning\n", + "- Computer Vision\n", + "- Data Analysis & Pattern mining\n", + "- Natural Language Processing\n", + "- LLama Index\n", + "- Data Visualization\n", + "- Machine Learning\n", + "- R\n", + "- Power BI\n", + "- Python\n", + "- Langchain\n", + "- Tableau\n", + "\n", + "Languages:\n", + "- English: Professional Working Proficiency\n", + "- Urdu: Native or Bilingual Proficiency\n", + "- Punjabi: Native or Bilingual Proficiency\n", + "\n", + "Interests:\n", + "- Hiking\n", + "- Artificial Intelligence\n", + "- Chess\n", + "- Football\n", + "- Video Games\n", + "- Animal Lover\n", + "- Writing\n", + "\n", + "Projects:\n", + "- People's Bot: Implemented a robust technology stack incorporating Langchain, Large Language Models (LLMs), and Natural Language Processing to create an interactive system for efficient employee query resolution and streamlined internal communication workflows.\n", + "- VEON Data Insights Enhancement: Collaborated with VEON and 8 sister companies to optimize data collection, establish best practices, and revamp the Insights portal, elevating cross-company insights and informed decision-making.\n", + "- Learning & Development Insights: Designed insights to improve learning and development initiatives by analyzing data from different platforms and highlighting areas for improvement.\n", + "- RPA Intelligent Invoice Reader: Developed a deep learning and OpenCV-based product to replace Amazon Textract for invoice processing.\n", + "- Kushal Radio Telenor Churn Prediction: Analyzed Kushal Radio's dataset of Telenor to predict 
# Show the result produced by the stuff chain above.
display(Markdown(f"Text: {output}"))

# --- MAP Reduce --------------------------------------------------------------
# `llm`, `pages`, `display` and `Markdown` are defined by earlier cells.
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

# Prompt applied to each document individually (the "map" step).
map_prompts = (
    "This text below represent the a resume of Data Scientist with all the details\n"
    " prensent in the text below. Your job is to create a perfect resume which is concise yet cover all the important\n"
    " projects , jobs and skills and technologies that are mentioned in the text.:\n"
    ' "{text}"\n'
    " "
)

# Prompt applied to the per-document results (the "combine" step).
combine_prompt = (
    "\n"
    " Below Text contains resume that was written by you. You job is to combine all the resume chunks and Return a perfect resume which is concise yet cover all the important\n"
    " projects , jobs and skills and technologies that are mentioned in the text.\n"
    " {text}\n"
    "\n"
    " "
)

# Both templates expose exactly one variable, `text`.
map_template = PromptTemplate.from_template(map_prompts)
combine_template = PromptTemplate.from_template(combine_prompt)

chain = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    map_prompt=map_template,
    combine_prompt=combine_template,
    verbose=False,
)

# Overwrites `output` with the map-reduce result, then renders it.
output = chain.run(input_documents=pages)
display(Markdown(f"Text: {output}"))
"python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}