Lauredecaudin committed
Commit d3747c9
1 Parent(s): b024450

Update pages/4-Create your own bot (advanced).py

pages/4-Create your own bot (advanced).py CHANGED
@@ -87,77 +87,93 @@ def developer_guide():
  # Call the function to display the developer guide page
  #developer_guide()
  import streamlit as st
- from transformers import RagRetriever, RagSequenceForGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
- from PyPDF2 import PdfReader
- import torch
-
- # Load the tokenizer and the custom model (GPT-Neo-125M)
- @st.cache_resource
- def load_gpt_neo_rag():
-     tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
-
-     # Load GPT-Neo as the generator
-     custom_generator = AutoModelForSeq2SeqLM.from_pretrained("EleutherAI/gpt-neo-125M")
-
-     # Initialize RAG retriever
-     retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True)
-
-     # Initialize RAG with GPT-Neo as the generator
-     rag_model = RagSequenceForGeneration.from_pretrained(
-         "facebook/rag-token-nq", retriever=retriever, generator=custom_generator
-     )
-
-     return tokenizer, rag_model
-
- tokenizer, rag_model = load_gpt_neo_rag()
-
- # Function to read resume PDF
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import TextLoader
+ from langchain_community.vectorstores import FAISS
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_together import TogetherEmbeddings
+ from langchain_community.llms import Together
+ import PyPDF2
+ import os
+
+ # Function to read text from PDF
  def read_pdf(file):
-     pdf_reader = PdfReader(file)
+     pdf_reader = PyPDF2.PdfReader(file)
      text = ""
      for page in pdf_reader.pages:
          text += page.extract_text()
      return text

- # Function to generate a contextualized answer using RAG with GPT-Neo
- def generate_answer(question, resume_text, name="The candidate"):
-     """
-     Uses RAG with GPT-Neo to generate answers based on the resume.
-     """
-     # Add context instruction to guide the model
+ # Load and split resume data
+ def load_and_split_resume(text):
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+     # create_documents wraps the raw string in Document objects before splitting
+     docs = text_splitter.create_documents([text])
+     return docs
+
+ # Create vector store and retriever
+ def setup_vector_store(docs):
+     vectorstore = FAISS.from_documents(docs, TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval"))
+     retriever = vectorstore.as_retriever()
+     return retriever
+
+ # Set up language model
+ def setup_model():
+     model = Together(
+         model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+         temperature=0.0,
+         max_tokens=500,
+         top_k=0
+     )
+     return model
+
+ # Generate answer based on context and question
+ def generate_answer(question, retriever, model, name="The candidate"):
      context_instruction = (
          f"You are {name}, and your professional experience is outlined in the following resume. "
          "Answer the question as if you are the candidate, providing details from the resume where relevant."
      )

-     # Combine the question with the context instruction
-     full_question = f"{context_instruction} Question: {question}"
-
-     # Tokenize the input
-     inputs = tokenizer(full_question, resume_text, return_tensors="pt", truncation=True, padding="longest")
-
-     # Generate the response
-     outputs = rag_model.generate(**inputs)
-
-     # Decode the generated response
-     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # Retrieve the resume chunks most relevant to the question
+     context_docs = retriever.invoke(question)
+     context = " ".join([doc.page_content for doc in context_docs])
+
+     # Prepare the prompt
+     template = """<s>[INST] answer from context only as if the person is responding (use 'I' instead of 'you' in response). Always answer in short. If asked about greeting, greet back.
+     {context}
+     Question: {question} [/INST]"""
+
+     prompt = ChatPromptTemplate.from_template(template)
+
+     # Chain the prompt, model, and output parser
+     chain = prompt | model | StrOutputParser()
+
+     # Run the chain on the retrieved context and the user's question
+     answer = chain.invoke({"context": context, "question": question})
      return answer

  # Streamlit app UI
- st.title("Resume-based Q&A Bot (RAG with GPT-Neo)")
+ st.title("Resume-based Q&A Bot (Streamlit with Together)")

  st.write("Upload your resume and ask questions about your professional experience!")

  # File uploader for the resume
  uploaded_file = st.file_uploader("Upload your resume (PDF format)", type=["pdf"])

- # If a file is uploaded, extract the text
  if uploaded_file is not None:
      resume_text = read_pdf(uploaded_file)
-     st.write("Resume successfully uploaded!")
-     st.write("Extracted Resume Text:")
-     st.text(resume_text)  # Display the extracted resume text for reference
+
+     # Load and process the resume
+     docs = load_and_split_resume(resume_text)
+     retriever = setup_vector_store(docs)
+     model = setup_model()
+
+     st.write("Resume successfully uploaded and processed!")

      # Text input for questions
      question = st.text_input("Ask a question about the resume")
@@ -168,7 +184,7 @@ if uploaded_file is not None:
      # Generate and display the answer when the button is clicked
      if st.button("Generate Answer"):
          if question:
-             answer = generate_answer(question, resume_text, candidate_name)
+             answer = generate_answer(question, retriever, model, candidate_name)
              st.write("Answer:")
              st.write(answer)
          else:
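A note on the removed approach: EleutherAI/gpt-neo-125M is a decoder-only (causal) language model, so AutoModelForSeq2SeqLM cannot load it, and the facebook/rag-* checkpoints are built around an encoder-decoder generator such as BART. A minimal sketch of loading GPT-Neo as what it actually is, kept separate from RAG (untested here; standard transformers API only):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M")

# Causal LMs continue the prompt, so the question goes in as plain text
inputs = tokenizer("Question: What is your latest role? Answer:", return_tensors="pt")
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))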
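The Together-backed classes in the new code (TogetherEmbeddings and Together) authenticate with an API key, typically read from the TOGETHER_API_KEY environment variable, which is presumably why os is imported. A minimal sketch of providing it before the vector store is built (the key value is a placeholder):

import os
os.environ["TOGETHER_API_KEY"] = "..."  # placeholder; set the real key outside the source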
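For reference, FAISS.from_documents expects a list of Document objects, which is what RecursiveCharacterTextSplitter.create_documents produces from raw strings. A small sketch of the splitting step in isolation (assuming only langchain_text_splitters; the resume string is a stand-in):

from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = splitter.create_documents(["...full resume text..."])  # -> list[Document]
print(len(docs), repr(docs[0].page_content[:80]))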
 
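The generate_answer function above retrieves context eagerly and then runs a prompt | model | parser chain. An equivalent LCEL pattern wires the retriever into the chain itself, so retrieval happens on every invocation; a sketch under the assumption that retriever and model are the objects built in this file (LCEL coerces the plain dict and function into runnables):

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    # Join retrieved chunks into a single context string
    return " ".join(doc.page_content for doc in docs)

template = """<s>[INST] answer from context only as if the person is responding (use 'I' instead of 'you' in response). Always answer in short. If asked about greeting, greet back.
{context}
Question: {question} [/INST]"""
prompt = ChatPromptTemplate.from_template(template)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

answer = rag_chain.invoke("What is your most recent role?")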