Spaces:
Running
Running
Lauredecaudin
committed on
Commit
•
d3747c9
1
Parent(s):
b024450
Update pages/4-Create your own bot (advanced).py
Browse files
pages/4-Create your own bot (advanced).py
CHANGED
@@ -87,77 +87,93 @@ def developer_guide():
|
|
87 |
# Call the function to display the developer guide page
|
88 |
#developer_guide()
|
89 |
import streamlit as st
|
90 |
-
from
|
91 |
-
from
|
92 |
-
import
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
# Initialize RAG retriever
|
103 |
-
retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True)
|
104 |
-
|
105 |
-
# Initialize RAG with GPT-Neo as the generator
|
106 |
-
rag_model = RagSequenceForGeneration.from_pretrained(
|
107 |
-
"facebook/rag-token-nq", retriever=retriever, generator=custom_generator
|
108 |
-
)
|
109 |
-
|
110 |
-
return tokenizer, rag_model
|
111 |
-
|
112 |
-
tokenizer, rag_model = load_gpt_neo_rag()
|
113 |
-
|
114 |
-
# Function to read resume PDF
|
115 |
def read_pdf(file):
    """Return the text of every page of a PDF as a single string.

    Args:
        file: The PDF source, handed unchanged to ``PdfReader``.

    Returns:
        The extracted text of all pages, concatenated in page order with
        no separator between pages.
    """
    reader = PdfReader(file)
    return "".join(page.extract_text() for page in reader.pages)
|
121 |
|
122 |
-
#
|
123 |
-
def
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
context_instruction = (
|
129 |
f"You are {name}, and your professional experience is outlined in the following resume. "
|
130 |
"Answer the question as if you are the candidate, providing details from the resume where relevant."
|
131 |
)
|
132 |
|
133 |
-
#
|
134 |
-
|
|
|
135 |
|
136 |
-
#
|
137 |
-
|
|
|
|
|
138 |
|
139 |
-
|
140 |
-
outputs = rag_model.generate(**inputs)
|
141 |
|
142 |
-
#
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
144 |
|
|
|
145 |
return answer
|
146 |
|
147 |
# Streamlit app UI
|
148 |
-
st.title("Resume-based Q&A Bot (
|
149 |
|
150 |
st.write("Upload your resume and ask questions about your professional experience!")
|
151 |
|
152 |
# File uploader for the resume
|
153 |
uploaded_file = st.file_uploader("Upload your resume (PDF format)", type=["pdf"])
|
154 |
|
155 |
-
# If a file is uploaded, extract the text
|
156 |
if uploaded_file is not None:
|
157 |
resume_text = read_pdf(uploaded_file)
|
158 |
-
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
161 |
|
162 |
# Text input for questions
|
163 |
question = st.text_input("Ask a question about the resume")
|
@@ -168,7 +184,7 @@ if uploaded_file is not None:
|
|
168 |
# Generate and display the answer when the button is clicked
|
169 |
if st.button("Generate Answer"):
|
170 |
if question:
|
171 |
-
answer = generate_answer(question,
|
172 |
st.write("Answer:")
|
173 |
st.write(answer)
|
174 |
else:
|
|
|
87 |
# Call the function to display the developer guide page
|
88 |
#developer_guide()
|
89 |
import streamlit as st
|
90 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
91 |
+
from langchain_community.document_loaders import TextLoader
|
92 |
+
from langchain_community.vectorstores import FAISS
|
93 |
+
from langchain_core.prompts import ChatPromptTemplate
|
94 |
+
from langchain_core.output_parsers import StrOutputParser
|
95 |
+
from langchain_together import TogetherEmbeddings
|
96 |
+
from langchain_community.llms import Together
|
97 |
+
import PyPDF2
|
98 |
+
import os
|
99 |
+
|
100 |
+
# Function to read text from PDF
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
def read_pdf(file):
    """Return the text of every page of a PDF as a single string.

    Args:
        file: The PDF source, handed unchanged to ``PyPDF2.PdfReader``
            (the app passes the Streamlit upload object).

    Returns:
        The extracted text of all pages, concatenated in page order with
        no separator between pages.
    """
    reader = PyPDF2.PdfReader(file)
    return "".join(page.extract_text() for page in reader.pages)
|
107 |
|
108 |
+
# Load and split resume data
|
109 |
+
def load_and_split_resume(text):
    """Split raw resume text into chunks suitable for embedding.

    Args:
        text: The resume as one plain-text string.

    Returns:
        A list of LangChain ``Document`` objects, one per chunk, produced
        by a ``RecursiveCharacterTextSplitter`` configured with
        ``chunk_size=1000`` and ``chunk_overlap=0``.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    # split_documents() expects Document objects (it reads .page_content and
    # .metadata) and fails on plain strings. create_documents() takes raw
    # strings and wraps every resulting chunk in a Document, which is what
    # FAISS.from_documents() needs downstream.
    return text_splitter.create_documents([text])
|
114 |
+
|
115 |
+
# Create vector store and retriever
|
116 |
+
def setup_vector_store(docs):
    """Embed the resume chunks into a FAISS index and return its retriever.

    Args:
        docs: Documents to index, passed to ``FAISS.from_documents``.

    Returns:
        The retriever obtained from the FAISS vector store via
        ``as_retriever()``.
    """
    embeddings = TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval")
    return FAISS.from_documents(docs, embeddings).as_retriever()
|
120 |
+
|
121 |
+
# Set up language model
|
122 |
+
def setup_model():
    """Create the Together-hosted LLM used to answer resume questions.

    Returns:
        A ``Together`` LLM configured for the Mixtral 8x7B instruct model
        with temperature 0.0, at most 500 output tokens, and ``top_k=0``.
    """
    generation_settings = {
        "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "temperature": 0.0,
        "max_tokens": 500,
        "top_k": 0,
    }
    return Together(**generation_settings)
|
130 |
+
|
131 |
+
# Generate answer based on context and question
|
132 |
+
def generate_answer(question, retriever, model, name="The candidate"):
    """Answer a question about the resume in the candidate's own voice.

    Retrieves the resume chunks relevant to ``question``, places them
    (preceded by a persona instruction naming the candidate) into the
    prompt, and runs the prompt through the model.

    Args:
        question: The user's question about the resume.
        retriever: A LangChain retriever over the resume chunks; it is
            called with ``invoke(question)`` and must return documents
            exposing ``page_content``.
        model: A LangChain runnable language model.
        name: Name the model is told to answer as.

    Returns:
        The model's answer as a string.
    """
    context_instruction = (
        f"You are {name}, and your professional experience is outlined in the following resume. "
        "Answer the question as if you are the candidate, providing details from the resume where relevant."
    )

    # Retrievers are Runnables: invoke() is the retrieval entry point
    # (they have no retrieve() method).
    context_docs = retriever.invoke(question)
    resume_excerpts = " ".join(doc.page_content for doc in context_docs)

    # The persona instruction was previously built but never sent to the
    # model; lead the context with it so that `name` takes effect.
    context = f"{context_instruction}\n\n{resume_excerpts}"

    template = (
        "<s>[INST] answer from context only as if the person is responding "
        "(use 'I' instead of 'you' in response). Always answer in short. "
        "If asked about greeting, greet back.\n"
        "{context}\n"
        "Question: {question} [/INST]"
    )
    prompt = ChatPromptTemplate.from_template(template)

    # The chain only contains runnables; the already-computed values are
    # supplied as the input mapping at invoke() time. (A dict of plain
    # strings cannot head an LCEL chain, and invoke() requires an input.)
    chain = prompt | model | StrOutputParser()
    return chain.invoke({"context": context, "question": question})
|
159 |
|
160 |
# Streamlit app UI
|
161 |
+
st.title("Resume-based Q&A Bot (Streamlit with Together)")
|
162 |
|
163 |
st.write("Upload your resume and ask questions about your professional experience!")
|
164 |
|
165 |
# File uploader for the resume
|
166 |
uploaded_file = st.file_uploader("Upload your resume (PDF format)", type=["pdf"])
|
167 |
|
|
|
168 |
if uploaded_file is not None:
|
169 |
resume_text = read_pdf(uploaded_file)
|
170 |
+
|
171 |
+
# Load and process the resume
|
172 |
+
docs = load_and_split_resume(resume_text)
|
173 |
+
retriever = setup_vector_store(docs)
|
174 |
+
model = setup_model()
|
175 |
+
|
176 |
+
st.write("Resume successfully uploaded and processed!")
|
177 |
|
178 |
# Text input for questions
|
179 |
question = st.text_input("Ask a question about the resume")
|
|
|
184 |
# Generate and display the answer when the button is clicked
|
185 |
if st.button("Generate Answer"):
|
186 |
if question:
|
187 |
+
answer = generate_answer(question, retriever, model, candidate_name)
|
188 |
st.write("Answer:")
|
189 |
st.write(answer)
|
190 |
else:
|