Lauredecaudin committed
Commit b024450 • 1 Parent(s): 2ac107e
Update pages/4-Create your own bot (advanced).py
pages/4-Create your own bot (advanced).py
CHANGED
@@ -86,60 +86,92 @@ def developer_guide():
 
 # Call the function to display the developer guide page
 #developer_guide()
-
 import streamlit as st
-import
-from
+from transformers import RagRetriever, RagSequenceForGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
+from PyPDF2 import PdfReader
 import torch
 
-#
-MODEL_NAME = "facebook/rag-sequence-nq"
-
+# Load the tokenizer and the custom model (GPT-Neo-125M)
 @st.cache_resource
-def
-
-
-
+def load_gpt_neo_rag():
+    tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
+
+    # Load GPT-Neo as the generator
+    custom_generator = AutoModelForSeq2SeqLM.from_pretrained("EleutherAI/gpt-neo-125M")
+
+    # Initialize RAG retriever
+    retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True)
+
+    # Initialize RAG with GPT-Neo as the generator
+    rag_model = RagSequenceForGeneration.from_pretrained(
+        "facebook/rag-token-nq", retriever=retriever, generator=custom_generator
+    )
+
+    return tokenizer, rag_model
 
-
+tokenizer, rag_model = load_gpt_neo_rag()
 
-
-
+# Function to read resume PDF
+def read_pdf(file):
+    pdf_reader = PdfReader(file)
     text = ""
-    for
-        page = pdf_reader.pages[page_num]
+    for page in pdf_reader.pages:
         text += page.extract_text()
     return text
 
-
-
+# Function to generate a contextualized answer using RAG with GPT-Neo
+def generate_answer(question, resume_text, name="The candidate"):
+    """
+    Uses RAG with GPT-Neo to generate answers based on the resume.
+    """
+    # Add context instruction to guide the model
     context_instruction = (
-        f"
+        f"You are {name}, and your professional experience is outlined in the following resume. "
         "Answer the question as if you are the candidate, providing details from the resume where relevant."
     )
-
+
     # Combine the question with the context instruction
     full_question = f"{context_instruction} Question: {question}"
+
+    # Tokenize the input
+    inputs = tokenizer(full_question, resume_text, return_tensors="pt", truncation=True, padding="longest")
+
+    # Generate the response
+    outputs = rag_model.generate(**inputs)
+
+    # Decode the generated response
+    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    return answer
 
-
-
-
-# Streamlit app layout
-st.title("Resume-based Q&A Bot")
-
-st.write("Upload your resume as a PDF, and the bot will answer questions about your professional experiences.")
+# Streamlit app UI
+st.title("Resume-based Q&A Bot (RAG with GPT-Neo)")
 
-
+st.write("Upload your resume and ask questions about your professional experience!")
 
-
-
-    resume_text = extract_text_from_pdf(uploaded_file)
-    #st.text_area("Extracted Resume Text", value=resume_text, height=250)
+# File uploader for the resume
+uploaded_file = st.file_uploader("Upload your resume (PDF format)", type=["pdf"])
 
-
+# If a file is uploaded, extract the text
+if uploaded_file is not None:
+    resume_text = read_pdf(uploaded_file)
+    st.write("Resume successfully uploaded!")
+    st.write("Extracted Resume Text:")
+    st.text(resume_text) # Display the extracted resume text for reference
 
-
-
-
-
-
+    # Text input for questions
+    question = st.text_input("Ask a question about the resume")
+
+    # Name input for the person in the resume
+    candidate_name = st.text_input("Enter the candidate's name (optional)", "The candidate")
+
+    # Generate and display the answer when the button is clicked
+    if st.button("Generate Answer"):
+        if question:
+            answer = generate_answer(question, resume_text, candidate_name)
+            st.write("Answer:")
+            st.write(answer)
+        else:
+            st.write("Please enter a question.")
+else:
+    st.write("Please upload a PDF resume to get started.")
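A note on the new RAG wiring, for anyone adapting this commit: `AutoModelForSeq2SeqLM.from_pretrained("EleutherAI/gpt-neo-125M")` requests a seq2seq head that GPT-Neo (a decoder-only causal LM) does not provide, and RAG inputs are normally encoded with `RagTokenizer`, which wraps the DPR question-encoder tokenizer rather than the generator's tokenizer. Below is a minimal sketch of the stock pipeline around the `facebook/rag-sequence-nq` checkpoint that the deleted code referenced, restricted to calls that exist in the Transformers RAG API; the question string is a placeholder.

```python
import torch
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration

# Tokenizer bundle: DPR question-encoder tokenizer + BART generator tokenizer
tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")

# Dummy-dataset retriever, as in the commit (avoids the full wiki_dpr index download)
retriever = RagRetriever.from_pretrained(
    "facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True
)

# Stock RAG model with its matching BART generator
model = RagSequenceForGeneration.from_pretrained(
    "facebook/rag-sequence-nq", retriever=retriever
)

# Encode a question and generate an answer grounded in retrieved passages
inputs = tokenizer("What is the candidate's most recent role?", return_tensors="pt")
with torch.no_grad():
    generated = model.generate(input_ids=inputs["input_ids"])
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])
```

If a custom generator is really wanted, the documented composition path is `RagSequenceForGeneration.from_pretrained_question_encoder_generator(...)`, which pairs a DPR question encoder with a seq2seq generator such as BART or T5. Note also that `rag_model.generate(**inputs)` answers from the retriever's own index, so the resume text passed as the tokenizer's second argument never reaches the model as retrieval context.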