Lauredecaudin committed
Commit d3747c9
1 Parent(s): b024450

Update pages/4-Create your own bot (advanced).py

pages/4-Create your own bot (advanced).py CHANGED
@@ -87,77 +87,93 @@ def developer_guide():
  # Call the function to display the developer guide page
  #developer_guide()
  import streamlit as st
- from transformers import RagRetriever, RagSequenceForGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
- from PyPDF2 import PdfReader
- import torch
-
- # Load the tokenizer and the custom model (GPT-Neo-125M)
- @st.cache_resource
- def load_gpt_neo_rag():
-     tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
-
-     # Load GPT-Neo as the generator
-     custom_generator = AutoModelForSeq2SeqLM.from_pretrained("EleutherAI/gpt-neo-125M")
-
-     # Initialize RAG retriever
-     retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True)
-
-     # Initialize RAG with GPT-Neo as the generator
-     rag_model = RagSequenceForGeneration.from_pretrained(
-         "facebook/rag-token-nq", retriever=retriever, generator=custom_generator
-     )
-
-     return tokenizer, rag_model
-
- tokenizer, rag_model = load_gpt_neo_rag()
-
- # Function to read resume PDF
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import TextLoader
+ from langchain_community.vectorstores import FAISS
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.output_parsers import StrOutputParser
+ from langchain_together import TogetherEmbeddings
+ from langchain_community.llms import Together
+ import PyPDF2
+ import os
+
+ # Function to read text from PDF
  def read_pdf(file):
-     pdf_reader = PdfReader(file)
+     pdf_reader = PyPDF2.PdfReader(file)
      text = ""
      for page in pdf_reader.pages:
          text += page.extract_text()
      return text

- # Function to generate a contextualized answer using RAG with GPT-Neo
- def generate_answer(question, resume_text, name="The candidate"):
-     """
-     Uses RAG with GPT-Neo to generate answers based on the resume.
-     """
-     # Add context instruction to guide the model
+ # Load and split resume data
+ def load_and_split_resume(text):
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+     # create_documents wraps the raw string in Document objects before splitting
+     docs = text_splitter.create_documents([text])
+     return docs
+
+ # Create vector store and retriever
+ def setup_vector_store(docs):
+     vectorstore = FAISS.from_documents(docs, TogetherEmbeddings(model="togethercomputer/m2-bert-80M-8k-retrieval"))
+     retriever = vectorstore.as_retriever()
+     return retriever
+
+ # Set up language model
+ def setup_model():
+     model = Together(
+         model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+         temperature=0.0,
+         max_tokens=500,
+         top_k=0
+     )
+     return model
+
+ # Generate answer based on context and question
+ def generate_answer(question, retriever, model, name="The candidate"):
      context_instruction = (
          f"You are {name}, and your professional experience is outlined in the following resume. "
          "Answer the question as if you are the candidate, providing details from the resume where relevant."
      )

-     # Combine the question with the context instruction
-     full_question = f"{context_instruction} Question: {question}"
-
-     # Tokenize the input
-     inputs = tokenizer(full_question, resume_text, return_tensors="pt", truncation=True, padding="longest")
-
-     # Generate the response
-     outputs = rag_model.generate(**inputs)
-
-     # Decode the generated response
-     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # Retrieve the resume chunks most relevant to the question
+     context_docs = retriever.invoke(question)
+     context = " ".join([doc.page_content for doc in context_docs])
+
+     # Prepare the prompt
+     template = """<s>[INST] answer from context only as if the person is responding (use 'I' instead of 'you' in response). Always answer in short. If asked about greeting, greet back.
+     {context}
+     Question: {question} [/INST]"""
+
+     prompt = ChatPromptTemplate.from_template(template)
+
+     # Chain the prompt, model, and output parser
+     chain = prompt | model | StrOutputParser()
+
+     # Run the chain on the retrieved context and the user's question
+     answer = chain.invoke({"context": context, "question": question})
      return answer

  # Streamlit app UI
- st.title("Resume-based Q&A Bot (RAG with GPT-Neo)")
+ st.title("Resume-based Q&A Bot (Streamlit with Together)")

  st.write("Upload your resume and ask questions about your professional experience!")

  # File uploader for the resume
  uploaded_file = st.file_uploader("Upload your resume (PDF format)", type=["pdf"])

- # If a file is uploaded, extract the text
  if uploaded_file is not None:
      resume_text = read_pdf(uploaded_file)
-     st.write("Resume successfully uploaded!")
-     st.write("Extracted Resume Text:")
-     st.text(resume_text)  # Display the extracted resume text for reference
+
+     # Load and process the resume
+     docs = load_and_split_resume(resume_text)
+     retriever = setup_vector_store(docs)
+     model = setup_model()
+
+     st.write("Resume successfully uploaded and processed!")

      # Text input for questions
      question = st.text_input("Ask a question about the resume")
@@ -168,7 +184,7 @@ if uploaded_file is not None:
      # Generate and display the answer when the button is clicked
      if st.button("Generate Answer"):
          if question:
-             answer = generate_answer(question, resume_text, candidate_name)
+             answer = generate_answer(question, retriever, model, candidate_name)
              st.write("Answer:")
              st.write(answer)
          else:
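A note on the removed approach: EleutherAI/gpt-neo-125M is a decoder-only (causal) language model, so AutoModelForSeq2SeqLM cannot load it, and the facebook/rag-* checkpoints are built around an encoder-decoder generator such as BART. A minimal sketch of loading GPT-Neo as what it actually is, kept separate from RAG (untested here; standard transformers API only):

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M")

# Causal LMs continue the prompt, so the question goes in as plain text
inputs = tokenizer("Question: What is your latest role? Answer:", return_tensors="pt")
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))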
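The Together-backed classes in the new code (TogetherEmbeddings and Together) authenticate with an API key, typically read from the TOGETHER_API_KEY environment variable, which is presumably why os is imported. A minimal sketch of providing it before the vector store is built (the key value is a placeholder):

import os
os.environ["TOGETHER_API_KEY"] = "..."  # placeholder; set the real key outside the source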
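For reference, FAISS.from_documents expects a list of Document objects, which is what RecursiveCharacterTextSplitter.create_documents produces from raw strings. A small sketch of the splitting step in isolation (assuming only langchain_text_splitters; the resume string is a stand-in):

from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = splitter.create_documents(["...full resume text..."])  # -> list[Document]
print(len(docs), repr(docs[0].page_content[:80]))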
 
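The generate_answer function above retrieves context eagerly and then runs a prompt | model | parser chain. An equivalent LCEL pattern wires the retriever into the chain itself, so retrieval happens on every invocation; a sketch under the assumption that retriever and model are the objects built in this file (LCEL coerces the plain dict and function into runnables):

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    # Join retrieved chunks into a single context string
    return " ".join(doc.page_content for doc in docs)

template = """<s>[INST] answer from context only as if the person is responding (use 'I' instead of 'you' in response). Always answer in short. If asked about greeting, greet back.
{context}
Question: {question} [/INST]"""
prompt = ChatPromptTemplate.from_template(template)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

answer = rag_chain.invoke("What is your most recent role?")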