import re
import streamlit as st
from PyPDF2 import PdfReader
from dotenv import load_dotenv
from FindKeyword import FindKeyWords
from PreprocessText import preprocess_text
from model_Responce import model_prediction
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS

from htmlTemplates import css, bot_template, user_template
from InstructorEmbedding import INSTRUCTOR
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def get_text_chunks(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return chunks


def encode_question(question):
    embeddings = HuggingFaceInstructEmbeddings()
    question_vector = embeddings.embed_query(question)
    return question_vector
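
# Embeds the new chunks and merges them into the FAISS index stored on disk.
# Note: this assumes a local index named "faiss_index_V2" already exists,
# since FAISS.load_local is called before merging.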
def save_vector_store(text_chunks):
    embeddings = HuggingFaceInstructEmbeddings()
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)

    new_db = FAISS.load_local("faiss_index_V2", embeddings)
    new_db.merge_from(vectorstore)
    new_db.save_local("faiss_index_V2")

    return st.write("Vector store is saved")
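
# Runs the resume-classification model (model_Responce.model_prediction) on each
# extracted resume text and stores the result under the "prediction" key.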
def button_function(all_text):
    for item in all_text:
        text = item['text']
        pred = model_prediction(text)
        item['prediction'] = pred
    return all_text
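
# Extracts text from the uploaded PDFs. With preprocess=True it returns a list of
# {"filename", "text"} dicts (one per resume, cleaned by preprocess_text); with
# preprocess=False it returns the raw text of all PDFs as a single string.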
def get_pdf_text(pdfs, preprocess=True):
    if preprocess:
        all_text = []
        for pdf in pdfs:
            pdf_reader = PdfReader(pdf)
            filename = pdf.name
            text = ""
            for page in pdf_reader.pages:
                # Guard against pages with no extractable text.
                text += page.extract_text() or ""
            text = preprocess_text(text)
            all_text.append({"filename": filename, "text": text})
        return all_text
    else:
        text = ""
        for pdf in pdfs:
            pdf_reader = PdfReader(pdf)
            for page in pdf_reader.pages:
                text += page.extract_text() or ""
        return text
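
# Applies FindKeyWords to each resume's text for the given keywords and collects
# the returned (keyword-filtered) text per file.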
def filter_keywords(all_text, keywords):
    filtered_text = []
    for item in all_text:
        filename = item['filename']
        text = item['text']
        filtered_text_with_keywords = FindKeyWords(keywords, text)
        filtered_text.append({"filename": filename, "text": filtered_text_with_keywords})
    return filtered_text
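
# Streamlit entry point: upload resumes in the sidebar, then either predict a
# category per resume, filter resumes by keywords, match them against a target
# category, or ask questions over the saved FAISS index.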
def main():
    load_dotenv()
    st.header("Resume Filter using Keywords 💬")

    with st.sidebar:
        st.title('🤖💬 LLM Chat App')

        pdfs = st.file_uploader("Upload your Resumes", type='pdf', accept_multiple_files=True)

        functionality = st.radio(
            "Choose functionality:",
            ("Make Predictions", "Filter Keywords", "Predict the Suitable Candidate", "Ask Questions")
        )

        if functionality == "Ask Questions":
            if st.button('Process'):
                with st.spinner("Processing"):
                    # Build the vector store from the raw (unpreprocessed) resume text.
                    raw_text = get_pdf_text(pdfs, preprocess=False)
                    text_chunks = get_text_chunks(raw_text)
                    save_vector_store(text_chunks)

        add_vertical_space(5)
        st.write('Made with ❤️ by Fazni Farook')

    # file_uploader with accept_multiple_files=True returns a list, so only
    # proceed once at least one resume has been uploaded.
    if pdfs:
        all_text = get_pdf_text(pdfs)
        if functionality == "Make Predictions":
            if st.button('Make Prediction'):
                with st.spinner("Processing"):
                    all_text = button_function(all_text)

                    for item in all_text:
                        filename = item["filename"]
                        text = item["text"]
                        pred = item["prediction"]
                        st.markdown(f"**Filename: {filename}**")
                        st.markdown(f"**Prediction: {pred}**")
                        st.markdown("---")
        elif functionality == "Filter Keywords":
            keyword_input = st.text_input("Keyword")
            keywords = [keyword.strip() for keyword in keyword_input.split(",")]

            if st.button('Filter Keywords'):
                with st.spinner("Processing"):
                    filtered_text = filter_keywords(all_text, keywords)

                    for item in filtered_text:
                        filename = item["filename"]
                        text = item["text"]
                        st.markdown(f"**Filename: {filename}**")
                        st.markdown(text, unsafe_allow_html=True)
                        st.markdown("---")
        elif functionality == "Predict the Suitable Candidate":
            keyword = st.text_input("Keyword")

            if st.button('Filter Resumes'):
                with st.spinner("Processing"):
                    all_text = button_function(all_text)

                    # Show only the resumes whose predicted category matches the keyword.
                    count = 0
                    for item in all_text:
                        filename = item["filename"]
                        prediction = item["prediction"]
                        if keyword.lower() == prediction.lower():
                            count += 1
                            st.markdown(f"**Filename: {filename}**")
                            st.markdown(prediction, unsafe_allow_html=True)
                            st.markdown("---")

                    if count == 0:
                        st.markdown("No match found")
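
        # Question answering over the previously saved FAISS index: embed the
        # question with the same Instructor embeddings and show the closest chunk.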
        elif functionality == "Ask Questions":
            embeddings = HuggingFaceInstructEmbeddings()

            new_db = FAISS.load_local("faiss_index_V2", embeddings)

            st.write(css, unsafe_allow_html=True)

            question = st.text_input("Ask Question")

            if st.button('Ask Question'):
                with st.spinner("Processing"):
                    if question:
                        question_vector = encode_question(question)

                        # Retrieve the chunk closest to the question and display it.
                        output = new_db.similarity_search_by_vector(question_vector)
                        page_content = output[0].page_content
                        st.write(page_content)


if __name__ == '__main__':
    main()