# WorkGenius / app.py
# mbosse99's picture
# Updated GPT Engine
# 03d5dfe verified
import streamlit as st
from streamlit_js_eval import streamlit_js_eval
from azure.storage.blob import BlobServiceClient
from azure.cosmos import CosmosClient, exceptions
from PyPDF2 import PdfReader
import io
import openai
import json
import os
import uuid
import time
import calendar
import re
# Azure OpenAI client configuration. The API key is read from the
# OPENAI_API_KEY environment variable; endpoint, API type and API version
# are fixed here.
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_base = "https://tensora-oai-france.openai.azure.com/"
openai.api_type = "azure"
openai.api_version = "2023-12-01-preview"
# Blob storage client used by upload_blob(). The connection string is read
# from the CONNECTION environment variable.
connection_string = os.getenv("CONNECTION")
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
def upload_blob(pdf_name, json_data, pdf_data_jobdescription, pdf_data_cvs, pre_generated_bool, custom_questions):
    """Upload the job description and all CVs, and create the matching DB items.

    Steps, in order:
      1. Upload the job description PDF as ``<pdf_name>.pdf`` to the
         "jobdescriptions" blob container.
      2. Create the job item via ``upload_job_db_item``.
      3. If ``pre_generated_bool`` is set, create one question item per entry
         of ``custom_questions`` via ``upload_question_db_item``.
      4. For every CV: create the candidate item via ``upload_db_item``,
         upload the CV PDF as ``<cv_id>.pdf`` and collect the chatbot link.
      5. Show a success message and one link per candidate in the page.

    Args:
        pdf_name: Identifier of the job; used as blob name and as DB id.
        json_data: JSON string with the form data (parsed with ``json.loads``).
        pdf_data_jobdescription: Content of the job description PDF.
        pdf_data_cvs: Sequence with the content of each CV PDF.
        pre_generated_bool: Whether the questions were generated in advance.
        custom_questions: The pre-generated questions to store.

    Returns:
        True when every step ran without raising, False otherwise (the error
        is printed, not raised).

    The candidate name of the n-th CV is read from
    ``st.session_state["cv-<n>"]`` (1-based).
    """
    try:
        container_client = blob_service_client.get_container_client("jobdescriptions")

        job_blob_client = container_client.get_blob_client(f"{pdf_name}.pdf")
        job_blob_client.upload_blob(pdf_data_jobdescription, overwrite=True)

        # Parse the form payload once and reuse it for the job item and for
        # every candidate item; the callees only read from it.
        form_data = json.loads(json_data)
        upload_job_db_item(pdf_name, len(pdf_data_cvs), form_data, pre_generated_bool, custom_questions)

        if pre_generated_bool:
            for question_number, question in enumerate(custom_questions, start=1):
                # The id is the running number directly followed by the
                # current UTC epoch seconds (no separator).
                question_id = f"{pdf_name}-question-nr-{question_number}{calendar.timegm(time.gmtime())}"
                upload_question_db_item(
                    question_id,
                    pdf_name,
                    question,
                    st.session_state["pdf_data_jobdescription_string"],
                )

        # One (candidate name, chatbot link) pair per uploaded CV.
        candidates = []
        for cv_number, cv_bytes in enumerate(pdf_data_cvs, start=1):
            candidate_name = st.session_state[f"cv-{cv_number}"]
            cv_id = f"{pdf_name}-cv-nr-{cv_number}{calendar.timegm(time.gmtime())}"
            upload_db_item(candidate_name, form_data, pdf_name, cv_id)
            cv_blob_client = container_client.get_blob_client(f"{cv_id}.pdf")
            cv_blob_client.upload_blob(cv_bytes, overwrite=True)
            candidates.append((candidate_name, "https://tensora.ai/workgenius/cv-evaluation2/?job="+cv_id))

        st.success('Data and PDF files have been successfully uploaded. The link to the chatbot for the potential candidate is the following: ')
        for candidate_name, link in candidates:
            st.write("Link for the candidate "+candidate_name+": ")
            st.write(link)
        return True
    except Exception as e:
        # Top-level boundary for the upload: report the failure to the caller
        # through the return value instead of crashing the page.
        print(f"Fehler beim Hochladen der Daten: {str(e)}")
        return False
def upload_job_db_item(id, number_of_applicants, data, pre_generated_bool, custom_questions):
    """Insert the job record into the "JobData" container of the "ToDoList" database.

    Args:
        id: Value stored as the item's "id".
        number_of_applicants: Value stored as "number_of_applicants".
        data: Mapping that provides "title", "email", "question_one",
            "question_two" and "question_three".
        pre_generated_bool: Value stored as "pre_generated".
        custom_questions: Value stored as "custom_questions".

    Errors raised by the insert itself are printed, not raised.
    """
    cosmos = CosmosClient(
        "https://wg-candidate-data.documents.azure.com:443/",
        os.getenv("CONNECTION_DB"),
    )
    job_container = cosmos.get_database_client("ToDoList").get_container_client("JobData")

    record = {
        "id": id,
        "partitionKey": "wg-job-data-v1",
        "title": data["title"],
        "number_of_applicants": number_of_applicants,
        "every_interview_conducted": False,
        "evaluation_email": data["email"],
    }
    for question_field in ("question_one", "question_two", "question_three"):
        record[question_field] = data[question_field]
    record["pre_generated"] = pre_generated_bool
    record["custom_questions"] = custom_questions

    try:
        # Insert the item into the container
        job_container.create_item(body=record)
        print("Eintrag erfolgreich in die Cosmos DB eingefügt. Container: Job Data")
    except exceptions.CosmosHttpResponseError as err:
        print(f"Fehler beim Schreiben in die Cosmos DB: {str(err)}")
    except Exception as err:
        print(f"Allgemeiner Fehler: {str(err)}")
def upload_db_item(name, data, job_description_id, cv_id):
    """Insert a candidate record into the "Items" container of the "ToDoList" database.

    Args:
        name: Value stored as the candidate's "name".
        data: Mapping that provides "title", "email", "question_one",
            "question_two" and "question_three".
        job_description_id: Value stored as "job_description_id".
        cv_id: Value stored as the item's "id".

    Errors raised by the insert itself are printed, not raised.
    """
    cosmos = CosmosClient(
        "https://wg-candidate-data.documents.azure.com:443/",
        os.getenv("CONNECTION_DB"),
    )
    candidate_container = cosmos.get_database_client("ToDoList").get_container_client("Items")

    record = {
        "id": cv_id,
        "partitionKey": "wg-candidate-data-v1",
        "name": name,
        "title": data["title"],
        "interview_conducted": False,
        "ai_summary": "",
        "evaluation_email": data["email"],
    }
    for question_field in ("question_one", "question_two", "question_three"):
        record[question_field] = data[question_field]
    record["job_description_id"] = job_description_id

    try:
        # Insert the item into the container
        candidate_container.create_item(body=record)
        print("Eintrag erfolgreich in die Cosmos DB eingefügt. Container: Items(candidate Data)")
    except exceptions.CosmosHttpResponseError as err:
        print(f"Fehler beim Schreiben in die Cosmos DB: {str(err)}")
    except Exception as err:
        print(f"Allgemeiner Fehler: {str(err)}")
def upload_question_db_item(id, job_id, question, job_content):
    """Insert a question record into the "Questions" container of the "ToDoList" database.

    Args:
        id: Value stored as the item's "id".
        job_id: Value stored as "job_id".
        question: Value stored as "question_content".
        job_content: Value stored as "job_description".

    Errors raised by the insert itself are printed, not raised.
    """
    cosmos = CosmosClient(
        "https://wg-candidate-data.documents.azure.com:443/",
        os.getenv("CONNECTION_DB"),
    )
    question_container = cosmos.get_database_client("ToDoList").get_container_client("Questions")

    field_names = ("id", "partitionKey", "job_id", "question_content", "job_description")
    field_values = (id, "wg-question-data-v1", job_id, question, job_content)
    record = dict(zip(field_names, field_values))

    try:
        # Insert the item into the container
        question_container.create_item(body=record)
        print("Eintrag erfolgreich in die Cosmos DB eingefügt. Container: Questions(Question Data)")
    except exceptions.CosmosHttpResponseError as err:
        print(f"Fehler beim Schreiben in die Cosmos DB: {str(err)}")
    except Exception as err:
        print(f"Allgemeiner Fehler: {str(err)}")
# Inject CSS that centres the content of every Streamlit column.
_COLUMN_CSS = """
<style>
[data-testid=column]{
text-align: center;
display: flex;
align-items: center;
justify-content: center;
}
</style>
"""
st.markdown(_COLUMN_CSS, unsafe_allow_html=True)

# Header row: wide column for the title, narrow column for the logo.
col1, col2 = st.columns([2, 1])

# Give every session-state key used by the page its initial value, without
# overwriting anything kept from an earlier run of the script.
for _state_key, _initial_value in (
    ("ai_questions", None),
    ("pdf_data_cvs", None),
    ("pdf_data_cvs_string", None),
    ("pdf_data_cvs_names", []),
    ("pdf_data_jobdescription", None),
    ("pdf_data_jobdescription_string", None),
    ("final_question_string", []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
def adjust_numbering(lst):
    """Renumber question strings as "1. ...", "2. ...", following list order.

    A leading "<digits>." prefix (optionally preceded by whitespace and
    followed by a single space) is removed from each item before the new
    number is prepended. Items without such a prefix are kept whole, so
    unnumbered text or a missing space after the dot no longer raises
    IndexError, and text is never cut at a later ". " inside the question.

    Args:
        lst: Iterable of question strings.

    Returns:
        A new list with the items numbered consecutively from 1.
    """
    leading_number = re.compile(r"^\s*\d+\. ?")
    return [
        f"{position}. {leading_number.sub('', item, count=1)}"
        for position, item in enumerate(lst, start=1)
    ]
def _extract_pdf_text(pdf_bytes):
    """Return the text of all pages of a PDF (given as bytes), concatenated in page order."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    return "".join(page.extract_text() for page in reader.pages)


# Prompt template for the question generation; it is formatted below with the
# keyword arguments job, resume and n.
with open("sys_prompt_frontend.txt", encoding="utf-8") as f:
    sys_prompt = f.read()

col1.title("Job description upload")
col2.image("https://www.workgenius.com/wp-content/uploads/2023/03/WorkGenius_navy-1.svg")
st.write("Please upload the job description and resume(s) as PDF and enter the job title for the position. To receive the evaluation of the potential candidate(s), please provide your email address.")

# Stays True unless a submit in this run fails; checked at the very end.
upload_success = True

# This container represents the form
with st.container():
    # Form section for the files, names, title and mail
    uploaded_file_jobdescription = st.file_uploader("Upload the job description:", type=["pdf"], key="job")
    job_title = st.text_input("Enter the job title:", key="title")
    email = st.text_input("Enter the email:", key="mail")
    uploaded_file_cvs = st.file_uploader("Upload the resume(s):", type=["pdf"], accept_multiple_files=True, key="cvs")
    # One name field per uploaded CV; upload_blob() reads them back through
    # the "cv-<n>" keys.
    for i, cv in enumerate(st.session_state["cvs"]):
        st.text_input(label="Enter the name of the "+str(i+1)+". CV (File: "+cv.name+")", value=cv.name, key="cv-"+str(i+1))

    # The same condition gates both the question section and the submit.
    form_complete = bool(job_title and email and uploaded_file_jobdescription and uploaded_file_cvs)

    # Form section for the interview mode (pre generated or not) and additional questions
    if form_complete:
        st.write("Activate the toggle to generate and select the questions in advance. Otherwise the questions will be generated automatically during the interview.")

        nothing_cached = not (
            st.session_state["pdf_data_cvs"]
            or st.session_state["pdf_data_cvs_string"]
            or st.session_state["pdf_data_jobdescription"]
            or st.session_state["pdf_data_jobdescription_string"]
        )
        if nothing_cached:
            # First complete run: read every uploaded file once and cache the
            # raw bytes and the extracted text in the session state.
            job_bytes = uploaded_file_jobdescription.read()
            job_text = _extract_pdf_text(job_bytes)

            cv_names = []
            cv_bytes_list = []
            cv_text_parts = []
            for i, cv in enumerate(st.session_state["cvs"]):
                print(cv.name)
                cv_names.append(cv.name)
                cv_bytes = cv.read()
                cv_bytes_list.append(cv_bytes)
                cv_text_parts.append("CV "+str(i+1)+": " + _extract_pdf_text(cv_bytes) + "\n")

            # Store everything only after all files were read, so a failing
            # PDF cannot leave half-filled name/data lists behind.
            st.session_state["pdf_data_cvs_names"] = cv_names
            st.session_state["pdf_data_cvs"] = cv_bytes_list
            st.session_state["pdf_data_cvs_string"] = "".join(cv_text_parts)
            st.session_state["pdf_data_jobdescription"] = job_bytes
            st.session_state["pdf_data_jobdescription_string"] = job_text

        if st.session_state["pdf_data_cvs"]:
            known_names = st.session_state["pdf_data_cvs_names"]
            cached_cvs = st.session_state["pdf_data_cvs"]
            initial_cv_length = len(cached_cvs)

            # Add CVs that were uploaded after the first run.
            new_text_parts = []
            for i, cv in enumerate(st.session_state["cvs"]):
                if cv.name in known_names:
                    continue
                known_names.append(cv.name)
                print("At second:"+cv.name)
                cv_bytes = cv.read()
                cached_cvs.append(cv_bytes)
                # NOTE(review): this label can be non-contiguous, and the text
                # of CVs removed later stays in pdf_data_cvs_string. Fixing
                # that needs per-CV text storage; behavior is kept as it was.
                new_text_parts.append("CV "+str(i+1+initial_cv_length)+": " + _extract_pdf_text(cv_bytes) + "\n")
            st.session_state["pdf_data_cvs_string"] += "".join(new_text_parts)

            # Drop CVs that are no longer in the uploader. The lists are
            # rebuilt instead of deleting while iterating, which skipped the
            # entry following each deleted one.
            uploaded_names = {cv.name for cv in st.session_state["cvs"]}
            kept_names = []
            kept_cvs = []
            for name, cv_bytes in zip(known_names, cached_cvs):
                if name in uploaded_names:
                    kept_names.append(name)
                    kept_cvs.append(cv_bytes)
                else:
                    print("gelöscht: "+name)
            known_names[:] = kept_names
            cached_cvs[:] = kept_cvs

        pre_generate = st.toggle("Activate to pre generate questions", key="pre_toggle")
        if pre_generate:
            system = sys_prompt.format(job=st.session_state["pdf_data_jobdescription_string"], resume=st.session_state["pdf_data_cvs_string"], n=15)
            if not st.session_state["ai_questions"]:
                try:
                    st.info("The questions are generated. This may take a short moment.", icon="ℹ️")
                    with st.spinner("Loading..."):
                        res = openai.ChatCompletion.create(
                            engine="gpt-4-1106",
                            temperature=0.2,
                            messages=[
                                {
                                    "role": "system",
                                    "content": system,
                                },
                            ],
                        )
                    raw_lines = res.choices[0]["message"]["content"].split("\n")
                    st.session_state["ai_questions"] = [line for line in raw_lines if len(line) > 0]
                    for i in range(len(raw_lines)):
                        st.session_state["disable_row_"+str(i)] = False
                except Exception as e:
                    print(f"Fehler beim generieren der Fragen: {str(e)}")
                    st.error("An error has occurred. Please reload the page or contact the admin.", icon="🚨")
                else:
                    # Re-run the script so the generated questions are rendered.
                    st.rerun()
            else:
                final_questions = st.session_state["final_question_string"]

                if not final_questions:
                    # Review phase: every generated question can be edited or deleted.
                    for i, question in enumerate(st.session_state["ai_questions"]):
                        cols = st.columns([5, 1])
                        with cols[1]:
                            if st.button("Delete", use_container_width=True, key="btn_del_row_"+str(i)):
                                print("delete")
                                # Delete by position so that a duplicated
                                # question text cannot remove the wrong row.
                                del st.session_state["ai_questions"][i]
                                st.rerun()
                        with cols[0]:
                            st.text_area(label="Question "+str(i+1)+":", value=question, label_visibility="collapsed", key="text_area_"+str(i), disabled=st.session_state["disable_row_"+str(i)])

                    st.write("If you are satisfied with the questions, then accept them. You can still sort them afterwards.")
                    if st.button("Accept all questions", use_container_width=True, key="accept_all_questions"):
                        print("accept all")
                        number_prefix = re.compile(r"^[1-9][0-9]?\.")
                        for i in range(len(st.session_state["ai_questions"])):
                            print("accept")
                            edited_text = st.session_state["text_area_"+str(i)]
                            # Strip an existing "N." prefix and always emit
                            # "N. text", so adjust_numbering() can renumber
                            # the entry even if the text had no number.
                            question_body = number_prefix.sub("", edited_text.lstrip(), count=1).lstrip()
                            final_questions.append(f"{len(final_questions) + 1}. {question_body}")
                            st.session_state["disable_row_"+str(i)] = True
                        st.rerun()

                # Sorting phase: accepted questions can be moved up and down.
                last_index = len(final_questions) - 1
                for i, final_q in enumerate(final_questions):
                    cols_final = st.columns([5, 1])
                    with cols_final[1]:
                        if st.button("Up", use_container_width=True, key="btn_up_row_"+str(i), disabled=(i == 0)):
                            if i > 0:
                                # Swap the current element with the previous one
                                final_questions[i], final_questions[i - 1] = final_questions[i - 1], final_questions[i]
                                st.session_state.final_question_string = adjust_numbering(final_questions)
                                st.rerun()
                        if st.button("Down", use_container_width=True, key="btn_down_row_"+str(i), disabled=(i == last_index)):
                            if i < last_index:
                                # Swap the current element with the next one
                                final_questions[i], final_questions[i + 1] = final_questions[i + 1], final_questions[i]
                                st.session_state.final_question_string = adjust_numbering(final_questions)
                                st.rerun()
                    with cols_final[0]:
                        st.write(final_q)
        else:
            with st.expander("Enter up to three predefined questions if needed. Otherwise leave it blank:"):
                question_one = st.text_input("Enter the first question:")
                question_two = st.text_input("Enter the second question:")
                question_three = st.text_input("Enter the third question:")

    # Form section for Submit and Clear
    col_submit_btn, col_empty, col_clear_btn = st.columns([1, 4, 1])
    if col_clear_btn.button("Clear ", use_container_width=True):
        # Reload the whole page in the browser to reset the form.
        streamlit_js_eval(js_expressions="parent.window.location.reload()")

    # Code to handle the input
    if col_submit_btn.button("Submit", use_container_width=True):
        if form_complete:
            data = {
                "title": job_title,
                "email": email,
                "question_one": "",
                "question_two": "",
                "question_three": "",
            }
            if not st.session_state["pre_toggle"]:
                # The manual questions only exist when pre-generation is off.
                for field, answer in (
                    ("question_one", question_one),
                    ("question_two", question_two),
                    ("question_three", question_three),
                ):
                    if answer:
                        data[field] = answer
            json_data = json.dumps(data, ensure_ascii=False)
            # A random UUID identifies the job (blob name and DB id).
            pdf_name = str(uuid.uuid4())
            print(st.session_state["final_question_string"])
            print(len(st.session_state["pdf_data_cvs"]))
            print(st.session_state["pdf_data_cvs_names"])
            upload_success = upload_blob(pdf_name, json_data, st.session_state["pdf_data_jobdescription"], st.session_state["pdf_data_cvs"], st.session_state["pre_toggle"], st.session_state["final_question_string"])
        else:
            st.write("Please fill out both fields and upload a PDF file.")

if not upload_success:
    st.error('An error has occurred. Please contact the administrator. Sorry for the inconvenience.', icon="🚨")