Spaces:

tensora
/

WorkGenius

Sleeping

App Files Files Community

WorkGenius / app.py

mbosse99

Updated GPT Engine

03d5dfe verified about 1 year ago

raw

history blame contribute delete

20.2 kB

	import streamlit as st
	from streamlit_js_eval import streamlit_js_eval
	from azure.storage.blob import BlobServiceClient
	from azure.cosmos import CosmosClient, exceptions
	from PyPDF2 import PdfReader
	import io
	import openai
	import json
	import os
	import uuid
	import time
	import calendar
	import re

	# Azure OpenAI settings: the key is read from the environment, while the
	# endpoint, API flavour and API version are fixed values.
	openai.api_type = "azure"
	openai.api_version = "2023-12-01-preview"
	openai.api_base = "https://tensora-oai-france.openai.azure.com/"
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# Blob storage client built from the CONNECTION environment variable;
	# upload_blob() uses it to reach the storage container.
	connection_string = os.environ.get("CONNECTION")
	blob_service_client = BlobServiceClient.from_connection_string(connection_string)


	def upload_blob(pdf_name, json_data, pdf_data_jobdescription, pdf_data_cvs, pre_generated_bool, custom_questions):
	    """Store a job posting with its resumes and show the candidate links.

	    Uploads the job-description PDF to the "jobdescriptions" blob container,
	    writes the job record, optionally one record per pre-generated question,
	    and for every resume a candidate record plus the resume PDF. Afterwards
	    the candidate links are rendered with Streamlit.

	    Args:
	        pdf_name: Identifier of the job; used as blob name and as prefix of
	            the question and resume ids.
	        json_data: JSON string that is parsed and handed to the DB helpers.
	        pdf_data_jobdescription: Content of the job-description PDF.
	        pdf_data_cvs: List with the content of each resume PDF.
	        pre_generated_bool: When truthy, one question record is written per
	            entry of ``custom_questions``.
	        custom_questions: Questions passed on to the job record.

	    Returns:
	        True when every step finished, False if any exception was raised
	        (the exception text is printed).
	    """
	    try:
	        container_client = blob_service_client.get_container_client("jobdescriptions")

	        # The job description is stored under "<pdf_name>.pdf".
	        job_blob = container_client.get_blob_client(f"{pdf_name}.pdf")
	        job_blob.upload_blob(pdf_data_jobdescription, overwrite=True)

	        upload_job_db_item(pdf_name, len(pdf_data_cvs), json.loads(json_data), pre_generated_bool, custom_questions)

	        if pre_generated_bool:
	            for question_number, question in enumerate(custom_questions, start=1):
	                # Id = job id + running number + current Unix timestamp.
	                question_id = pdf_name + "-question-nr-" + str(question_number) + str(calendar.timegm(time.gmtime()))
	                upload_question_db_item(question_id, pdf_name, question, st.session_state["pdf_data_jobdescription_string"])

	        # One (candidate name, chatbot link) pair per uploaded resume.
	        candidates = []
	        for cv_number, cv_bytes in enumerate(pdf_data_cvs, start=1):
	            # The candidate name is read from the session-state key "cv-<n>".
	            candidate_name = st.session_state["cv-" + str(cv_number)]
	            cv_id = pdf_name + "-cv-nr-" + str(cv_number) + str(calendar.timegm(time.gmtime()))
	            upload_db_item(candidate_name, json.loads(json_data), pdf_name, cv_id)
	            cv_blob = container_client.get_blob_client(f"{cv_id}.pdf")
	            cv_blob.upload_blob(cv_bytes, overwrite=True)
	            candidates.append((candidate_name, "https://tensora.ai/workgenius/cv-evaluation2/?job=" + cv_id))

	        st.success('Data and PDF files have been successfully uploaded. The link to the chatbot for the potential candidate is the following: ')
	        for candidate_name, link in candidates:
	            st.write("Link for the candidate " + candidate_name + ": ")
	            st.write(link)

	        return True
	    except Exception as error:
	        print(f"Fehler beim Hochladen der Daten: {str(error)}")
	        return False

	def upload_job_db_item(id, number_of_applicants, data, pre_generated_bool, custom_questions):
	    """Create the job record in the Cosmos DB container "JobData".

	    Args:
	        id: Value stored as the item's "id".
	        number_of_applicants: Stored as "number_of_applicants".
	        data: Mapping providing "title", "email", "question_one",
	            "question_two" and "question_three".
	        pre_generated_bool: Stored as "pre_generated".
	        custom_questions: Stored as "custom_questions".

	    Errors raised by the insert itself are printed and not propagated.
	    """
	    cosmos = CosmosClient("https://wg-candidate-data.documents.azure.com:443/", os.getenv("CONNECTION_DB"))
	    job_container = cosmos.get_database_client("ToDoList").get_container_client("JobData")
	    record = {
	        "id": id,
	        "partitionKey": "wg-job-data-v1",
	        "title": data["title"],
	        "number_of_applicants": number_of_applicants,
	        "every_interview_conducted": False,
	        "evaluation_email": data["email"],
	        **{field: data[field] for field in ("question_one", "question_two", "question_three")},
	        "pre_generated": pre_generated_bool,
	        "custom_questions": custom_questions,
	    }
	    try:
	        # Insert the item into the container.
	        job_container.create_item(body=record)
	        print("Eintrag erfolgreich in die Cosmos DB eingefügt. Container: Job Data")
	    except exceptions.CosmosHttpResponseError as cosmos_error:
	        print(f"Fehler beim Schreiben in die Cosmos DB: {str(cosmos_error)}")
	    except Exception as error:
	        print(f"Allgemeiner Fehler: {str(error)}")

	def upload_db_item(name, data, job_description_id, cv_id):
	    """Create the candidate record in the Cosmos DB container "Items".

	    Args:
	        name: Stored as the candidate's "name".
	        data: Mapping providing "title", "email", "question_one",
	            "question_two" and "question_three".
	        job_description_id: Stored as "job_description_id".
	        cv_id: Value stored as the item's "id".

	    Errors raised by the insert itself are printed and not propagated.
	    """
	    cosmos = CosmosClient("https://wg-candidate-data.documents.azure.com:443/", os.getenv("CONNECTION_DB"))
	    candidate_container = cosmos.get_database_client("ToDoList").get_container_client("Items")
	    record = {
	        "id": cv_id,
	        "partitionKey": "wg-candidate-data-v1",
	        "name": name,
	        "title": data["title"],
	        "interview_conducted": False,
	        "ai_summary": "",
	        "evaluation_email": data["email"],
	        **{field: data[field] for field in ("question_one", "question_two", "question_three")},
	        "job_description_id": job_description_id,
	    }

	    try:
	        # Insert the item into the container.
	        candidate_container.create_item(body=record)
	        print("Eintrag erfolgreich in die Cosmos DB eingefügt. Container: Items(candidate Data)")
	    except exceptions.CosmosHttpResponseError as cosmos_error:
	        print(f"Fehler beim Schreiben in die Cosmos DB: {str(cosmos_error)}")
	    except Exception as error:
	        print(f"Allgemeiner Fehler: {str(error)}")

	def upload_question_db_item(id, job_id, question, job_content):
	    """Create one question record in the Cosmos DB container "Questions".

	    Args:
	        id: Value stored as the item's "id".
	        job_id: Stored as "job_id".
	        question: Stored as "question_content".
	        job_content: Stored as "job_description".

	    Errors raised by the insert itself are printed and not propagated.
	    """
	    cosmos = CosmosClient("https://wg-candidate-data.documents.azure.com:443/", os.getenv("CONNECTION_DB"))
	    question_container = cosmos.get_database_client("ToDoList").get_container_client("Questions")
	    record = {
	        "id": id,
	        "partitionKey": "wg-question-data-v1",
	        "job_id": job_id,
	        "question_content": question,
	        "job_description": job_content,
	    }
	    try:
	        # Insert the item into the container.
	        question_container.create_item(body=record)
	        print("Eintrag erfolgreich in die Cosmos DB eingefügt. Container: Questions(Question Data)")
	    except exceptions.CosmosHttpResponseError as cosmos_error:
	        print(f"Fehler beim Schreiben in die Cosmos DB: {str(cosmos_error)}")
	    except Exception as error:
	        print(f"Allgemeiner Fehler: {str(error)}")

	# Centre the content of every Streamlit column with a small CSS override.
	column_css = """
	<style>
	[data-testid=column]{
	text-align: center;
	display: flex;
	align-items: center;
	justify-content: center;
	}
	</style>
	"""
	st.markdown(column_css, unsafe_allow_html=True)
	col1, col2 = st.columns([2, 1])

	# Session-state keys and the value each one receives when it is not set yet.
	session_defaults = {
	    "ai_questions": None,
	    "pdf_data_cvs": None,
	    "pdf_data_cvs_string": None,
	    "pdf_data_cvs_names": [],
	    "pdf_data_jobdescription": None,
	    "pdf_data_jobdescription_string": None,
	    "final_question_string": [],
	}
	for state_key, initial_value in session_defaults.items():
	    if state_key not in st.session_state:
	        st.session_state[state_key] = initial_value

	def adjust_numbering(lst):
	return [f"{i + 1}. {item.split('. ', 1)[1]}" for i, item in enumerate(lst)]

	# Start out optimistic; the submit handling replaces this flag with the
	# result of upload_blob().
	upload_success = True

	# Template for the system prompt used when questions are pre-generated.
	with open("sys_prompt_frontend.txt") as prompt_file:
	    sys_prompt = prompt_file.read()

	# Page header: title in the wide column, logo in the narrow one.
	col1.title("Job description upload")
	col2.image("https://www.workgenius.com/wp-content/uploads/2023/03/WorkGenius_navy-1.svg")

	st.write("Please upload the job description and resume(s) as PDF and enter the job title for the position. To receive the evaluation of the potential candidate(s), please provide your email address.")

	# Main page flow: renders the upload form, caches the uploaded PDFs and
	# their extracted text in st.session_state, optionally generates questions
	# through openai.ChatCompletion and lets the user delete, accept and reorder
	# them, and finally hands everything to upload_blob() on "Submit".
	#This container represents the form
	with st.container():

	#Form section for the files, names, title and mail
	uploaded_file_jobdescription = st.file_uploader("Upload the job description:", type=["pdf"], key="job")
	job_title = st.text_input("Enter the job title:", key="title")
	email = st.text_input("Enter the email:" , key="mail")
	uploaded_file_cvs = st.file_uploader("Upload the resume(s):", type=["pdf"],accept_multiple_files=True, key="cvs")
	# One name field per uploaded resume, keyed "cv-<n>" and pre-filled with the
	# file name; upload_blob() reads these keys back from the session state.
	for i,cv in enumerate(st.session_state["cvs"]):
	st.text_input(label="Enter the name of the "+str(i+1)+". CV (File: "+cv.name+")", value=cv.name,key="cv-"+str(i+1))

	#Form section for the interview mode (pre generated or not) and additional questions
	if len(job_title) > 0 and len(email) > 0 and uploaded_file_jobdescription and len(uploaded_file_cvs)>0:
	st.write("Activate the toggle to generate and select the questions in advance. Otherwise the questions will be generated automatically during the interview.")
	# Read the PDFs only while all four session caches are still empty.
	if not st.session_state["pdf_data_cvs"] and not st.session_state["pdf_data_cvs_string"] and not st.session_state["pdf_data_jobdescription"] and not st.session_state["pdf_data_jobdescription_string"]:
	pdf_data_jobdescription = uploaded_file_jobdescription.read()
	pdf_data_jobdescription_string = ""
	# Concatenate the extracted text of every job-description page.
	pdf_reader_job = PdfReader(io.BytesIO(pdf_data_jobdescription))
	for page_num in range(len(pdf_reader_job.pages)):
	page = pdf_reader_job.pages[page_num]
	pdf_data_jobdescription_string += page.extract_text()
	pdf_data_cvs = []
	pdf_data_cvs_string = ""
	# Keep each resume's raw bytes and append its text prefixed with "CV <n>: ".
	for i,cv in enumerate(st.session_state["cvs"]):
	print(cv.name)
	st.session_state["pdf_data_cvs_names"].append(cv.name)
	# print(cv.name)
	# print(cv.size)
	cv_data_bytes = cv.read()
	# print(len(cv_data_bytes))
	pdf_data_cvs.append(cv_data_bytes)
	pdf_reader_cvs = PdfReader(io.BytesIO(cv_data_bytes))
	pdf_data_cvs_string += "CV "+str(i+1)+": "
	for page_num in range(len(pdf_reader_cvs.pages)):
	page = pdf_reader_cvs.pages[page_num]
	pdf_data_cvs_string += page.extract_text()
	pdf_data_cvs_string += "\n"
	st.session_state["pdf_data_cvs"] = pdf_data_cvs
	st.session_state["pdf_data_cvs_string"] = pdf_data_cvs_string
	st.session_state["pdf_data_jobdescription"] = pdf_data_jobdescription
	st.session_state["pdf_data_jobdescription_string"] = pdf_data_jobdescription_string
	# Resumes are cached: add every uploaded file whose name is not tracked yet.
	if st.session_state["pdf_data_cvs"]:
	pdf_data_cvs_string = ""
	initial_cv_length = len(st.session_state["pdf_data_cvs"])
	for i,cv in enumerate(st.session_state["cvs"]):
	if cv.name not in st.session_state["pdf_data_cvs_names"]:
	st.session_state["pdf_data_cvs_names"].append(cv.name)
	print("At second:"+cv.name)
	cv_data_bytes = cv.read()
	st.session_state["pdf_data_cvs"].append(cv_data_bytes)
	pdf_reader_cvs = PdfReader(io.BytesIO(cv_data_bytes))
	# NOTE(review): the label adds the uploader index i to the number of
	# already cached resumes, so the "CV <n>" numbers may not be consecutive
	# - confirm this is intended.
	pdf_data_cvs_string += "CV "+str(i+1+initial_cv_length)+": "
	for page_num in range(len(pdf_reader_cvs.pages)):
	page = pdf_reader_cvs.pages[page_num]
	pdf_data_cvs_string += page.extract_text()
	pdf_data_cvs_string += "\n"
	st.session_state["pdf_data_cvs_string"] += pdf_data_cvs_string
	# Drop cached resumes whose file name is no longer present in the uploader.
	# NOTE(review): entries are deleted from the same lists that are being
	# enumerated, which shifts the remaining indices - verify that several
	# removals in one run are handled. pdf_data_cvs_string is not changed here.
	for i,name in enumerate(st.session_state["pdf_data_cvs_names"]):
	# print(name)
	found = False
	for j,cv in enumerate(st.session_state["cvs"]):
	# print(cv.name)
	if name == cv.name:
	found = True
	if not found:
	print("gelöscht: "+name)
	del st.session_state["pdf_data_cvs"][i]
	del st.session_state["pdf_data_cvs_names"][i]
	pre_generate = st.toggle("Activate to pre generate questions", key="pre_toggle")
	if pre_generate:

	# Fill the prompt template with the cached job and resume texts; n=15 is
	# presumably the number of questions requested (template not shown here).
	system = sys_prompt.format(job=st.session_state["pdf_data_jobdescription_string"], resume=st.session_state["pdf_data_cvs_string"], n=15)
	# Generate only while no questions are stored in the session state.
	if not st.session_state["ai_questions"]:
	try:
	# st.write("The questions are generated. This may take a short moment...")
	st.info("The questions are generated. This may take a short moment.", icon="ℹ️")
	with st.spinner("Loading..."):
	res = openai.ChatCompletion.create(
	engine="gpt-4-1106",
	temperature=0.2,
	messages=[
	{
	"role": "system",
	"content": system,
	},
	],
	)
	# Keep the non-empty lines of the reply as the list of questions.
	st.session_state["ai_questions"] = [item for item in res.choices[0]["message"]["content"].split("\n") if len(item) > 0]
	# Initialise a "disable_row_<i>" flag for every line index of the reply
	# (empty lines included).
	for i,q in enumerate(res.choices[0]["message"]["content"].split("\n")):
	st.session_state["disable_row_"+str(i)] = False
	st.rerun()
	except Exception as e:
	print(f"Fehler beim generieren der Fragen: {str(e)}")
	st.error("An error has occurred. Please reload the page or contact the admin.", icon="🚨")
	else:
	# Questions exist but none has been accepted yet: show the editable list.
	if len(st.session_state["final_question_string"]) <= 0:
	for i,question in enumerate(st.session_state["ai_questions"]):
	cols = st.columns([5,1])
	with cols[1]:
	# if st.button("Accept",use_container_width=True,key="btn_accept_row_"+str(i)):
	# print("accept")
	# pattern = re.compile(r"^[1-9][0-9]?\.")
	# questions_length = len(st.session_state["final_question_string"])
	# question_from_text_area = st.session_state["text_area_"+str(i)]
	# question_to_append = str(questions_length+1)+"."+re.sub(pattern, "", question_from_text_area)
	# st.session_state["final_question_string"].append(question_to_append)
	# st.session_state["disable_row_"+str(i)] = True
	# st.rerun()
	if st.button("Delete",use_container_width=True,key="btn_del_row_"+str(i)):
	print("delete")
	# NOTE(review): the question is removed by value while the text areas are
	# keyed by row index - verify that the rows show the expected text after
	# a delete.
	st.session_state["ai_questions"].remove(question)
	st.rerun()
	with cols[0]:
	st.text_area(label="Question "+str(i+1)+":",value=question,label_visibility="collapsed",key="text_area_"+str(i),disabled=st.session_state["disable_row_"+str(i)])
	st.write("If you are satisfied with the questions, then accept them. You can still sort them afterwards.")
	if st.button("Accept all questions",use_container_width=True,key="accept_all_questions"):
	print("accept all")
	for i,question in enumerate(st.session_state["ai_questions"]):
	print("accept")
	# Replace a leading one- or two-digit "N." prefix of the text-area content
	# with the question's position in the accepted list.
	pattern = re.compile(r"^[1-9][0-9]?\.")
	questions_length = len(st.session_state["final_question_string"])
	question_from_text_area = st.session_state["text_area_"+str(i)]
	question_to_append = str(questions_length+1)+"."+re.sub(pattern, "", question_from_text_area)
	st.session_state["final_question_string"].append(question_to_append)
	st.session_state["disable_row_"+str(i)] = True
	st.rerun()
	# Accepted questions, each with "Up"/"Down" buttons for reordering.
	for i,final_q in enumerate(st.session_state["final_question_string"]):
	cols_final = st.columns([5,1])
	with cols_final[1]:
	if st.button("Up",use_container_width=True,key="btn_up_row_"+str(i),disabled=True if i == 0 else False):
	if i > 0:
	# Swap the current element with the previous element
	st.session_state.final_question_string[i], st.session_state.final_question_string[i - 1] = \
	st.session_state.final_question_string[i - 1], st.session_state.final_question_string[i]
	# Renumber the questions so the prefixes match the new order.
	st.session_state.final_question_string = adjust_numbering(st.session_state.final_question_string)
	st.rerun()
	if st.button("Down",use_container_width=True,key="btn_down_row_"+str(i), disabled=True if i == len(st.session_state["final_question_string"])-1 else False):
	if i < len(st.session_state.final_question_string) - 1:
	# Swap the current element with the next element
	st.session_state.final_question_string[i], st.session_state.final_question_string[i + 1] = \
	st.session_state.final_question_string[i + 1], st.session_state.final_question_string[i]
	st.session_state.final_question_string = adjust_numbering(st.session_state.final_question_string)
	st.rerun()
	with cols_final[0]:
	st.write(final_q)
	else:
	# Toggle off: up to three optional free-text questions.
	with st.expander("Enter up to three predefined questions if needed. Otherwise leave it blank:"):
	question_one = st.text_input("Enter the first question:")
	question_two = st.text_input("Enter the second question:")
	question_three = st.text_input("Enter the third question:")

	#Form section for Submit and Clear
	col_submit_btn, col_empty, col_clear_btn = st.columns([1,4, 1])
	# "Clear" reloads the page in the browser through a JavaScript expression.
	if col_clear_btn.button("Clear " ,use_container_width=True):
	streamlit_js_eval(js_expressions="parent.window.location.reload()")

	#Code to handle the input
	if col_submit_btn.button("Submit", use_container_width=True):
	# Same completeness check as the one guarding the question section above.
	if len(job_title) > 0 and len(email) > 0 and uploaded_file_jobdescription and len(uploaded_file_cvs)>0:
	data = {
	"title": job_title,
	"email": email,
	"question_one": "",
	"question_two": "",
	"question_three": "",
	}
	# The free-text questions are only taken over when the toggle is off.
	if not st.session_state["pre_toggle"]:
	if question_one:
	data["question_one"] = question_one
	if question_two:
	data["question_two"] = question_two
	if question_three:
	data["question_three"] = question_three

	json_data = json.dumps(data, ensure_ascii=False)

	# Generate a random UUID
	random_uuid = uuid.uuid4()

	# Represent the UUID as a string
	uuid_string = str(random_uuid)

	# The UUID string is passed to upload_blob() as pdf_name.
	pdf_name = uuid_string

	print(st.session_state["final_question_string"])

	# pdf_data_cv = uploaded_file_cv.read()
	print(len(st.session_state["pdf_data_cvs"]))
	print(st.session_state["pdf_data_cvs_names"])
	upload_success = upload_blob(pdf_name, json_data, st.session_state["pdf_data_jobdescription"],st.session_state["pdf_data_cvs"],st.session_state["pre_toggle"],st.session_state["final_question_string"])
	else:
	st.write("Please fill out both fields and upload a PDF file.")


	# upload_blob() returned False: show a generic error to the user.
	if not upload_success:
	st.error('An error has occurred. Please contact the administrator. Sorry for the inconvenience.', icon="🚨")