Spaces:

mirukulla
/

Resonate-Meetings-chat-bot

Runtime error

madhuroopa

added new application files

e9ec229 9 months ago

12.6 kB

	# Description: This file contains the main Streamlit application for the Resonate project.
	# Run command: streamlit run app.py

	import os
	import pandas as pd
	import streamlit as st
	from dotenv import load_dotenv
	from streamlit import session_state as ss
	from streamlit_chat import message
	from src.clustering.resonate_bert_summarizer import summarize_runner
	from src.clustering.resonate_clustering import Clustering
	from src.langchain.resonate_langchain_functions import LangChain
	from src.utils.resonate_streamlitUtils import (
	aws_transcribe,
	convert_video_to_audio,
	pinecone_init_upsert,
	transcript_text_editor_minutes_to_hhmmss,
	)

	def initialize_session_state():
	    """Seed Streamlit session state with every key the app reads.

	    Only missing keys are created, so values survive Streamlit reruns.
	    ``langchain_obj`` is created only once ``ss.api_key_set`` is true.
	    """
	    # Initialize API keys in session state if not present
	    if "api_keys" not in ss:
	        ss.api_keys = {
	            "openai_api_key": None,
	            "pinecone_api_key": None,
	            "aws_access_key": None,
	            "aws_secret_access_key": None,
	        }

	    # Immutable defaults can be shared safely.
	    plain_defaults = {
	        "api_key_set": False,
	        "add_meeting": False,
	        # Main screen - transcript editor flags
	        "transcript_speaker_editor": False,
	        "transcript_text_editor": False,
	        "meeting_name": "",
	        "chat_view": True,
	        "query": "",
	    }
	    for state_key, default in plain_defaults.items():
	        if state_key not in ss:
	            ss[state_key] = default

	    # Mutable / expensive defaults are built lazily, one fresh object per key.
	    # NOTE: the guard key now matches the attribute that is assigned; the
	    # previous check for "updated_df" never became false, so
	    # ``updated_transcript_df_to_embed`` was reset on every rerun.
	    default_factories = {
	        "Clustering_obj": Clustering,
	        "df_transcript_speaker": pd.DataFrame,
	        "df_transcript_text": pd.DataFrame,
	        "updated_transcript_df_to_embed": pd.DataFrame,
	        "responses": lambda: ["How can I assist you?"],
	        "requests": list,
	    }
	    for state_key, factory in default_factories.items():
	        if state_key not in ss:
	            ss[state_key] = factory()

	    # The LangChain wrapper is only created once the API keys have been saved.
	    if "langchain_obj" not in ss and ss.api_key_set:
	        ss.langchain_obj = LangChain()


	def chat_view():
	    """Render the chat screen: message history, input box and Clear button.

	    A submitted message is moved from the text widget into ``ss.query`` by an
	    ``on_change`` callback, which also empties the widget. The pending query
	    is consumed exactly once, so unrelated reruns do not resend the previous
	    message. Resetting the chat is done in an ``on_click`` callback for the
	    same reason: a keyed widget's value may only be changed before the widget
	    is instantiated in a run.
	    """

	    def _queue_query():
	        # Hand the typed text over to the script run and empty the input box.
	        ss.query = ss.query_input
	        ss.query_input = ""

	    def _reset_chat():
	        ss.langchain_obj.conversation_bufw.memory.clear()  # Clear conversation buffer
	        ss.query = ""
	        ss.query_input = ""
	        ss.requests = []
	        ss.responses = ["How can I assist you?"]

	    st.header("Chat")
	    response_container = st.container()
	    textcontainer = st.container()
	    with textcontainer:
	        st.text_input(
	            "Chat Here",
	            placeholder="Message Resonate ... ",
	            key="query_input",
	            on_change=_queue_query,
	        )
	        # Clear button (the click itself triggers the rerun)
	        st.button("Clear", on_click=_reset_chat)

	        query = ss.query
	        if query:
	            # Consume the pending query first so a failure below, or any
	            # later rerun, cannot submit it a second time.
	            ss.query = ""
	            with st.spinner("typing..."):
	                uuid_list = ss.Clustering_obj.uuid_for_query(query=query)
	                print(f"Meeting Unique ID : {uuid_list}")
	                response = ss.langchain_obj.chat(
	                    query=query, in_filter=uuid_list, complete_db_flag=False
	                )
	                response = response["response"]
	                ss.requests.append(query)
	                ss.responses.append(response)

	    with response_container:
	        # responses[0] is the greeting, so request i follows response i.
	        for i, bot_text in enumerate(ss["responses"]):
	            message(bot_text, key=str(i))
	            if i < len(ss["requests"]):
	                message(
	                    ss["requests"][i],
	                    is_user=True,
	                    key=str(i) + "_user",
	                )


	def api_keys_input():
	    """Render the API-key form and store submitted keys.

	    On "Save API Keys" every entered value is written to ``ss.api_keys``;
	    non-empty values are also exported as environment variables. The
	    function then sets ``ss.api_key_set`` and reruns the script.
	    """
	    # (session-state key, widget label, environment variable)
	    key_fields = (
	        ("openai_api_key", "OpenAPI Key:", "OPENAI_API_KEY"),
	        ("pinecone_api_key", "Pinecone Key:", "PINECONE_API_KEY"),
	        ("aws_access_key", "AWS Access Key:", "AWS_ACCESS_KEY"),
	        ("aws_secret_access_key", "AWS Secret Access Key:", "AWS_SECRET_ACCESS_KEY"),
	    )
	    with st.form("keys_input_form"):
	        entered = {}
	        for state_key, label, _env_name in key_fields:
	            # The stored value may be None (initial state); fall back to ""
	            # so the widget always receives a string.
	            entered[state_key] = st.text_input(
	                label,
	                type="password",
	                value=ss.api_keys.get(state_key) or "",
	            )
	        # Add a button to save the keys
	        save_button = st.form_submit_button("Save API Keys")
	        if save_button:
	            for state_key, _label, env_name in key_fields:
	                value = entered[state_key]
	                # Update session state with provided keys
	                ss.api_keys[state_key] = value
	                # Set environment variables only for keys that were entered
	                if value:
	                    os.environ[env_name] = value

	            ss.api_key_set = True
	            # Never log the secrets themselves; only report which were given.
	            provided = [name for name, value in entered.items() if value]
	            print("API keys provided for: ", provided)
	            st.rerun()


	def add_meeting():
	    """Render the upload form and start transcription of a new meeting.

	    A ``.wav`` upload is stored under ``data/audioFiles``; a video upload is
	    stored under ``data/videoFiles`` and converted to a ``.wav`` with the same
	    stem. The audio file name is then passed to ``aws_transcribe`` and the
	    result is stored in ``ss.df_transcript_speaker`` before the speaker
	    editor is enabled.
	    """
	    video_dir = "data/videoFiles"
	    audio_dir = "data/audioFiles"
	    with st.form("add_meeting_form"):
	        uploaded_file = st.file_uploader("Choose a file", type=["wav", "mp4"])
	        # Get user input
	        meeting_name = st.text_input("Enter Meeting Name:")
	        save_meeting_button = st.form_submit_button("Save Meeting")
	        if save_meeting_button:
	            if not meeting_name:
	                st.warning("Please enter Meeting Name.")
	            elif uploaded_file is None:
	                st.warning("Please upload a meeting recording.")
	            else:
	                with st.spinner("Processing..."):
	                    # Keep only the base name: the value comes from the client
	                    # and must not be able to point outside the data folders.
	                    file_name = os.path.basename(uploaded_file.name).replace(" ", "_")
	                    stem, extension = os.path.splitext(file_name)
	                    extension = extension.lower()  # accept ".MP4" / ".WAV" too
	                    os.makedirs(audio_dir, exist_ok=True)
	                    if extension in (".mp4", ".mpeg4"):
	                        print("in video")
	                        os.makedirs(video_dir, exist_ok=True)
	                        video_path = video_dir + "/" + file_name
	                        with open(video_path, "wb") as f:
	                            f.write(uploaded_file.getbuffer())
	                        # Convert video file to audio file
	                        file_name = stem + ".wav"
	                        convert_video_to_audio(video_path, audio_dir + "/" + file_name)
	                    elif extension == ".wav":
	                        print("in audio")
	                        with open(audio_dir + "/" + file_name, "wb") as f:
	                            f.write(uploaded_file.getbuffer())
	                    else:
	                        # Nothing was written to disk, so do not transcribe.
	                        st.warning("Please upload a meeting recording.")
	                        return
	                    ss.df_transcript_speaker = aws_transcribe(file_name)
	                    ss.meeting_name = meeting_name
	                    ss.transcript_speaker_editor = True


	def transcript_speaker_editor():
	    """Let the user rename the speaker labels of the current transcript.

	    Shows one text box per distinct ``speaker_label``. On "Update Speakers"
	    the renamed copy is stored in ``ss.df_transcript_text``, the speaker frame
	    is cleared, and the app switches to the transcript text editor.
	    """
	    ss.add_meeting = False
	    with st.form("transcript_speaker_editor_form"):
	        st.write("Transcript Speaker Editor:")
	        st.dataframe(ss.df_transcript_speaker)
	        df = ss.df_transcript_speaker.copy(deep=True)
	        if "speaker_label" not in df.columns:
	            # Nothing to rename; still close the form with a submit button.
	            st.warning("No speaker labels found in the transcript.")
	            st.form_submit_button("Update Speakers")
	            return
	        # Display text input boxes for each unique speaker label
	        updated_speaker_names = {}
	        for speaker_label in df["speaker_label"].unique():
	            updated_speaker_names[speaker_label] = st.text_input(
	                f"Edit speaker label '{speaker_label}'", speaker_label
	            )
	        # Apply all renames in a single pass. Replacing one label at a time
	        # chains the substitutions, so swapping two names (A -> B, B -> A)
	        # would collapse both speakers into one.
	        df["speaker_label"] = df["speaker_label"].map(
	            lambda label: updated_speaker_names.get(label, label)
	        )
	        update_speaker_button = st.form_submit_button("Update Speakers")
	        if update_speaker_button:
	            ss.df_transcript_speaker = pd.DataFrame()
	            ss.df_transcript_text = df
	            ss.transcript_text_editor = True
	            ss.transcript_speaker_editor = False
	            st.rerun()


	def transcript_text_editor_update_text(row_index, new_text):
	    """Write ``new_text`` into the "text" cell of row ``row_index``.

	    The target is the working frame ``ss.updated_transcript_df_to_embed``.
	    """
	    working_frame = ss.updated_transcript_df_to_embed
	    working_frame.at[row_index, "text"] = new_text


	def transcript_text_editor():
	    """Let the user correct transcript rows and then upload the result.

	    "Update Text" replaces the text of the selected row. "Finish Transcript
	    Editing" passes the edited frame to ``summarize_runner``, calls
	    ``create_Cluster``, upserts via ``pinecone_init_upsert``, then resets the
	    editor state and returns to the chat view.
	    """
	    ss.transcript_speaker_editor = False
	    st.write("Transcript Text Editor:")
	    st.write(ss.df_transcript_text)
	    if ss.df_transcript_text.empty:
	        # len(df) - 1 would be -1 and is rejected as max_value below.
	        st.warning("No transcript is available to edit.")
	        return
	    df = ss.df_transcript_text.copy(deep=True)
	    # The working copy is rebuilt from ss.df_transcript_text on every run,
	    # so edits must be persisted there as well (see "Update Text" below).
	    ss.updated_transcript_df_to_embed = df.copy(deep=True)
	    # Convert start_time and end_time to HH:MM:SS format (display copy only)
	    df["start_time"] = df["start_time"].apply(transcript_text_editor_minutes_to_hhmmss)
	    df["end_time"] = df["end_time"].apply(transcript_text_editor_minutes_to_hhmmss)
	    row_index = st.number_input(
	        "Enter the row index:",
	        min_value=0,
	        max_value=len(df) - 1,
	        value=0,
	        step=1,
	    )
	    new_text = st.text_area("Enter the new text:", df.at[row_index, "text"])
	    if st.button("Update Text"):
	        # Persist the edit in the source frame too. A button is only True on
	        # the run caused by its own click, so without this the edit would be
	        # gone on the next rerun, including the one triggered by "Finish
	        # Transcript Editing".
	        ss.df_transcript_text.at[row_index, "text"] = new_text
	        transcript_text_editor_update_text(row_index, new_text)
	        st.success("Text updated successfully!")
	    # Display the updated dataframe
	    st.header("Updated Transcript")
	    st.table(ss.updated_transcript_df_to_embed)
	    if st.button("Finish Transcript Editing"):
	        with st.spinner("Uploading..."):
	            meeting_summary, meeting_uuid = summarize_runner(
	                ss.updated_transcript_df_to_embed
	            )
	            ss.Clustering_obj.create_Cluster()
	            pinecone_init_upsert(
	                ss.updated_transcript_df_to_embed,
	                meeting_title=ss.meeting_name,
	                meeting_summary=meeting_summary,
	                meeting_uuid=meeting_uuid,
	            )
	        # Reset state only after the upload succeeded, so a failure above
	        # does not discard the user's transcript.
	        ss.df_transcript_text = pd.DataFrame()
	        ss.meeting_name = "unnamed"
	        st.success("Pinecone upsert completed successfully!")
	        ss.transcript_text_editor = False
	        ss.updated_transcript_df_to_embed = pd.DataFrame()
	        ss.chat_view = True
	        st.rerun()


	def init_streamlit():
	    """Entry point of the app: configure the page and dispatch to a view.

	    Loads ``./config/.env`` when present, initializes session state, renders
	    the sidebar (API-key form and the upload/chat toggle) and then shows
	    whichever views the session-state flags enable.
	    """
	    # Set initial state of the sidebar. Page config is issued before any
	    # other Streamlit call made by this app.
	    st.set_page_config(
	        initial_sidebar_state="collapsed",
	        layout="wide",
	    )

	    # Populate the environment before session objects are constructed.
	    # NOTE(review): presumably Clustering()/LangChain() read credentials from
	    # the environment; confirm against their constructors.
	    if os.path.exists("./config/.env"):
	        load_dotenv("./config/.env")
	    else:
	        print(".env file does not exist, API keys must be set manually.")

	    initialize_session_state()
	    st.title("RESONATE")

	    # Initializing sidebar and its components
	    with st.sidebar:
	        api_keys_input()
	        if st.button("Upload Meeting / Chat"):
	            # Toggle between the upload flow and the chat view and leave
	            # any transcript editor that may be open.
	            ss.add_meeting = not ss.add_meeting
	            ss.chat_view = not ss.chat_view
	            ss.transcript_speaker_editor = False
	            ss.transcript_text_editor = False

	    if not ss.api_key_set:
	        st.header("Pre-requisites:")
	        st.write("Please set the API keys to enable the chat view.")
	        st.write("Please ensure that you have already run the 'pinecone_sample_dataloader.py'")

	    if ss.add_meeting and ss.api_key_set:
	        add_meeting()
	    if ss.transcript_speaker_editor:
	        transcript_speaker_editor()
	    if ss.df_transcript_text is not None and ss.transcript_text_editor:
	        transcript_text_editor()
	    if ss.chat_view and ss.api_key_set:
	        chat_view()  # Chat view



	if __name__ == "__main__":
	    # Prerequisite: Pinecone must already contain data before the app starts.
	    # Setup steps are described at
	    # https://github.com/SartajBhuvaji/Resonate/blob/master/init_one_time_utils/PREREQUISITE.md
	    init_streamlit()

	# Test questions:
	# What was discussed about cyberbullying?
	# What is one new feature planned for GitLab's code search?
	# What is the goal of defining maintainability for the new diffs architecture?