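# NOTE: the text "Spaces: Runtime error" preceded this file when it was captured;
# it appears to be page-status text from the hosting site, not part of the program.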
# Description: This file contains the main Streamlit application for the Resonate project.
# Run command: streamlit run app.py
import os | |
import pandas as pd | |
import streamlit as st | |
from dotenv import load_dotenv | |
from streamlit import session_state as ss | |
from streamlit_chat import message | |
from src.clustering.resonate_bert_summarizer import summarize_runner | |
from src.clustering.resonate_clustering import Clustering | |
from src.langchain.resonate_langchain_functions import LangChain | |
from src.utils.resonate_streamlitUtils import ( | |
aws_transcribe, | |
convert_video_to_audio, | |
pinecone_init_upsert, | |
transcript_text_editor_minutes_to_hhmmss, | |
) | |
def initialize_session_state():
    """Create every session-state entry the app reads, if it is missing.

    Each entry is only created when absent, so values written during an
    earlier script run survive Streamlit's reruns.
    """
    # API keys start out unset; the sidebar form fills them in.
    if "api_keys" not in ss:
        ss.api_keys = {
            "openai_api_key": None,
            "pinecone_api_key": None,
            "aws_access_key": None,
            "aws_secret_access_key": None,
        }
    if "api_key_set" not in ss:
        ss.api_key_set = False
    if "add_meeting" not in ss:
        ss.add_meeting = False
    if "Clustering_obj" not in ss:
        ss.Clustering_obj = Clustering()

    # Main screen - transcript editor
    if "transcript_speaker_editor" not in ss:
        ss.transcript_speaker_editor = False
    if "transcript_text_editor" not in ss:
        ss.transcript_text_editor = False
    if "meeting_name" not in ss:
        ss.meeting_name = ""
    if "df_transcript_speaker" not in ss:
        ss.df_transcript_speaker = pd.DataFrame()
    if "df_transcript_text" not in ss:
        ss.df_transcript_text = pd.DataFrame()
    # The guard must test the key that is assigned. Testing a different key
    # ("updated_df") was always true and wiped the working transcript on
    # every rerun.
    if "updated_transcript_df_to_embed" not in ss:
        ss.updated_transcript_df_to_embed = pd.DataFrame()

    # Chat screen
    if "chat_view" not in ss:
        ss.chat_view = True
    # The LangChain wrapper is only built once the API keys have been saved.
    if "langchain_obj" not in ss and ss.api_key_set:
        ss.langchain_obj = LangChain()
    if "query" not in ss:
        ss.query = ""
    if "responses" not in ss:
        ss["responses"] = ["How can I assist you?"]
    if "requests" not in ss:
        ss["requests"] = []
def chat_view():
    """Render the chat screen.

    The message history is drawn in the upper container and the input box
    plus "Clear" button in the lower one. A non-empty query is answered via
    ``ss.langchain_obj.chat`` restricted to the meeting ids returned by
    ``ss.Clustering_obj.uuid_for_query``.
    """
    st.header("Chat")
    # Created in display order; the history container is filled last so it
    # already includes the exchange produced during this run.
    history_area = st.container()
    input_area = st.container()

    with input_area:
        query = st.text_input(
            "Chat Here",
            placeholder="Message Resonate ... ",
            value=ss.query,
            key="query_input",
        )

        clear_clicked = st.button("Clear")
        if clear_clicked:
            # Forget the conversation on the LangChain side as well
            ss.langchain_obj.conversation_bufw.memory.clear()
            ss.query = ""
            ss.requests = []
            ss["responses"] = ["How can I assist you?"]
            st.rerun()
        elif query:
            with st.spinner("typing..."):
                uuid_list = ss.Clustering_obj.uuid_for_query(query=query)
                print(f"Meeting Unique ID : {uuid_list}")
                answer = ss.langchain_obj.chat(
                    query=query, in_filter=uuid_list, complete_db_flag=False
                )
            ss.requests.append(query)
            ss.responses.append(answer["response"])
            ss.query = ""

    with history_area:
        # responses[i] is shown first, followed by requests[i] when it exists
        for idx, bot_text in enumerate(ss["responses"]):
            message(bot_text, key=str(idx))
            if idx < len(ss["requests"]):
                message(
                    ss["requests"][idx],
                    is_user=True,
                    key=str(idx) + "_user",
                )
def api_keys_input():
    """Render the API-key form and store the submitted keys.

    On submit, every entered value is saved in ``ss.api_keys``. Non-empty
    values are also exported as environment variables, ``ss.api_key_set`` is
    switched on, and the script is rerun.
    """
    # (session-state key, form label, environment variable)
    key_specs = (
        ("openai_api_key", "OpenAPI Key:", "OPENAI_API_KEY"),
        ("pinecone_api_key", "Pinecone Key:", "PINECONE_API_KEY"),
        ("aws_access_key", "AWS Access Key:", "AWS_ACCESS_KEY"),
        ("aws_secret_access_key", "AWS Secret Access Key:", "AWS_SECRET_ACCESS_KEY"),
    )

    with st.form("keys_input_form"):
        entered = {}
        for state_key, label, _env_var in key_specs:
            entered[state_key] = st.text_input(
                label,
                type="password",
                # The stored value is None until a key has been saved, so a
                # .get() default never applies; fall back to "" explicitly.
                value=ss.api_keys.get(state_key) or "",
            )

        # Add a button to save the keys
        save_button = st.form_submit_button("Save API Keys")
        if save_button:
            for state_key, _label, env_var in key_specs:
                value = entered[state_key]
                ss.api_keys[state_key] = value
                # Only export keys that were actually provided
                if value:
                    os.environ[env_var] = value
            ss.api_key_set = True
            # Log only which keys were supplied - never the secret values.
            provided = [name for name, value in entered.items() if value]
            print("API keys provided for: ", provided)
            st.rerun()
def add_meeting():
    """Render the upload form and start transcription of a new meeting.

    On submit, the recording is written under ``data/audioFiles/``. Video
    uploads are first stored under ``data/videoFiles/`` and converted to
    ``.wav``. The audio file name is then handed to ``aws_transcribe``, and
    the result is stored in session state for the speaker editor.
    """
    video_dir = "data/videoFiles/"
    audio_dir = "data/audioFiles/"

    with st.form("add_meeting_form"):
        uploaded_file = st.file_uploader("Choose a file", type=["wav", "mp4"])
        # Get user input
        meeting_name = st.text_input("Enter Meeting Name:")
        save_meeting_button = st.form_submit_button("Save Meeting")

        if not save_meeting_button:
            return
        if not meeting_name:
            st.warning("Please enter Meeting Name.")
            return
        if uploaded_file is None:
            st.warning("Please upload a meeting recording.")
            return

        with st.spinner("Processing..."):
            # The name comes from the client: keep only its final component
            # so it cannot point outside the data directories.
            file_name = os.path.basename(uploaded_file.name).replace(" ", "_")
            # splitext handles any extension length; lower() accepts ".MP4"
            # and ".WAV" as well.
            stem, extension = os.path.splitext(file_name)
            extension = extension.lower()
            audio_name = stem + ".wav"

            if extension in (".mp4", ".mpeg4"):
                print("in video")
                os.makedirs(video_dir, exist_ok=True)
                os.makedirs(audio_dir, exist_ok=True)
                video_path = video_dir + file_name
                with open(video_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                # Convert video file to audio file
                convert_video_to_audio(video_path, audio_dir + audio_name)
            elif extension == ".wav":
                print("in audio")
                os.makedirs(audio_dir, exist_ok=True)
                with open(audio_dir + audio_name, "wb") as f:
                    f.write(uploaded_file.getbuffer())
            else:
                # Nothing was saved, so there is nothing to transcribe.
                st.warning("Unsupported file type. Please upload a .wav or .mp4 file.")
                return

            ss.df_transcript_speaker = aws_transcribe(audio_name)
            ss.meeting_name = meeting_name
            ss.transcript_speaker_editor = True
def transcript_speaker_editor():
    """Render the form that renames the speaker labels of the transcript.

    One text box is shown per distinct value of the ``speaker_label``
    column. On submit, the relabelled transcript is moved to
    ``ss.df_transcript_text`` and the app switches to the text editor.
    """
    ss.add_meeting = False
    with st.form("transcript_speaker_editor_form"):
        st.write("Transcript Speaker Editor:")
        st.dataframe(ss.df_transcript_speaker)
        df = ss.df_transcript_speaker.copy(deep=True)

        # One text box per distinct speaker label: old label -> new name
        updated_speaker_names = {}
        for speaker_label in df["speaker_label"].unique():
            new_name = st.text_input(
                f"Edit speaker label '{speaker_label}'", speaker_label
            )
            # A cleared box keeps the existing label instead of blanking it.
            updated_speaker_names[speaker_label] = new_name or speaker_label

        # Apply all renames in a single pass. Replacing one label at a time
        # chains renames: swapping two speakers' names would first merge them
        # and then rename the merged rows, losing one speaker entirely.
        df["speaker_label"] = df["speaker_label"].map(
            lambda label: updated_speaker_names.get(label, label)
        )

        update_speaker_button = st.form_submit_button("Update Speakers")
        if update_speaker_button:
            ss.df_transcript_speaker = pd.DataFrame()
            # df is already a private deep copy, so it can be stored directly.
            ss.df_transcript_text = df
            ss.transcript_text_editor = True
            ss.transcript_speaker_editor = False
            st.rerun()
def transcript_text_editor_update_text(row_index, new_text):
    """Overwrite the "text" cell of one row of the working transcript.

    The working transcript is ``ss.updated_transcript_df_to_embed``;
    ``row_index`` is used as its index label.
    """
    working_df = ss.updated_transcript_df_to_embed
    working_df.at[row_index, "text"] = new_text
def transcript_text_editor():
    """Render the row-by-row transcript text editor and the final upload.

    "Update Text" rewrites the text of the selected row. "Finish Transcript
    Editing" summarizes the edited transcript, rebuilds the clusters,
    upserts the transcript to Pinecone and returns to the chat view.
    """
    ss.transcript_speaker_editor = False
    st.write("Transcript Text Editor:")
    st.write(ss.df_transcript_text)

    df = ss.df_transcript_text.copy(deep=True)
    if df.empty:
        # With no rows the row selector would get max_value=-1 < min_value=0
        # and the column lookups below would fail.
        st.warning("No transcript is available to edit.")
        return
    ss.updated_transcript_df_to_embed = df.copy(deep=True)

    # Convert start_time and end_time to HH:MM:SS format
    # NOTE(review): the converted frame is only used for the row count and
    # the text lookup below; it is never displayed - confirm the intent.
    df["start_time"] = df["start_time"].apply(transcript_text_editor_minutes_to_hhmmss)
    df["end_time"] = df["end_time"].apply(transcript_text_editor_minutes_to_hhmmss)

    row_index = st.number_input(
        "Enter the row index:",
        min_value=0,
        max_value=len(df) - 1,
        value=0,
        step=1,
    )
    new_text = st.text_area("Enter the new text:", df.at[row_index, "text"])

    if st.button("Update Text"):
        transcript_text_editor_update_text(row_index, new_text)
        # The working copy is rebuilt from ss.df_transcript_text on every
        # rerun, so the edit must be written back to it. Otherwise it is
        # discarded by the next button click, including "Finish".
        ss.df_transcript_text = ss.updated_transcript_df_to_embed.copy(deep=True)
        st.success("Text updated successfully!")

    # Display the updated dataframe
    st.header("Updated Transcript")
    st.table(ss.updated_transcript_df_to_embed)

    if st.button("Finish Transcript Editing"):
        with st.spinner("Uploading..."):
            meeting_summary, meeting_uuid = summarize_runner(
                ss.updated_transcript_df_to_embed
            )
            ss.Clustering_obj.create_Cluster()
            pinecone_init_upsert(
                ss.updated_transcript_df_to_embed,
                meeting_title=ss.meeting_name,
                meeting_summary=meeting_summary,
                meeting_uuid=meeting_uuid,
            )
        # Reset the editor state only after the upload succeeded, so a
        # failed upload leaves the transcript available for a retry.
        ss.df_transcript_text = pd.DataFrame()
        ss.meeting_name = "unnamed"
        st.success("Pinecone upsert completed successfully!")
        ss.transcript_text_editor = False
        ss.updated_transcript_df_to_embed = pd.DataFrame()
        ss.chat_view = True
        st.rerun()
def init_streamlit():
    """Build the page: session state, sidebar, and whichever views are active."""
    initialize_session_state()

    if os.path.exists("./config/.env"):
        load_dotenv("./config/.env")
    else:
        print(".env file does not exist, API keys must be set manually.")

    # Set initial state of the sidebar
    st.set_page_config(
        initial_sidebar_state="collapsed",
        layout="wide",
    )
    st.title("RESONATE")

    # Initializing sidebar and its components
    with st.sidebar:
        api_keys_input()
        if st.button("Upload Meeting / Chat"):
            ss.add_meeting = not ss.add_meeting
            # Derive the chat flag from the upload flag rather than flipping
            # it on its own. The editor screens change these flags
            # separately, so two independent toggles could show both views
            # at once or neither of them.
            ss.chat_view = not ss.add_meeting
            ss.transcript_speaker_editor = False
            ss.transcript_text_editor = False

    if not ss.api_key_set:
        st.header("Pre-requisites:")
        st.write("Please set the API keys to enable the chat view.")
        st.write("Please ensure that you have already run the 'pinecone_sample_dataloader.py'")

    if ss.add_meeting and ss.api_key_set:
        add_meeting()
    if ss.transcript_speaker_editor:
        transcript_speaker_editor()
    if ss.df_transcript_text is not None and ss.transcript_text_editor:
        transcript_text_editor()
    if ss.chat_view and ss.api_key_set:
        chat_view()  # Chat view
# Entry point. Pinecone must already hold data before the Streamlit app is started;
# see https://github.com/SartajBhuvaji/Resonate/blob/master/init_one_time_utils/PREREQUISITE.md
if __name__ == "__main__":
    init_streamlit()
# Test questions:
# What was discussed about cyberbullying?
# What is one new feature planned for GitLab's code search?
# What is the goal of defining maintainability for the new diffs architecture?