madhuroopa
added new application files
e9ec229
raw
history blame
12.6 kB
# Description: This file contains the main Streamlit application for the Resonate project.
# Run command: streamlit run app.py
import os
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from streamlit import session_state as ss
from streamlit_chat import message
from src.clustering.resonate_bert_summarizer import summarize_runner
from src.clustering.resonate_clustering import Clustering
from src.langchain.resonate_langchain_functions import LangChain
from src.utils.resonate_streamlitUtils import (
aws_transcribe,
convert_video_to_audio,
pinecone_init_upsert,
transcript_text_editor_minutes_to_hhmmss,
)
def initialize_session_state():
    """Create every session-state entry the app reads, if it is missing.

    Each entry is guarded by a membership test, so a value that is already
    stored in the session is left untouched.
    """
    # API keys start unset; the sidebar form (api_keys_input) fills them in.
    if "api_keys" not in ss:
        ss.api_keys = {
            "openai_api_key": None,
            "pinecone_api_key": None,
            "aws_access_key": None,
            "aws_secret_access_key": None,
        }
    if "api_key_set" not in ss:
        ss.api_key_set = False
    if "add_meeting" not in ss:
        ss.add_meeting = False
    if "Clustering_obj" not in ss:
        ss.Clustering_obj = Clustering()

    # Main screen - transcript editors
    if "transcript_speaker_editor" not in ss:
        ss.transcript_speaker_editor = False
    if "transcript_text_editor" not in ss:
        ss.transcript_text_editor = False
    if "meeting_name" not in ss:
        ss.meeting_name = ""
    if "df_transcript_speaker" not in ss:
        ss.df_transcript_speaker = pd.DataFrame()
    if "df_transcript_text" not in ss:
        ss.df_transcript_text = pd.DataFrame()
    # The guard must test the same key that is assigned. It previously tested
    # "updated_df", which is never created, so this frame was replaced by an
    # empty one on every call.
    if "updated_transcript_df_to_embed" not in ss:
        ss.updated_transcript_df_to_embed = pd.DataFrame()

    # Main screen - chat
    if "chat_view" not in ss:
        ss.chat_view = True
    # The LangChain object is only built once the API keys have been saved.
    if "langchain_obj" not in ss and ss.api_key_set:
        ss.langchain_obj = LangChain()
    if "query" not in ss:
        ss.query = ""
    if "responses" not in ss:
        ss["responses"] = ["How can I assist you?"]
    if "requests" not in ss:
        ss["requests"] = []
def chat_view():
    """Render the chat screen: message history, input box and Clear button."""
    st.header("Chat")
    # The history container is created before the input container.
    history_area = st.container()
    input_area = st.container()

    with input_area:
        query = st.text_input(
            "Chat Here",
            placeholder="Message Resonate ... ",
            value=ss.query,
            key="query_input",
        )
        clear_clicked = st.button("Clear")
        if clear_clicked:
            # Forget the conversation and restore the initial greeting.
            ss.langchain_obj.conversation_bufw.memory.clear()
            ss.query = ""
            ss.requests = []
            ss["responses"] = ["How can I assist you?"]
            st.rerun()
        elif query:
            # NOTE(review): the input keeps its text across reruns, so this
            # branch appears to run again on any later rerun - confirm
            # whether the duplicate request is intended.
            with st.spinner("typing..."):
                uuid_list = ss.Clustering_obj.uuid_for_query(query=query)
                print(f"Meeting Unique ID : {uuid_list}")
                chat_result = ss.langchain_obj.chat(
                    query=query, in_filter=uuid_list, complete_db_flag=False
                )
                answer = chat_result["response"]
                ss.requests.append(query)
                ss.responses.append(answer)
                ss.query = ""

    with history_area:
        # Responses and requests are interleaved by position; the greeting
        # at index 0 precedes the first request.
        for position, bot_reply in enumerate(ss["responses"]):
            message(bot_reply, key=str(position))
            if position < len(ss["requests"]):
                message(
                    ss["requests"][position],
                    is_user=True,
                    key=str(position) + "_user",
                )
def api_keys_input():
    """Render the API-key form and store the submitted keys.

    On submit, all four field values are written to ``ss.api_keys``, each
    non-empty value is exported to its environment variable,
    ``ss.api_key_set`` is set to True and the script is rerun.
    """
    # (session-state key, form label, environment variable)
    key_fields = (
        ("openai_api_key", "OpenAPI Key:", "OPENAI_API_KEY"),
        ("pinecone_api_key", "Pinecone Key:", "PINECONE_API_KEY"),
        ("aws_access_key", "AWS Access Key:", "AWS_ACCESS_KEY"),
        ("aws_secret_access_key", "AWS Secret Access Key:", "AWS_SECRET_ACCESS_KEY"),
    )

    with st.form("keys_input_form"):
        # Pre-fill every field with the value currently held in session state.
        entered = {}
        for state_key, label, _env_name in key_fields:
            entered[state_key] = st.text_input(
                label,
                type="password",
                value=ss.api_keys.get(state_key, ""),
            )

        save_button = st.form_submit_button("Save API Keys")
        if save_button:
            for state_key, _label, env_name in key_fields:
                ss.api_keys[state_key] = entered[state_key]
                # Export only non-empty values so an empty field does not
                # overwrite a variable that is already set.
                if entered[state_key]:
                    os.environ[env_name] = entered[state_key]
            ss.api_key_set = True
            # Log which keys were supplied, never their values: these are
            # credentials and must not be written to stdout.
            provided = [name for name, value in entered.items() if value]
            print("API keys provided for: ", provided)
            st.rerun()
def add_meeting():
    """Render the upload form and transcribe the submitted recording.

    A video upload is saved under ``data/videoFiles/`` and converted to a
    ``.wav`` file under ``data/audioFiles/``; an audio upload is saved under
    ``data/audioFiles/`` directly. The result of ``aws_transcribe`` is stored
    in ``ss.df_transcript_speaker`` and the speaker editor is switched on.
    """
    with st.form("add_meeting_form"):
        uploaded_file = st.file_uploader("Choose a file", type=["wav", "mp4"])
        meeting_name = st.text_input("Enter Meeting Name:")
        save_meeting_button = st.form_submit_button("Save Meeting")

        if not save_meeting_button:
            return
        if not meeting_name:
            st.warning("Please enter Meeting Name.")
            return
        if uploaded_file is None:
            st.warning("Please upload a meeting recording.")
            return

        with st.spinner("Processing..."):
            # Keep only the final path component of the client-supplied name
            # so it cannot point outside the data directories.
            file_name = os.path.basename(uploaded_file.name).replace(" ", "_")
            # Split the extension instead of slicing a fixed four characters,
            # and compare it case-insensitively so "X.MP4" is handled too.
            stem, extension = os.path.splitext(file_name)
            extension = extension.lower()

            if extension in (".mp4", ".mpeg4"):
                print("in video")
                os.makedirs("data/videoFiles", exist_ok=True)
                os.makedirs("data/audioFiles", exist_ok=True)
                video_path = "data/videoFiles/" + file_name
                with open(video_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                # Transcription runs on the extracted audio track.
                file_name = stem + ".wav"
                convert_video_to_audio(video_path, "data/audioFiles/" + file_name)
            elif extension == ".wav":
                print("in audio")
                os.makedirs("data/audioFiles", exist_ok=True)
                with open("data/audioFiles/" + file_name, "wb") as f:
                    f.write(uploaded_file.getbuffer())
            else:
                # Nothing was written to disk, so there is nothing to transcribe.
                st.warning("Unsupported file type. Please upload a .wav or .mp4 file.")
                return

            ss.df_transcript_speaker = aws_transcribe(file_name)
            ss.meeting_name = meeting_name
            ss.transcript_speaker_editor = True
def transcript_speaker_editor():
    """Render the form for renaming the speaker labels of a transcript.

    Shows ``ss.df_transcript_speaker`` and one text input per distinct value
    of its ``speaker_label`` column. On submit, the renamed transcript is
    stored in ``ss.df_transcript_text``, the text editor is switched on and
    the script is rerun.
    """
    ss.add_meeting = False
    with st.form("transcript_speaker_editor_form"):
        st.write("Transcript Speaker Editor:")
        st.dataframe(ss.df_transcript_speaker)

        # One text input per distinct label, pre-filled with the current label.
        updated_speaker_names = {}
        for speaker_label in ss.df_transcript_speaker["speaker_label"].unique():
            updated_speaker_names[speaker_label] = st.text_input(
                f"Edit speaker label '{speaker_label}'", speaker_label
            )
        update_speaker_button = st.form_submit_button("Update Speakers")

    if update_speaker_button:
        df = ss.df_transcript_speaker.copy(deep=True)
        # Apply all renames in one pass. Replacing one label at a time chains
        # the renames: swapping two labels (A -> B, B -> A) would first turn
        # every A into B and then every B back into A, merging both speakers.
        # Every label in the column is a key of the mapping, so no value is
        # left unmapped.
        df["speaker_label"] = df["speaker_label"].map(updated_speaker_names)
        ss.df_transcript_speaker = pd.DataFrame()
        ss.df_transcript_text = df
        ss.transcript_text_editor = True
        ss.transcript_speaker_editor = False
        st.rerun()
# Function to update the text column
def transcript_text_editor_update_text(row_index, new_text):
    """Set the ``text`` cell of row ``row_index`` in the working transcript.

    The working transcript is ``ss.updated_transcript_df_to_embed``; it is
    modified in place.
    """
    working_df = ss.updated_transcript_df_to_embed
    working_df.at[row_index, "text"] = new_text
def transcript_text_editor():
    """Render the transcript text editor and upload the finished transcript.

    The user picks a row, edits its text and clicks "Update Text". Clicking
    "Finish Transcript Editing" summarizes the transcript, rebuilds the
    clusters, upserts the transcript to Pinecone and returns to the chat view.
    """
    ss.transcript_speaker_editor = False
    st.write("Transcript Text Editor:")
    st.write(ss.df_transcript_text)

    if ss.df_transcript_text.empty:
        # The widgets below need at least one row (and the time/text columns).
        st.warning("No transcript is available to edit.")
        return

    df = ss.df_transcript_text.copy(deep=True)
    # The working copy is rebuilt from the source frame on every run.
    ss.updated_transcript_df_to_embed = df.copy(deep=True)

    # Convert start_time and end_time to HH:MM:SS format
    df["start_time"] = df["start_time"].apply(transcript_text_editor_minutes_to_hhmmss)
    df["end_time"] = df["end_time"].apply(transcript_text_editor_minutes_to_hhmmss)

    row_index = st.number_input(
        "Enter the row index:",
        min_value=0,
        max_value=len(df) - 1,
        value=0,
        step=1,
    )
    new_text = st.text_area("Enter the new text:", df.at[row_index, "text"])

    if st.button("Update Text"):
        # Store the edit in the source frame as well. Because the working
        # copy is rebuilt from ss.df_transcript_text on every run, an edit
        # kept only in the copy was discarded by the next rerun and never
        # reached the upload below.
        ss.df_transcript_text.at[row_index, "text"] = new_text
        transcript_text_editor_update_text(row_index, new_text)
        st.success("Text updated successfully!")

    # Display the updated dataframe
    st.header("Updated Transcript")
    st.table(ss.updated_transcript_df_to_embed)

    if st.button("Finish Transcript Editing"):
        with st.spinner("Uploading..."):
            ss.df_transcript_text = pd.DataFrame()
            meeting_summary, meeting_uuid = summarize_runner(
                ss.updated_transcript_df_to_embed
            )
            ss.Clustering_obj.create_Cluster()
            pinecone_init_upsert(
                ss.updated_transcript_df_to_embed,
                meeting_title=ss.meeting_name,
                meeting_summary=meeting_summary,
                meeting_uuid=meeting_uuid,
            )
            ss.meeting_name = "unnamed"
            st.success("Pinecone upsert completed successfully!")
            # Leave the editor and go back to the chat view.
            ss.transcript_text_editor = False
            ss.updated_transcript_df_to_embed = pd.DataFrame()
            ss.chat_view = True
            st.rerun()
def init_streamlit():
    """Build the page: session state, sidebar, and the currently active view.

    Loads ``./config/.env`` when it exists, then renders whichever of the
    upload form, transcript editors and chat view are enabled in session
    state.
    """
    initialize_session_state()

    if os.path.exists("./config/.env"):
        load_dotenv("./config/.env")
    else:
        print(".env file does not exist, API keys must be set manually.")

    # Set initial state of the sidebar
    st.set_page_config(
        initial_sidebar_state="collapsed",
        layout="wide",
    )
    st.title("RESONATE")

    # Initializing sidebar and its components
    with st.sidebar:
        api_keys_input()
        if st.button("Upload Meeting / Chat"):
            ss.add_meeting = not ss.add_meeting
            # Derive chat_view from add_meeting instead of flipping it
            # independently. While a transcript editor is open both flags
            # are False, and flipping each one turned both views on at once.
            ss.chat_view = not ss.add_meeting
            ss.transcript_speaker_editor = False
            ss.transcript_text_editor = False

    if not ss.api_key_set:
        st.header("Pre-requisites:")
        st.write("Please set the API keys to enable the chat view.")
        st.write("Please ensure that you have already run the 'pinecone_sample_dataloader.py'")

    if ss.add_meeting and ss.api_key_set:
        add_meeting()
    if ss.transcript_speaker_editor:
        transcript_speaker_editor()
    if ss.df_transcript_text is not None and ss.transcript_text_editor:
        transcript_text_editor()
    if ss.chat_view and ss.api_key_set:
        chat_view()  # Chat view
if __name__ == "__main__":
    # Entry point when the file is run as a script.
    # Please ensure you have data loaded in Pinecone before running the Streamlit app.
    # Please refer to https://github.com/SartajBhuvaji/Resonate/blob/master/init_one_time_utils/PREREQUISITE.md
    init_streamlit()
# Test questions:
# What was discussed about cyberbullying?
# What is one new feature planned for GitLab's code search?
# What is the goal of defining maintainability for the new diffs architecture?