import os
from git import Repo
import streamlit as st
import time
from PIL import Image
import base64
from transformers import pipeline
import spacy
import googleapiclient
import numpy as np
from sentence_transformers import SentenceTransformer
from matplotlib import colormaps
from matplotlib.colors import ListedColormap

# Secrets/configuration are read eagerly: a missing variable raises KeyError
# at import time rather than failing later in the middle of a request.
GITHUB_PAT = os.environ['GITHUB']
SENTIMENT = os.environ['SENTIMENT']
EMBEDDING = os.environ['EMBEDDING']

# The helper package lives in a separate repository that is cloned on first
# start-up; later runs reuse the existing 'repo_directory' checkout.
if not os.path.exists('repo_directory'):
    try:
        Repo.clone_from(
            f'https://marcus-t-s:{GITHUB_PAT}@github.com/marcus-t-s/yt-comment-analyser.git',
            'repo_directory',
        )
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # SystemExit / KeyboardInterrupt still propagate. The underlying error
        # is deliberately not shown to the user because the clone URL embeds
        # the access token.
        st.error("Error: Oops there's an issue on our end, please wait a moment and try again.")
        st.stop()

# from repo_directory.all_utils import *
from repo_directory.utils.chart_utils import *
from repo_directory.youtube_comment_class import *

# Streamlit configuration
st.set_page_config(
    page_title="ViewerVoice | YouTube Comment Analyser",
    layout="wide",
    page_icon=Image.open('images/page_icon.png'),
)


# Define and load cached resources
@st.cache_resource
def load_models():
    """Load the NLP models used by the app.

    Decorated with ``st.cache_resource`` so the models are created once and
    reused instead of being reloaded on each script rerun.

    Returns:
        tuple: ``(sentiment_pipeline, embedding_model, spacy_nlp)`` where
        ``sentiment_pipeline`` is a transformers "sentiment-analysis"
        pipeline, ``embedding_model`` is a ``SentenceTransformer`` and
        ``spacy_nlp`` is the ``en_core_web_sm`` spaCy pipeline after
        ``add_custom_stopwords`` has been applied to it.
    """
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model=r"cardiffnlp/twitter-roberta-base-sentiment",
    )
    embedding_model = SentenceTransformer('flax-sentence-embeddings/all_datasets_v4_MiniLM-L6')
    spacy_nlp = spacy.load("en_core_web_sm")
    # add_custom_stopwords is not defined in this file; presumably it comes
    # from one of the repo_directory star imports above.
    add_custom_stopwords(spacy_nlp, {"bring", "know", "come"})
    return sentiment_pipeline, embedding_model, spacy_nlp


@st.cache_resource
def load_colors_image():
    """Load the static image and colour assets.

    Returns:
        tuple: ``(mask, colors, logo_image)`` where ``mask`` is the YouTube
        icon image as a NumPy array, ``colors`` is a ``ListedColormap`` built
        from 256 samples of the 0.4-0.8 range of matplotlib's ``Reds``
        colormap, and ``logo_image`` is the cropped logo PNG encoded as a
        base64 UTF-8 string.
    """
    mask = np.array(Image.open('images/youtube_icon.jpg'))
    Reds = colormaps['Reds']
    colors = ListedColormap(Reds(np.linspace(0.4, 0.8, 256)))
    with open("images/viewervoice_logo_crop.png", "rb") as img_file:
        logo_image = base64.b64encode(img_file.read()).decode("utf-8")
    return mask, colors, logo_image


sentiment_pipeline, embedding_model, spacy_nlp = load_models()
mask, colors, logo_image = load_colors_image()

# Hide line at the top and "made with streamlit" text
hide_decoration_bar_style = """ """
st.markdown(hide_decoration_bar_style, unsafe_allow_html=True)

# Session-state keys that hold the generated figures; they are always
# created and reset as a group.
_FIGURE_KEYS = ("comment_fig", "wordcloud_fig", "topic_fig", "sentiment_fig")

# The parser is built lazily so a new instance is only constructed when the
# session does not already have one.
if 'YouTubeParser' not in st.session_state:
    st.session_state['YouTubeParser'] = YoutubeCommentParser()

# The presence of 'comment_fig' decides whether all figure slots are seeded.
if 'comment_fig' not in st.session_state:
    for _fig_key in _FIGURE_KEYS:
        st.session_state[_fig_key] = None

# Remaining placeholders, each seeded only when absent from the session.
for _state_key, _initial_value in (
    ('rerun_button', "INIT"),
    ('topic_filter', False),
    ('sentiment_filter', False),
    ('filter_state', "INIT"),
    ('video_link', None),
    ('num_comments', None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Set reference to YouTubeParser object for more concise code
yt_parser = st.session_state['YouTubeParser']
main_page = st.container()


def query_comments_button():
    """Reset the session before a new comment query.

    Used as the ``on_click`` callback of the "Query comments" button. Removes
    the figures and the parser from session state, marks the session as
    "QUERYING", resets the filter state and flags, then stores empty figure
    placeholders and a new ``YoutubeCommentParser``.
    """
    state = st.session_state

    # Delete the larger objects first; they are replaced further down.
    for stale_key in (*_FIGURE_KEYS, "YouTubeParser"):
        del state[stale_key]

    # Put the control variables back to their placeholder values.
    state["rerun_button"] = "QUERYING"
    state["filter_state"] = "INIT"
    for flag_key in ("topic_filter", "sentiment_filter", "semantic_filter", "figures_built"):
        state[flag_key] = False
    for fig_key in _FIGURE_KEYS:
        state[fig_key] = None
    state["YouTubeParser"] = YoutubeCommentParser()


def filter_visuals_button():
    """Mark the session as "FILTERING".

    Used as the ``on_click`` callback of the "Filter visualisations" button.
    """
    st.session_state.filter_state = "FILTERING"


with st.sidebar:
    st.session_state["video_link"] = st.text_input('YouTube Video URL', value="")
st.session_state["max_comments"] = st.slider(label="Maximum number of comments to query", min_value=100, max_value=3000, step=100) st.session_state["max_topics"] = st.slider(label="Maximum number of topics", min_value=5, max_value=20, step=1) st.button('Query comments :left_speech_bubble:', on_click=query_comments_button) with main_page: # Reduce space at the top reduce_header_height_style = """ """ st.markdown(reduce_header_height_style, unsafe_allow_html=True) # Title and intro section markdown_content = f"""
Made by Afiba Annor Marcus Singh
📝 Notes
", unsafe_allow_html=True) html_content = """Comments
""", unsafe_allow_html=True) st.plotly_chart(st.session_state["table_fig"], use_container_width=True) with word_cloud_col: st.markdown(f"""Word Cloud
""", unsafe_allow_html=True) st.pyplot(st.session_state["wordcloud_fig"], use_container_width=True) treemap_col, sentiment_donut_col = st.columns([0.55, 0.45]) with treemap_col: st.markdown(f"""Topic Proportions
""", unsafe_allow_html=True) st.plotly_chart(st.session_state["topic_fig"], use_container_width=True) with sentiment_donut_col: st.markdown(f"""Sentiment Distribution
""", unsafe_allow_html=True) st.plotly_chart(st.session_state["sentiment_fig"], use_container_width=True) # st.table(yt_parser.df_comments.head()) else: st.write("Unfortunately we couldn't find any comments for this set of filters, please try " "editing the filters and try again") with st.sidebar: # Define the HTML and CSS for the button-style container if st.session_state['num_comments'] is not None: num_comments = st.session_state['num_comments'] else: num_comments = 0 htmlstr = f"""{num_comments}
""" # Display the button-style container with number of comments st.subheader("Number of comments") st.markdown(htmlstr, unsafe_allow_html=True) # Filters section st.subheader("Filters") if yt_parser.df_comments is not None: st.session_state["topic_filter"] = st.multiselect("Topic", options=sorted(list(yt_parser.df_comments['Topic'].unique()))) st.session_state["sentiment_filter"] = st.multiselect("Sentiment", options=list(yt_parser.df_comments['Sentiment'].unique())) st.session_state["semantic_filter"] = st.text_input("Keyword search", max_chars=30) st.button('Filter visualisations :sleuth_or_spy:', on_click=filter_visuals_button) else: st.multiselect("Topic", options=["Please query comments from a video"], disabled=True) st.multiselect("Sentiment", options=["Please query comments from a video"], disabled=True) st.text_input("Keyword search", disabled=True) st.button('Please query comments before filtering', disabled=True)