File size: 4,327 Bytes
8da79d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
import re

import googleapiclient.discovery
import pandas as pd
import plotly.express as px
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the BERT tokenizer and model. Both are loaded once, at import time,
# from the same pretrained sentiment checkpoint.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)


# Set up the YouTube API service
api_service_name = "youtube"
api_version = "v3"
# SECURITY: the API key is read from the environment instead of being
# hard-coded, so the secret never lives in version control. Any key that was
# previously committed in this file should be treated as leaked and revoked.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY", "")
if not DEVELOPER_KEY:
    raise RuntimeError(
        "YOUTUBE_API_KEY environment variable is not set; "
        "it is required to call the YouTube Data API."
    )

youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)

# Function to fetch comments for a video ID
def scrape_comments(video_id):
    """Fetch up to 100 top-level comments for a YouTube video.

    Only a single request is made, so at most ``maxResults`` (100) comment
    threads are returned.

    Args:
        video_id: ID of the video whose comment threads are requested.

    Returns:
        A DataFrame with a single ``comment`` column holding the text of each
        top-level comment. It is empty when the response carries no items.
    """
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100,
        # Ask for plain text so that HTML markup and entities in the comment
        # body are not fed to the sentiment model.
        textFormat="plainText",
    )
    response = request.execute()

    comments = [
        item['snippet']['topLevelComment']['snippet']['textDisplay']
        for item in response.get('items', [])
    ]

    return pd.DataFrame(comments, columns=['comment'])


# Function to extract video ID from YouTube URL
# Recognised forms: "...?v=<id>", "youtu.be/<id>", "/shorts/<id>",
# "/embed/<id>" and "/live/<id>". The ID is captured in group 1.
_VIDEO_ID_RE = re.compile(r'(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([\w-]+)')


def extract_video_id(video_url):
    """Extract the video ID from a YouTube URL.

    Args:
        video_url: URL (or URL fragment) entered by the user. ``None`` or an
            empty string is treated as invalid.

    Returns:
        The video ID string, or ``None`` when no ID can be found. In the
        latter case an error message is shown through Streamlit.
    """
    match = _VIDEO_ID_RE.search(video_url or "")
    if match:
        return match.group(1)

    st.error("Invalid YouTube video URL")
    return None

# Function to fetch YouTube comments for a video ID
def fetch_comments(video_id):
    """Return the comments for ``video_id``.

    This is a thin wrapper: the work is delegated to ``scrape_comments`` and
    its result is returned unchanged.
    """
    return scrape_comments(video_id)

# Function to analyze sentiment for a single comment
def analyze_sentiment(comment):
    """Classify a single comment as "Positive", "Negative" or "Neutral".

    Args:
        comment: Comment text to classify. The encoded input is truncated to
            at most 512 tokens.

    Returns:
        "Positive" when the predicted 1-based class is above 3, "Negative"
        when it is below 3, and "Neutral" when it is exactly 3.
    """
    tokens = tokenizer.encode(comment, return_tensors="pt", max_length=512, truncation=True)

    # Inference only: disable autograd so no gradient graph is built and kept
    # alive for every comment.
    with torch.no_grad():
        result = model(tokens)

    # argmax yields a 0-based class index; +1 turns it into a 1-based rating.
    # .item() converts the tensor to a plain Python int before comparing.
    sentiment_id = int(torch.argmax(result.logits).item()) + 1
    if sentiment_id > 3:
        return "Positive"
    if sentiment_id < 3:
        return "Negative"
    return "Neutral"


def main():
    """Render the Streamlit page and run the analysis on demand.

    The page asks for a YouTube URL. When the button is pressed, the video's
    comments are fetched, each one is labelled with ``analyze_sentiment``,
    and the label distribution is shown as a pie chart and a bar chart.
    """
    st.title("YouTube Comments Sentiment Analysis")
    st.write("Enter a YouTube video link below:")

    video_url = st.text_input("YouTube Video URL:")
    if not st.button("Extract Comments and Analyze"):
        return

    video_id = extract_video_id(video_url)
    if not video_id:
        # extract_video_id reports the invalid URL to the user itself.
        return

    comments_df = fetch_comments(video_id)
    if comments_df.empty:
        # Nothing to classify or plot.
        st.warning("No comments found for this video.")
        return

    # Each comment is clipped to 512 characters before classification.
    comments_df['sentiment'] = comments_df['comment'].apply(lambda x: analyze_sentiment(x[:512]))
    sentiment_counts = comments_df['sentiment'].value_counts()

    color_map = {'Positive': 'green', 'Negative': 'red', 'Neutral': 'blue'}

    # The pie chart always shows all three categories in a fixed order, so
    # the counts and the colour keys are built from the same label list.
    # Using value_counts().index for the colours would break when a category
    # is absent (length mismatch) and mis-colour slices otherwise (the index
    # is ordered by frequency, not by label).
    labels = ['Positive', 'Negative', 'Neutral']
    counts = [int(sentiment_counts.get(label, 0)) for label in labels]

    fig_pie = px.pie(values=counts,
                     names=labels,
                     title='Pie chart representations',
                     color=labels,
                     color_discrete_map=color_map)
    st.plotly_chart(fig_pie, use_container_width=True)

    # The bar chart shows only the categories that actually occur.
    fig_bar = px.bar(x=sentiment_counts.index, y=sentiment_counts.values,
                     labels={'x': 'Sentiment', 'y': 'Count'},
                     title='Bar plot representations',
                     color=sentiment_counts.index,
                     color_discrete_map=color_map)
    st.plotly_chart(fig_bar)


# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()