Abu1998 committed on
Commit
3089e3a
·
verified ·
1 Parent(s): 5d12f9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -110
app.py CHANGED
@@ -3,49 +3,8 @@ import pandas as pd
3
  from googleapiclient.discovery import build
4
  from googleapiclient.errors import HttpError
5
 
6
- # Function to get the channel ID from the channel name
7
- def get_channel_id(api_key, channel_name):
8
- youtube = build('youtube', 'v3', developerKey=api_key)
9
- try:
10
- response = youtube.search().list(
11
- part='snippet',
12
- q=channel_name,
13
- type='channel',
14
- maxResults=1
15
- ).execute()
16
-
17
- if response['items']:
18
- return response['items'][0]['id']['channelId']
19
- else:
20
- return None, "No channel found with that name."
21
- except HttpError as e:
22
- return None, f"Error fetching channel ID: {e}"
23
-
24
- # Function to fetch videos from a channel
25
- def get_channel_videos(api_key, channel_id, max_results):
26
- youtube = build('youtube', 'v3', developerKey=api_key)
27
- videos = []
28
- try:
29
- response = youtube.search().list(
30
- part='snippet',
31
- channelId=channel_id,
32
- maxResults=max_results,
33
- type='video',
34
- order='date'
35
- ).execute()
36
-
37
- for item in response['items']:
38
- videos.append({
39
- 'video_id': item['id']['videoId'],
40
- 'title': item['snippet']['title']
41
- })
42
- except HttpError as e:
43
- return None, f"Error fetching videos: {e}"
44
-
45
- return videos, None
46
-
47
  # Function to fetch comments from a video
48
- def get_video_comments(api_key, video_id, filters):
49
  youtube = build('youtube', 'v3', developerKey=api_key)
50
  comments = []
51
  next_page_token = None
@@ -61,40 +20,21 @@ def get_video_comments(api_key, video_id, filters):
61
 
62
  for item in response['items']:
63
  comment = item['snippet']['topLevelComment']['snippet']
64
- published_at = comment['publishedAt']
65
- like_count = comment['likeCount']
66
- reply_count = item['snippet']['totalReplyCount']
67
 
68
- # Apply filters
69
- if (
70
- like_count >= filters['likes'] and
71
- reply_count >= filters['replies'] and
72
- (not filters['date'] or published_at >= filters['date'])
73
- ):
74
  comments.append({
75
  'VideoID': video_id,
76
- 'Channel': comment.get('authorChannelUrl', ''),
77
- 'CommentedDateTime': published_at,
78
- 'NumOfCommentLikes': like_count,
79
- 'NumOfReplies': reply_count,
80
- 'Comment': comment['textDisplay'],
81
  'CommentedUserID': comment['authorChannelId']['value']
82
  })
83
 
84
- # Handle replies (if any)
85
- if 'replies' in item:
86
- for reply in item['replies']['comments']:
87
- reply_snippet = reply['snippet']
88
- comments.append({
89
- 'VideoID': video_id,
90
- 'Channel': reply_snippet.get('authorChannelUrl', ''),
91
- 'CommentedDateTime': reply_snippet['publishedAt'],
92
- 'NumOfCommentLikes': reply_snippet['likeCount'],
93
- 'NumOfReplies': 0,
94
- 'Comment': reply_snippet['textDisplay'],
95
- 'CommentedUserID': reply_snippet['authorChannelId']['value']
96
- })
97
-
98
  next_page_token = response.get('nextPageToken')
99
  if not next_page_token:
100
  break
@@ -105,64 +45,40 @@ def get_video_comments(api_key, video_id, filters):
105
  return comments, None
106
 
107
  # Main function
108
- def scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
109
- if not api_key or not channel_name_or_id:
110
- return None, "API key and channel name/ID are required."
111
 
112
- if "UC" in channel_name_or_id:
113
- channel_id = channel_name_or_id
114
- error = None
115
- else:
116
- channel_id, error = get_channel_id(api_key, channel_name_or_id)
117
-
118
- if error:
119
- return None, error
120
-
121
- videos, error = get_channel_videos(api_key, channel_id, num_videos)
122
  if error:
123
  return None, error
124
 
125
- all_comments = []
126
- filters = {
127
- 'likes': min_likes,
128
- 'replies': min_replies,
129
- 'date': filter_date if filter_date else None
130
- }
131
-
132
- for video in videos:
133
- video_comments, error = get_video_comments(api_key, video['video_id'], filters)
134
- if error:
135
- return None, error
136
- all_comments.extend(video_comments)
137
-
138
- if all_comments:
139
- df_comments = pd.DataFrame(all_comments)
140
  csv_data = df_comments.to_csv(index=False)
141
  return csv_data, None
142
  else:
143
- return None, "No comments found for the selected videos."
144
 
145
  # Gradio Interface
146
  with gr.Blocks() as demo:
147
- gr.Markdown("### YouTube Comments Extractor")
148
  api_key = gr.Textbox(label="YouTube API Key", type="password")
149
- channel_name_or_id = gr.Textbox(label="Channel Name or ID")
150
- num_videos = gr.Slider(label="Number of Videos to Scrape", minimum=1, maximum=50, step=1, value=10)
151
- min_likes = gr.Slider(label="Minimum Likes on Comments", minimum=0, maximum=100, step=1, value=0)
152
- min_replies = gr.Slider(label="Minimum Replies on Comments", minimum=0, maximum=50, step=1, value=0)
153
- filter_date = gr.Textbox(label="Filter Comments After This Date (YYYY-MM-DD, Optional)")
154
 
155
- output_file = gr.File(label="Download Extracted Comments as CSV")
156
  error_msg = gr.Textbox(label="Error Message", interactive=False)
157
 
158
- def main(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
159
- csv_data, error = scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date)
160
  if error:
161
  return None, error
162
  else:
163
  return csv_data, None
164
 
165
- extract_button = gr.Button("Extract Comments")
166
- extract_button.click(main, [api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date], [output_file, error_msg])
167
 
168
  demo.launch()
 
3
  from googleapiclient.discovery import build
4
  from googleapiclient.errors import HttpError
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  # Function to fetch comments from a video
7
+ def get_filtered_video_comments(api_key, video_id, username_filter, text_filter):
8
  youtube = build('youtube', 'v3', developerKey=api_key)
9
  comments = []
10
  next_page_token = None
 
20
 
21
  for item in response['items']:
22
  comment = item['snippet']['topLevelComment']['snippet']
23
+ author_name = comment.get('authorDisplayName', '')
24
+ comment_text = comment['textDisplay']
 
25
 
26
+ # Check if the comment matches the username or contains the specified text
27
+ if (username_filter.lower() in author_name.lower() if username_filter else True) and \
28
+ (text_filter.lower() in comment_text.lower() if text_filter else True):
 
 
 
29
  comments.append({
30
  'VideoID': video_id,
31
+ 'AuthorName': author_name,
32
+ 'CommentedDateTime': comment['publishedAt'],
33
+ 'CommentText': comment_text,
34
+ 'NumOfCommentLikes': comment['likeCount'],
 
35
  'CommentedUserID': comment['authorChannelId']['value']
36
  })
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  next_page_token = response.get('nextPageToken')
39
  if not next_page_token:
40
  break
 
45
  return comments, None
46
 
47
# Main function
def scrape_filtered_comments(api_key, video_id, username_filter, text_filter):
    """Fetch a video's comments, filter them, and serialize the hits to CSV.

    Returns a ``(csv_data, error)`` pair where exactly one element is
    ``None``: ``csv_data`` is the CSV text on success, ``error`` is a
    human-readable message otherwise.
    """
    # Guard clause: an API key and a target video are both mandatory.
    if not api_key or not video_id:
        return None, "API key and video ID are required."

    comments, error = get_filtered_video_comments(
        api_key, video_id, username_filter, text_filter
    )
    if error:
        return None, error

    if not comments:
        return None, "No comments matching the criteria were found."

    # Serialize the matching comments without pandas' index column.
    return pd.DataFrame(comments).to_csv(index=False), None
62
 
63
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### YouTube Filtered Comments Extractor")
    api_key = gr.Textbox(label="YouTube API Key", type="password")
    video_id = gr.Textbox(label="Video ID")
    username_filter = gr.Textbox(label="Filter by Username (Optional)")
    text_filter = gr.Textbox(label="Filter by Text in Comments (Optional)")

    output_file = gr.File(label="Download Filtered Comments as CSV")
    error_msg = gr.Textbox(label="Error Message", interactive=False)

    def main(api_key, video_id, username_filter, text_filter):
        """Click handler: scrape filtered comments and expose them for download.

        Returns ``(file_path, error)`` matching the ``[output_file,
        error_msg]`` outputs of the click binding; exactly one element
        is ``None``.
        """
        csv_data, error = scrape_filtered_comments(
            api_key, video_id, username_filter, text_filter
        )
        if error:
            return None, error
        # BUG FIX: gr.File expects a filesystem path, not raw CSV text.
        # Returning the string made the download component fail, so write
        # the data to a temp file and hand Gradio the path instead.
        import tempfile
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False, encoding="utf-8"
        ) as tmp:
            tmp.write(csv_data)
        return tmp.name, None

    extract_button = gr.Button("Extract Filtered Comments")
    extract_button.click(
        main,
        [api_key, video_id, username_filter, text_filter],
        [output_file, error_msg],
    )

demo.launch()