# NOTE: removed non-code residue from the hosting page (status text, git hashes,
# and a line-number gutter) that made this file unparsable as Python.
import tempfile

import gradio as gr
import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# Function to fetch comments from a video
def get_filtered_video_comments(api_key, video_id, username_filter, text_filter):
    """Fetch all top-level comments of a video, filtered by author and/or text.

    Args:
        api_key: YouTube Data API v3 key.
        video_id: ID of the video whose comment threads are listed.
        username_filter: case-insensitive substring the author name must
            contain; empty/None disables this filter.
        text_filter: case-insensitive substring the comment text must
            contain; empty/None disables this filter.

    Returns:
        (comments, None) on success, where comments is a list of dicts with
        keys VideoID, AuthorName, CommentedDateTime, CommentText,
        NumOfCommentLikes, CommentedUserID; or (None, error_message) if the
        API call fails.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)
    comments = []
    next_page_token = None
    while True:
        # Keep the try minimal: only the API call can raise HttpError.
        try:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token
            ).execute()
        except HttpError as e:
            return None, f"Error fetching comments: {e}"
        for item in response.get('items', []):
            comment = item['snippet']['topLevelComment']['snippet']
            author_name = comment.get('authorDisplayName', '')
            comment_text = comment.get('textDisplay', '')
            # Empty filters match everything; otherwise do a
            # case-insensitive substring test.
            if username_filter and username_filter.lower() not in author_name.lower():
                continue
            if text_filter and text_filter.lower() not in comment_text.lower():
                continue
            comments.append({
                'VideoID': video_id,
                'AuthorName': author_name,
                'CommentedDateTime': comment.get('publishedAt', ''),
                'CommentText': comment_text,
                'NumOfCommentLikes': comment.get('likeCount', 0),
                # authorChannelId can be absent (e.g. deleted accounts);
                # the original indexed it directly and would KeyError.
                'CommentedUserID': comment.get('authorChannelId', {}).get('value', ''),
            })
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
    return comments, None
# Main function
def scrape_filtered_comments(api_key, video_id, username_filter, text_filter):
    """Validate inputs, fetch matching comments, and render them as CSV text.

    Returns:
        (csv_string, None) on success, or (None, error_message) when the
        required inputs are missing, the fetch fails, or nothing matched.
    """
    # Guard clauses: bail out early on every failure path.
    if not api_key or not video_id:
        return None, "API key and video ID are required."
    comments, error = get_filtered_video_comments(
        api_key, video_id, username_filter, text_filter
    )
    if error:
        return None, error
    if not comments:
        return None, "No comments matching the criteria were found."
    return pd.DataFrame(comments).to_csv(index=False), None
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### YouTube Filtered Comments Extractor")
    api_key = gr.Textbox(label="YouTube API Key", type="password")
    video_id = gr.Textbox(label="Video ID")
    username_filter = gr.Textbox(label="Filter by Username (Optional)")
    text_filter = gr.Textbox(label="Filter by Text in Comments (Optional)")
    output_file = gr.File(label="Download Filtered Comments as CSV")
    error_msg = gr.Textbox(label="Error Message", interactive=False)

    def main(api_key, video_id, username_filter, text_filter):
        """Run the scrape and return (csv_file_path, error_message) for the UI."""
        csv_data, error = scrape_filtered_comments(
            api_key, video_id, username_filter, text_filter
        )
        if error:
            return None, error
        # gr.File expects a filesystem path, not raw CSV text. The original
        # returned the CSV string directly, which breaks the download link —
        # write it to a temp file and hand Gradio the path instead.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", delete=False, encoding="utf-8"
        ) as tmp:
            tmp.write(csv_data)
            csv_path = tmp.name
        return csv_path, None

    extract_button = gr.Button("Extract Filtered Comments")
    extract_button.click(
        main,
        [api_key, video_id, username_filter, text_filter],
        [output_file, error_msg],
    )
demo.launch()