Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
from googleapiclient.discovery import build | |
from googleapiclient.errors import HttpError | |
# Function to fetch comments from a video | |
def get_filtered_video_comments(api_key, video_id, username_filter, text_filter):
    """Fetch a video's top-level comments, keeping only those that match the filters.

    Pages through ``commentThreads().list`` (100 threads per request) until the
    response carries no ``nextPageToken``.

    Args:
        api_key: Passed to ``build`` as ``developerKey``.
        video_id: Passed to the API as ``videoId``.
        username_filter: Case-insensitive substring that must occur in the
            author's display name. A falsy value disables this filter.
        text_filter: Case-insensitive substring that must occur in the
            comment's ``textDisplay``. A falsy value disables this filter.

    Returns:
        ``(comments, None)`` on success, where ``comments`` is a list of dicts
        with the keys ``VideoID``, ``AuthorName``, ``CommentedDateTime``,
        ``CommentText``, ``NumOfCommentLikes`` and ``CommentedUserID``.
        ``(None, message)`` if any page request raises ``HttpError``; comments
        collected from earlier pages are discarded in that case.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Normalise the filters once instead of once per comment. An empty needle
    # is a substring of every string, so a disabled filter matches everything.
    username_needle = username_filter.lower() if username_filter else ''
    text_needle = text_filter.lower() if text_filter else ''

    comments = []
    next_page_token = None
    while True:
        # Only the network call is guarded; problems in the processing below
        # should surface as themselves rather than hide inside this handler.
        try:
            response = youtube.commentThreads().list(
                # Only the top-level comment is read, so the 'replies' part
                # would be fetched for nothing.
                part='snippet',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token,
            ).execute()
        except HttpError as e:
            return None, f"Error fetching comments: {e}"

        for item in response.get('items', []):
            comment = item['snippet']['topLevelComment']['snippet']
            author_name = comment.get('authorDisplayName', '')
            comment_text = comment['textDisplay']

            if username_needle not in author_name.lower():
                continue
            if text_needle not in comment_text.lower():
                continue

            # 'authorChannelId' can be missing from a comment; fall back to an
            # empty ID rather than aborting the whole scrape with a KeyError.
            author_channel = comment.get('authorChannelId') or {}
            comments.append({
                'VideoID': video_id,
                'AuthorName': author_name,
                'CommentedDateTime': comment['publishedAt'],
                'CommentText': comment_text,
                'NumOfCommentLikes': comment['likeCount'],
                'CommentedUserID': author_channel.get('value', ''),
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return comments, None
# Main function | |
def scrape_filtered_comments(api_key, video_id, username_filter, text_filter):
    """Validate the inputs, fetch the matching comments and render them as CSV.

    Args:
        api_key: YouTube API key; required.
        video_id: ID of the video to read comments from; required.
        username_filter: Optional author-name filter, forwarded unchanged to
            ``get_filtered_video_comments``.
        text_filter: Optional comment-text filter, forwarded unchanged to
            ``get_filtered_video_comments``.

    Returns:
        ``(csv_text, None)`` when at least one comment matched, where
        ``csv_text`` is the CSV content as a string (not a file path).
        ``(None, message)`` when a required input is missing, the fetch
        reported an error, or nothing matched.
    """
    # Pasted values often carry stray spaces or newlines. Strip them so a
    # blank-looking field counts as missing and a padded key or ID is not
    # sent to the API verbatim. ``or ""`` also tolerates None.
    api_key = (api_key or "").strip()
    video_id = (video_id or "").strip()
    if not api_key or not video_id:
        return None, "API key and video ID are required."

    comments, error = get_filtered_video_comments(
        api_key, video_id, username_filter, text_filter
    )
    if error:
        return None, error
    if not comments:
        return None, "No comments matching the criteria were found."

    return pd.DataFrame(comments).to_csv(index=False), None
# Gradio Interface | |
with gr.Blocks() as demo:
    gr.Markdown("### YouTube Filtered Comments Extractor")

    # Inputs
    api_key = gr.Textbox(label="YouTube API Key", type="password")
    video_id = gr.Textbox(label="Video ID")
    username_filter = gr.Textbox(label="Filter by Username (Optional)")
    text_filter = gr.Textbox(label="Filter by Text in Comments (Optional)")

    # Outputs
    output_file = gr.File(label="Download Filtered Comments as CSV")
    error_msg = gr.Textbox(label="Error Message", interactive=False)

    def main(api_key, video_id, username_filter, text_filter):
        """Button callback: return ``(csv_file_path, error_message)``.

        Exactly one element of the pair is set. On success the CSV text is
        written to a temporary ``.csv`` file and that file's path is returned,
        because the ``gr.File`` output takes a file path, not file contents.
        """
        # Imported here because only this callback needs it.
        import tempfile

        csv_data, error = scrape_filtered_comments(
            api_key, video_id, username_filter, text_filter
        )
        if error:
            return None, error

        # delete=False keeps the file on disk after the handle closes so it
        # can still be served; newline="" writes the CSV's line endings as-is.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".csv",
            prefix="filtered_comments_",
            delete=False,
            encoding="utf-8",
            newline="",
        ) as csv_file:
            csv_file.write(csv_data)
        return csv_file.name, None

    extract_button = gr.Button("Extract Filtered Comments")
    extract_button.click(
        main,
        [api_key, video_id, username_filter, text_filter],
        [output_file, error_msg],
    )

demo.launch()