File size: 3,299 Bytes
9214aad
3f279da
 
 
 
9214aad
3089e3a
3f279da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3089e3a
 
9214aad
3089e3a
 
 
9214aad
 
3089e3a
 
 
 
9214aad
 
 
3f279da
 
 
 
 
9214aad
3f279da
9214aad
3f279da
9214aad
3089e3a
 
 
3f279da
3089e3a
9214aad
 
 
3089e3a
 
9214aad
 
3f279da
3089e3a
9214aad
 
 
3089e3a
9214aad
3089e3a
 
 
9214aad
3089e3a
9214aad
 
3089e3a
 
9214aad
 
 
 
 
3089e3a
 
9214aad
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import tempfile

import gradio as gr
import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Function to fetch comments from a video
def get_filtered_video_comments(api_key, video_id, username_filter, text_filter):
    """Fetch all top-level comments of a YouTube video, optionally filtered.

    Pages through the commentThreads endpoint (100 per page) until no
    nextPageToken is returned.

    Args:
        api_key: YouTube Data API v3 key.
        video_id: ID of the video whose comments are fetched.
        username_filter: case-insensitive substring the author name must
            contain; empty/None disables this filter.
        text_filter: case-insensitive substring the comment text must
            contain; empty/None disables this filter.

    Returns:
        (comments, None) on success, where comments is a list of dicts,
        or (None, error_message) if the API call fails.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)
    comments = []
    next_page_token = None

    while True:
        # Keep the try minimal: only the API call can raise HttpError.
        try:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token
            ).execute()
        except HttpError as e:
            return None, f"Error fetching comments: {e}"

        for item in response.get('items', []):
            comment = item['snippet']['topLevelComment']['snippet']
            author_name = comment.get('authorDisplayName', '')
            comment_text = comment.get('textDisplay', '')

            # Guard clauses: skip comments that fail an active filter.
            if username_filter and username_filter.lower() not in author_name.lower():
                continue
            if text_filter and text_filter.lower() not in comment_text.lower():
                continue

            # authorChannelId can be missing (e.g. deleted accounts);
            # the original hard index raised KeyError in that case.
            author_channel = comment.get('authorChannelId') or {}
            comments.append({
                'VideoID': video_id,
                'AuthorName': author_name,
                'CommentedDateTime': comment.get('publishedAt', ''),
                'CommentText': comment_text,
                'NumOfCommentLikes': comment.get('likeCount', 0),
                'CommentedUserID': author_channel.get('value', ''),
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return comments, None

# Main function
def scrape_filtered_comments(api_key, video_id, username_filter, text_filter):
    """Validate inputs, fetch filtered comments, and write them to a CSV file.

    Args:
        api_key: YouTube Data API v3 key (required).
        video_id: ID of the target video (required).
        username_filter: optional author-name substring filter.
        text_filter: optional comment-text substring filter.

    Returns:
        (csv_path, None) on success — csv_path is the path of a temporary
        CSV file suitable for a gr.File output — or (None, error_message)
        on validation failure, API error, or when nothing matched.
    """
    if not api_key or not video_id:
        return None, "API key and video ID are required."

    comments, error = get_filtered_video_comments(api_key, video_id, username_filter, text_filter)
    if error:
        return None, error

    if not comments:
        return None, "No comments matching the criteria were found."

    df_comments = pd.DataFrame(comments)
    # BUG FIX: gr.File expects a file *path*; the original returned the raw
    # CSV text, which Gradio then treated as a nonexistent path. Persist the
    # CSV to a temp file (delete=False so Gradio can read it afterwards).
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.csv', delete=False, newline='', encoding='utf-8'
    ) as tmp:
        df_comments.to_csv(tmp, index=False)
        return tmp.name, None

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### YouTube Filtered Comments Extractor")

    # Input widgets (declaration order defines the on-screen layout).
    api_key = gr.Textbox(label="YouTube API Key", type="password")
    video_id = gr.Textbox(label="Video ID")
    username_filter = gr.Textbox(label="Filter by Username (Optional)")
    text_filter = gr.Textbox(label="Filter by Text in Comments (Optional)")

    # Output widgets: the CSV download and a read-only error display.
    output_file = gr.File(label="Download Filtered Comments as CSV")
    error_msg = gr.Textbox(label="Error Message", interactive=False)

    def main(key, vid, user_substr, text_substr):
        """Button handler: returns (file_output, error_output)."""
        csv_data, error = scrape_filtered_comments(key, vid, user_substr, text_substr)
        return (None, error) if error else (csv_data, None)

    extract_button = gr.Button("Extract Filtered Comments")
    extract_button.click(
        main,
        [api_key, video_id, username_filter, text_filter],
        [output_file, error_msg],
    )

demo.launch()