Abu1998 committed on
Commit 3f279da · verified · 1 Parent(s): 9b3cfd9

Update app.py

Files changed (1)
  1. app.py +126 -93
app.py CHANGED
@@ -1,93 +1,126 @@
- import gradio as gr
- import requests
- import csv
- import re
- from datetime import timedelta
-
- # YouTube API key and required endpoint
- API_KEY = 'YOUR_YOUTUBE_API_KEY' # Replace with your API key
- YOUTUBE_API_URL = 'https://www.googleapis.com/youtube/v3/videos'
-
- def parse_duration(duration):
-     """
-     Parse ISO 8601 duration string to seconds.
-     Example: "PT1H10M5S" -> 4205 seconds
-     """
-     regex = re.match(r"PT(\d+H)?(\d+M)?(\d+S)?", duration)
-     hours = int(regex.group(1)[:-1] if regex.group(1) else 0)
-     minutes = int(regex.group(2)[:-1] if regex.group(2) else 0)
-     seconds = int(regex.group(3)[:-1] if regex.group(3) else 0)
-     return hours * 3600 + minutes * 60 + seconds
-
- def fetch_video_data(video_ids):
-     """
-     Fetch video details (title, duration) from YouTube API.
-     """
-     video_data = []
-     total_time_spent = 0 # Total time spent in seconds
-
-     for video_id in video_ids:
-         params = {
-             'part': 'contentDetails,snippet',
-             'id': video_id,
-             'key': API_KEY
-         }
-         response = requests.get(YOUTUBE_API_URL, params=params)
-         data = response.json()
-
-         if 'items' in data:
-             item = data['items'][0]
-             title = item['snippet']['title']
-             duration = item['contentDetails']['duration']
-             video_duration_seconds = parse_duration(duration)
-             total_time_spent += video_duration_seconds
-
-             video_data.append({
-                 'Video ID': video_id,
-                 'Title': title,
-                 'Duration (seconds)': video_duration_seconds
-             })
-
-     # Convert total time spent to hours and minutes
-     total_time_spent_in_hours = str(timedelta(seconds=total_time_spent))
-
-     # Save data to CSV
-     filename = "videos_time_spent.csv"
-     with open(filename, mode="w", newline="", encoding="utf-8") as file:
-         writer = csv.DictWriter(file, fieldnames=["Video ID", "Title", "Duration (seconds)"])
-         writer.writeheader()
-         writer.writerows(video_data)
-
-     return filename, total_time_spent_in_hours
-
- def gradio_interface(video_ids):
-     """
-     Gradio interface function to process video IDs and generate time statistics.
-     """
-     video_ids_list = video_ids.split(",") # Convert input string to list
-     csv_file, total_time = fetch_video_data(video_ids_list)
-     return f"Total time spent: {total_time} on these videos. Download the CSV below.", csv_file
-
- # Gradio App setup
- with gr.Blocks() as demo:
-     gr.Markdown("### Time Spent on YouTube Videos")
-     gr.Markdown("Enter a list of YouTube video IDs (comma separated) to calculate the time spent on each video.")
-
-     with gr.Row():
-         video_ids_input = gr.Textbox(label="Enter Video IDs", placeholder="e.g., dQw4w9WgXcQ, kJQP7kiw5Fk")
-
-     with gr.Row():
-         submit_button = gr.Button("Calculate Time Spent")
-
-     with gr.Row():
-         message_output = gr.Textbox(label="Result", interactive=False)
-         download_link = gr.File(label="Download CSV")
-
-     submit_button.click(
-         fn=gradio_interface,
-         inputs=[video_ids_input],
-         outputs=[message_output, download_link]
-     )
-
- # Launch the app
- demo.launch()
+ import streamlit as st
+ import pandas as pd
+ from googleapiclient.discovery import build
+ from googleapiclient.errors import HttpError
+ import time
+
+ # Streamlit UI
+ st.title("YouTube Video Comments Extractor")
+
+ # User input for API key and search query
+ api_key = st.text_input("Enter your YouTube API Key", type="password")
+ search_query = st.text_input("Enter the Search Query (e.g., MrBeast)")
+ num_videos = st.number_input("Number of Videos to Scrape", min_value=1, max_value=50, step=1)
+
+ # Function to search for videos based on a query
+ def search_videos(api_key, query, max_results):
+     youtube = build('youtube', 'v3', developerKey=api_key)
+     response = youtube.search().list(
+         part='snippet',
+         q=query,
+         type='video',
+         maxResults=max_results
+     ).execute()
+
+     videos = []
+     for item in response['items']:
+         videos.append({
+             'video_id': item['id']['videoId'],
+             'title': item['snippet']['title']
+         })
+
+     return videos
+
+ # Function to extract comments from a video
+ def get_video_comments(api_key, video_id):
+     youtube = build('youtube', 'v3', developerKey=api_key)
+     comments = []
+     next_page_token = None
+
+     while True:
+         try:
+             response = youtube.commentThreads().list(
+                 part='snippet,replies',
+                 videoId=video_id,
+                 maxResults=100,
+                 pageToken=next_page_token
+             ).execute()
+
+             for item in response['items']:
+                 comment = item['snippet']['topLevelComment']['snippet']
+                 comments.append({
+                     'VideoID': video_id,
+                     'Channel': comment.get('authorChannelUrl', ''),
+                     'CommentedDateTime': comment['publishedAt'],
+                     'NumOfCommentlikes': comment['likeCount'],
+                     'Comment': comment['textDisplay'],
+                     'CommentedUserID': comment.get('authorChannelId', {}).get('value', '')
+                 })
+
+                 # Handle replies (if any)
+                 if 'replies' in item:
+                     for reply in item['replies']['comments']:
+                         reply_snippet = reply['snippet']
+                         comments.append({
+                             'VideoID': video_id,
+                             'Channel': reply_snippet.get('authorChannelUrl', ''),
+                             'CommentedDateTime': reply_snippet['publishedAt'],
+                             'NumOfCommentlikes': reply_snippet['likeCount'],
+                             'Comment': reply_snippet['textDisplay'],
+                             'CommentedUserID': reply_snippet.get('authorChannelId', {}).get('value', '')
+                         })
+
+             next_page_token = response.get('nextPageToken')
+             if not next_page_token:
+                 break
+
+         except HttpError as e:
+             st.error(f"An error occurred while fetching comments: {e}")
+             break
+
+     return comments
+
+ # Action button
+ if st.button("Start Scraping"):
+     if api_key and search_query and num_videos:
+         st.text("Searching for videos...")
+         videos = search_videos(api_key, search_query, num_videos)
+
+         if not videos:
+             st.warning("No videos found for the given query.")
+         else:
+             st.text(f"Found {len(videos)} videos. Extracting comments...")
+             all_comments = []
+             progress_bar = st.progress(0)
+
+             for idx, video in enumerate(videos):
+                 video_id = video['video_id']
+                 st.text(f"Fetching comments for video: {video['title']} (ID: {video_id})")
+                 video_comments = get_video_comments(api_key, video_id)
+                 all_comments.extend(video_comments)
+
+                 # Update progress bar
+                 progress = (idx + 1) / len(videos)
+                 progress_bar.progress(progress)
+
+             # Save all comments to a CSV file
+             if all_comments:
+                 df_comments = pd.DataFrame(all_comments)
+                 csv_file = f"{search_query.replace(' ', '_')}_comments.csv"
+                 df_comments.to_csv(csv_file, index=False)
+                 st.success(f"Comments extracted and saved to {csv_file}")
+
+                 # Show dataframe details
+                 st.write("First 5 rows of the extracted comments:")
+                 st.dataframe(df_comments.head())
+
+                 st.download_button(
+                     label="Download CSV",
+                     data=df_comments.to_csv(index=False),
+                     file_name=csv_file,
+                     mime='text/csv'
+                 )
+             else:
+                 st.warning("No comments found for the selected videos.")
+     else:
+         st.warning("Please enter your API key, search query, and number of videos.")
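
For quick local testing of the new extraction flow, here is a minimal sketch (not part of the commit) that calls the same YouTube Data API v3 commentThreads endpoint that get_video_comments uses, but outside Streamlit and for a single page of results. The API key and video ID below are placeholders, and the snippet assumes the google-api-python-client package is installed.

from googleapiclient.discovery import build

API_KEY = "YOUR_YOUTUBE_API_KEY"   # placeholder: supply your own key
VIDEO_ID = "dQw4w9WgXcQ"           # placeholder: any public video ID

# Build the YouTube Data API v3 client, as app.py does.
youtube = build("youtube", "v3", developerKey=API_KEY)

# Fetch only the first page of top-level comments (up to 10).
response = youtube.commentThreads().list(
    part="snippet",
    videoId=VIDEO_ID,
    maxResults=10
).execute()

for item in response.get("items", []):
    snippet = item["snippet"]["topLevelComment"]["snippet"]
    print(snippet["publishedAt"], snippet["likeCount"], snippet["textDisplay"][:80])

The full app additionally paginates with nextPageToken, collects replies, and writes the rows to CSV; the Streamlit UI that drives it is started with streamlit run app.py.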