oceansweep committed on
Commit
9a2dd5b
1 Parent(s): 13b4956

Update App_Function_Libraries/Video_DL_Ingestion_Lib.py

Browse files
App_Function_Libraries/Video_DL_Ingestion_Lib.py CHANGED
@@ -1,331 +1,331 @@
1
- # Video_DL_Ingestion_Lib.py
2
- #########################################
3
- # Video Downloader and Ingestion Library
4
- # This library is used to handle downloading videos from YouTube and other platforms.
5
- # It also handles the ingestion of the videos into the database.
6
- # It uses yt-dlp to extract video information and download the videos.
7
- ####
8
- import json
9
- ####################
10
- # Function List
11
- #
12
- # 1. get_video_info(url)
13
- # 2. create_download_directory(title)
14
- # 3. sanitize_filename(title)
15
- # 4. normalize_title(title)
16
- # 5. get_youtube(video_url)
17
- # 6. get_playlist_videos(playlist_url)
18
- # 7. download_video(video_url, download_path, info_dict, download_video_flag)
19
- # 8. save_to_file(video_urls, filename)
20
- # 9. save_summary_to_file(summary, file_path)
21
- # 10. process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter, download_video, download_audio, rolling_summarization, detail_level, question_box, keywords, chunk_summarization, chunk_duration_input, words_per_second_input)
22
- #
23
- #
24
- ####################
25
- # Import necessary libraries to run solo for testing
26
- import logging
27
- import os
28
- import re
29
- import sys
30
- from urllib.parse import urlparse, parse_qs
31
-
32
- import unicodedata
33
- # 3rd-Party Imports
34
- import yt_dlp
35
-
36
- from App_Function_Libraries.DB_Manager import check_media_and_whisper_model
37
-
38
-
39
- # Import Local
40
- #
41
- #######################################################################################################################
42
- # Function Definitions
43
- #
44
-
45
- def normalize_title(title):
46
- # Normalize the string to 'NFKD' form and encode to 'ascii' ignoring non-ascii characters
47
- title = unicodedata.normalize('NFKD', title).encode('ascii', 'ignore').decode('ascii')
48
- title = title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('"', '').replace('*', '').replace('?',
49
- '').replace(
50
- '<', '').replace('>', '').replace('|', '')
51
- return title
52
-
53
- def get_video_info(url: str) -> dict:
54
- ydl_opts = {
55
- 'quiet': True,
56
- 'no_warnings': True,
57
- 'skip_download': True,
58
- }
59
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
60
- try:
61
- info_dict = ydl.extract_info(url, download=False)
62
- return info_dict
63
- except Exception as e:
64
- logging.error(f"Error extracting video info: {e}")
65
- return None
66
-
67
-
68
- def get_youtube(video_url):
69
- ydl_opts = {
70
- 'format': 'bestaudio[ext=m4a]',
71
- 'noplaylist': False,
72
- 'quiet': True,
73
- 'extract_flat': True
74
- }
75
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
76
- logging.debug("About to extract youtube info")
77
- info_dict = ydl.extract_info(video_url, download=False)
78
- logging.debug("Youtube info successfully extracted")
79
- return info_dict
80
-
81
-
82
- def get_playlist_videos(playlist_url):
83
- ydl_opts = {
84
- 'extract_flat': True,
85
- 'skip_download': True,
86
- 'quiet': True
87
- }
88
-
89
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
90
- info = ydl.extract_info(playlist_url, download=False)
91
-
92
- if 'entries' in info:
93
- video_urls = [entry['url'] for entry in info['entries']]
94
- playlist_title = info['title']
95
- return video_urls, playlist_title
96
- else:
97
- print("No videos found in the playlist.")
98
- return [], None
99
-
100
-
101
- def download_video(video_url, download_path, info_dict, download_video_flag, current_whisper_model):
102
- global video_file_path, ffmpeg_path
103
- global audio_file_path
104
-
105
- # Normalize Video Title name
106
- logging.debug("About to normalize downloaded video title")
107
- if 'title' not in info_dict or 'ext' not in info_dict:
108
- logging.error("info_dict is missing 'title' or 'ext'")
109
- return None
110
-
111
- normalized_video_title = normalize_title(info_dict['title'])
112
-
113
- # Check if media already exists in the database and compare whisper models
114
- should_download, reason = check_media_and_whisper_model(
115
- title=normalized_video_title,
116
- url=video_url,
117
- current_whisper_model=current_whisper_model
118
- )
119
-
120
- if not should_download:
121
- logging.info(f"Skipping download: {reason}")
122
- return None
123
-
124
- logging.info(f"Proceeding with download: {reason}")
125
-
126
- video_file_path = os.path.join(download_path, f"{normalized_video_title}.{info_dict['ext']}")
127
-
128
- # Check for existence of video file
129
- if os.path.exists(video_file_path):
130
- logging.info(f"Video file already exists: {video_file_path}")
131
- return video_file_path
132
-
133
- # Setup path handling for ffmpeg on different OSs
134
- if sys.platform.startswith('win'):
135
- ffmpeg_path = os.path.join(os.getcwd(), 'Bin', 'ffmpeg.exe')
136
- elif sys.platform.startswith('linux'):
137
- ffmpeg_path = 'ffmpeg'
138
- elif sys.platform.startswith('darwin'):
139
- ffmpeg_path = 'ffmpeg'
140
-
141
- if download_video_flag:
142
- video_file_path = os.path.join(download_path, f"{normalized_video_title}.mp4")
143
- ydl_opts_video = {
144
- 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]',
145
- 'outtmpl': video_file_path,
146
- 'ffmpeg_location': ffmpeg_path
147
- }
148
-
149
- try:
150
- with yt_dlp.YoutubeDL(ydl_opts_video) as ydl:
151
- logging.debug("yt_dlp: About to download video with youtube-dl")
152
- ydl.download([video_url])
153
- logging.debug("yt_dlp: Video successfully downloaded with youtube-dl")
154
- if os.path.exists(video_file_path):
155
- return video_file_path
156
- else:
157
- logging.error("yt_dlp: Video file not found after download")
158
- return None
159
- except Exception as e:
160
- logging.error(f"yt_dlp: Error downloading video: {e}")
161
- return None
162
- elif not download_video_flag:
163
- video_file_path = os.path.join(download_path, f"{normalized_video_title}.mp4")
164
- # Set options for video and audio
165
- ydl_opts = {
166
- 'format': 'bestaudio[ext=m4a]',
167
- 'quiet': True,
168
- 'outtmpl': video_file_path
169
- }
170
-
171
- try:
172
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
173
- logging.debug("yt_dlp: About to download video with youtube-dl")
174
- ydl.download([video_url])
175
- logging.debug("yt_dlp: Video successfully downloaded with youtube-dl")
176
- if os.path.exists(video_file_path):
177
- return video_file_path
178
- else:
179
- logging.error("yt_dlp: Video file not found after download")
180
- return None
181
- except Exception as e:
182
- logging.error(f"yt_dlp: Error downloading video: {e}")
183
- return None
184
-
185
- else:
186
- logging.debug("download_video: Download video flag is set to False and video file path is not found")
187
- return None
188
-
189
-
190
- def extract_video_info(url):
191
- try:
192
- with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
193
- info = ydl.extract_info(url, download=False)
194
-
195
- # Log only a subset of the info to avoid overwhelming the logs
196
- log_info = {
197
- 'title': info.get('title'),
198
- 'duration': info.get('duration'),
199
- 'upload_date': info.get('upload_date')
200
- }
201
- logging.debug(f"Extracted info for {url}: {log_info}")
202
-
203
- return info
204
- except Exception as e:
205
- logging.error(f"Error extracting video info for {url}: {str(e)}", exc_info=True)
206
- return None
207
-
208
-
209
- def get_youtube_playlist_urls(playlist_id):
210
- ydl_opts = {
211
- 'extract_flat': True,
212
- 'quiet': True,
213
- }
214
-
215
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
216
- result = ydl.extract_info(f'https://www.youtube.com/playlist?list={playlist_id}', download=False)
217
- return [entry['url'] for entry in result['entries'] if entry.get('url')]
218
-
219
-
220
- def parse_and_expand_urls(urls):
221
- logging.info(f"Starting parse_and_expand_urls with input: {urls}")
222
- expanded_urls = []
223
-
224
- for url in urls:
225
- try:
226
- logging.info(f"Processing URL: {url}")
227
- parsed_url = urlparse(url)
228
- logging.debug(f"Parsed URL components: {parsed_url}")
229
-
230
- # YouTube playlist handling
231
- if 'youtube.com' in parsed_url.netloc and 'list' in parsed_url.query:
232
- playlist_id = parse_qs(parsed_url.query)['list'][0]
233
- logging.info(f"Detected YouTube playlist with ID: {playlist_id}")
234
- playlist_urls = get_youtube_playlist_urls(playlist_id)
235
- logging.info(f"Expanded playlist URLs: {playlist_urls}")
236
- expanded_urls.extend(playlist_urls)
237
-
238
- # YouTube short URL handling
239
- elif 'youtu.be' in parsed_url.netloc:
240
- video_id = parsed_url.path.lstrip('/')
241
- full_url = f'https://www.youtube.com/watch?v={video_id}'
242
- logging.info(f"Expanded YouTube short URL to: {full_url}")
243
- expanded_urls.append(full_url)
244
-
245
- # Vimeo handling
246
- elif 'vimeo.com' in parsed_url.netloc:
247
- video_id = parsed_url.path.lstrip('/')
248
- full_url = f'https://vimeo.com/{video_id}'
249
- logging.info(f"Processed Vimeo URL: {full_url}")
250
- expanded_urls.append(full_url)
251
-
252
- # Add more platform-specific handling here
253
-
254
- else:
255
- logging.info(f"URL not recognized as special case, adding as-is: {url}")
256
- expanded_urls.append(url)
257
-
258
- except Exception as e:
259
- logging.error(f"Error processing URL {url}: {str(e)}", exc_info=True)
260
- # Optionally, you might want to add the problematic URL to expanded_urls
261
- # expanded_urls.append(url)
262
-
263
- logging.info(f"Final expanded URLs: {expanded_urls}")
264
- return expanded_urls
265
-
266
-
267
- def extract_metadata(url, use_cookies=False, cookies=None):
268
- ydl_opts = {
269
- 'quiet': True,
270
- 'no_warnings': True,
271
- 'extract_flat': True,
272
- 'skip_download': True,
273
- }
274
-
275
- if use_cookies and cookies:
276
- try:
277
- cookie_dict = json.loads(cookies)
278
- ydl_opts['cookiefile'] = cookie_dict
279
- except json.JSONDecodeError:
280
- logging.warning("Invalid cookie format. Proceeding without cookies.")
281
-
282
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
283
- try:
284
- info = ydl.extract_info(url, download=False)
285
- metadata = {
286
- 'title': info.get('title'),
287
- 'uploader': info.get('uploader'),
288
- 'upload_date': info.get('upload_date'),
289
- 'view_count': info.get('view_count'),
290
- 'like_count': info.get('like_count'),
291
- 'duration': info.get('duration'),
292
- 'tags': info.get('tags'),
293
- 'description': info.get('description')
294
- }
295
-
296
- # Create a safe subset of metadata to log
297
- safe_metadata = {
298
- 'title': metadata.get('title', 'No title'),
299
- 'duration': metadata.get('duration', 'Unknown duration'),
300
- 'upload_date': metadata.get('upload_date', 'Unknown upload date'),
301
- 'uploader': metadata.get('uploader', 'Unknown uploader')
302
- }
303
-
304
- logging.info(f"Successfully extracted metadata for {url}: {safe_metadata}")
305
- return metadata
306
- except Exception as e:
307
- logging.error(f"Error extracting metadata for {url}: {str(e)}", exc_info=True)
308
- return None
309
-
310
-
311
- def generate_timestamped_url(url, hours, minutes, seconds):
312
- # Extract video ID from the URL
313
- video_id_match = re.search(r'(?:v=|)([0-9A-Za-z_-]{11}).*', url)
314
- if not video_id_match:
315
- return "Invalid YouTube URL"
316
-
317
- video_id = video_id_match.group(1)
318
-
319
- # Calculate total seconds
320
- total_seconds = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
321
-
322
- # Generate the new URL
323
- new_url = f"https://www.youtube.com/watch?v={video_id}&t={total_seconds}s"
324
-
325
- return new_url
326
-
327
-
328
-
329
- #
330
- #
331
- #######################################################################################################################
 
1
+ # Video_DL_Ingestion_Lib.py
2
+ #########################################
3
+ # Video Downloader and Ingestion Library
4
+ # This library is used to handle downloading videos from YouTube and other platforms.
5
+ # It also handles the ingestion of the videos into the database.
6
+ # It uses yt-dlp to extract video information and download the videos.
7
+ ####
8
+ import json
9
+ ####################
10
+ # Function List
11
+ #
12
+ # 1. get_video_info(url)
13
+ # 2. create_download_directory(title)
14
+ # 3. sanitize_filename(title)
15
+ # 4. normalize_title(title)
16
+ # 5. get_youtube(video_url)
17
+ # 6. get_playlist_videos(playlist_url)
18
+ # 7. download_video(video_url, download_path, info_dict, download_video_flag, current_whisper_model)
19
+ # 8. save_to_file(video_urls, filename)
20
+ # 9. save_summary_to_file(summary, file_path)
21
+ # 10. process_url(url, num_speakers, whisper_model, custom_prompt, offset, api_name, api_key, vad_filter, download_video, download_audio, rolling_summarization, detail_level, question_box, keywords, chunk_summarization, chunk_duration_input, words_per_second_input)
22
+ #
23
+ #
24
+ ####################
25
+ # Import necessary libraries to run solo for testing
26
+ import logging
27
+ import os
28
+ import re
29
+ import sys
30
+ from urllib.parse import urlparse, parse_qs
31
+
32
+ import unicodedata
33
+ # 3rd-Party Imports
34
+ import yt_dlp
35
+
36
+ from App_Function_Libraries.DB.DB_Manager import check_media_and_whisper_model
37
+
38
+
39
+ # Import Local
40
+ #
41
+ #######################################################################################################################
42
+ # Function Definitions
43
+ #
44
+
45
def normalize_title(title):
    """Return an ASCII-only version of ``title`` with path-hostile characters handled.

    The title is decomposed with NFKD and every character that cannot be
    encoded as ASCII is dropped. ``/``, ``\\`` and ``:`` are then turned into
    underscores, and the characters ``" * ? < > |`` are removed.

    Args:
        title: Title text to clean up.

    Returns:
        The cleaned title string.
    """
    # NFKD splits accented letters into a base letter plus combining marks;
    # the ASCII round-trip then discards the marks and any other non-ASCII text.
    ascii_title = unicodedata.normalize('NFKD', title).encode('ascii', 'ignore').decode('ascii')
    # Single pass: '/', '\' and ':' become '_'; the third argument lists the
    # characters that are deleted outright.
    return ascii_title.translate(str.maketrans('/\\:', '___', '"*?<>|'))
52
+
53
def get_video_info(url: str) -> dict:
    """Ask yt-dlp for the info dictionary of ``url`` without downloading media.

    Args:
        url: Address handed to ``YoutubeDL.extract_info``.

    Returns:
        Whatever ``extract_info`` returns, or ``None`` when it raises (the
        error is logged rather than propagated).
    """
    options = dict(quiet=True, no_warnings=True, skip_download=True)
    with yt_dlp.YoutubeDL(options) as downloader:
        try:
            return downloader.extract_info(url, download=False)
        except Exception as e:
            logging.error(f"Error extracting video info: {e}")
    # Only reached when extraction failed and the error was logged above.
    return None
66
+
67
+
68
def get_youtube(video_url):
    """Return yt-dlp's extracted info for ``video_url`` without downloading.

    The extractor runs quietly with ``extract_flat`` enabled, ``noplaylist``
    disabled and the ``bestaudio[ext=m4a]`` format selector. Exceptions raised
    by yt-dlp are not caught here and propagate to the caller.

    Args:
        video_url: Address handed to ``YoutubeDL.extract_info``.

    Returns:
        The value returned by ``extract_info``.
    """
    options = dict(
        format='bestaudio[ext=m4a]',
        noplaylist=False,
        quiet=True,
        extract_flat=True,
    )
    with yt_dlp.YoutubeDL(options) as downloader:
        logging.debug("About to extract youtube info")
        extracted = downloader.extract_info(video_url, download=False)
        logging.debug("Youtube info successfully extracted")
    return extracted
80
+
81
+
82
def get_playlist_videos(playlist_url):
    """List the entry URLs of a playlist together with the playlist title.

    Args:
        playlist_url: Address handed to ``YoutubeDL.extract_info``.

    Returns:
        A ``(video_urls, playlist_title)`` tuple. When the extracted info has
        no ``entries``, a message is printed and ``([], None)`` is returned.
        ``playlist_title`` is ``None`` if the info carries no ``title``.
    """
    ydl_opts = {
        'extract_flat': True,
        'skip_download': True,
        'quiet': True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(playlist_url, download=False)

    # Treat a missing result the same as a result without entries instead of
    # failing on the membership test / subscript.
    entries = (info or {}).get('entries')
    if entries is None:
        print("No videos found in the playlist.")
        return [], None

    # Skip placeholder (None) entries and entries without a URL, matching the
    # filtering done by get_youtube_playlist_urls.
    video_urls = [entry['url'] for entry in entries if entry and entry.get('url')]
    return video_urls, info.get('title')
99
+
100
+
101
def download_video(video_url, download_path, info_dict, download_video_flag, current_whisper_model):
    """Download the media behind ``video_url`` into ``download_path``.

    Args:
        video_url: Address handed to yt-dlp.
        download_path: Directory the output file is written to.
        info_dict: Mapping that must contain ``'title'`` and ``'ext'``.
        download_video_flag: Truthy selects ``bestvideo[ext=mp4]+bestaudio[ext=m4a]``;
            falsy selects ``bestaudio[ext=m4a]`` only.
        current_whisper_model: Forwarded to ``check_media_and_whisper_model``.

    Returns:
        The path of the existing or freshly downloaded file, or ``None`` when
        ``info_dict`` is unusable, ``check_media_and_whisper_model`` says the
        download should be skipped, yt-dlp raises, or the expected file is
        absent after the download.
    """
    # These names were module globals in the original implementation; they are
    # still published that way in case other code reads them after a call.
    global video_file_path, ffmpeg_path

    # Normalize Video Title name
    logging.debug("About to normalize downloaded video title")
    if not info_dict or 'title' not in info_dict or 'ext' not in info_dict:
        logging.error("info_dict is missing 'title' or 'ext'")
        return None

    normalized_video_title = normalize_title(info_dict['title'])

    # Check if media already exists in the database and compare whisper models
    should_download, reason = check_media_and_whisper_model(
        title=normalized_video_title,
        url=video_url,
        current_whisper_model=current_whisper_model,
    )
    if not should_download:
        logging.info(f"Skipping download: {reason}")
        return None
    logging.info(f"Proceeding with download: {reason}")

    # Reuse a file that is already on disk under the extension reported by info_dict.
    video_file_path = os.path.join(download_path, f"{normalized_video_title}.{info_dict['ext']}")
    if os.path.exists(video_file_path):
        logging.info(f"Video file already exists: {video_file_path}")
        return video_file_path

    # Windows uses the ffmpeg binary under ./Bin; every other platform relies
    # on an 'ffmpeg' found on PATH, so the name is always bound.
    if sys.platform.startswith('win'):
        ffmpeg_path = os.path.join(os.getcwd(), 'Bin', 'ffmpeg.exe')
    else:
        ffmpeg_path = 'ffmpeg'

    # Both modes write to "<title>.mp4".
    # NOTE(review): the audio-only mode therefore stores an m4a stream under an
    # .mp4 name; kept as-is because callers receive and use this path.
    video_file_path = os.path.join(download_path, f"{normalized_video_title}.mp4")
    if download_video_flag:
        ydl_opts = {
            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]',
            'outtmpl': video_file_path,
            'ffmpeg_location': ffmpeg_path,
        }
    else:
        ydl_opts = {
            'format': 'bestaudio[ext=m4a]',
            'quiet': True,
            'outtmpl': video_file_path,
        }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            logging.debug("yt_dlp: About to download video with youtube-dl")
            ydl.download([video_url])
            logging.debug("yt_dlp: Video successfully downloaded with youtube-dl")
    except Exception as e:
        logging.error(f"yt_dlp: Error downloading video: {e}")
        return None

    if os.path.exists(video_file_path):
        return video_file_path
    logging.error("yt_dlp: Video file not found after download")
    return None
188
+
189
+
190
def extract_video_info(url):
    """Extract yt-dlp info for ``url`` and log a short summary of it.

    Args:
        url: Address handed to ``YoutubeDL.extract_info``.

    Returns:
        The extracted info, or ``None`` if anything in the extraction or the
        summary logging raises (the error is logged with its traceback).
    """
    try:
        with yt_dlp.YoutubeDL({'quiet': True}) as downloader:
            details = downloader.extract_info(url, download=False)

            # Only a handful of fields go to the log so it stays readable.
            summary = {
                field: details.get(field)
                for field in ('title', 'duration', 'upload_date')
            }
            logging.debug(f"Extracted info for {url}: {summary}")

            return details
    except Exception as e:
        logging.error(f"Error extracting video info for {url}: {str(e)}", exc_info=True)
        return None
207
+
208
+
209
def get_youtube_playlist_urls(playlist_id):
    """Return the entry URLs of the YouTube playlist identified by ``playlist_id``.

    Args:
        playlist_id: Value placed in the ``list=`` parameter of the playlist URL.

    Returns:
        A list with the ``'url'`` of every entry that has one. The list is
        empty when yt-dlp yields no result or a result without entries.
    """
    ydl_opts = {
        'extract_flat': True,
        'quiet': True,
    }
    playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}'

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(playlist_url, download=False)

    # A missing result, a missing 'entries' key and placeholder (None) entries
    # are all treated as "nothing to add" instead of raising.
    entries = (result or {}).get('entries') or []
    return [entry['url'] for entry in entries if entry and entry.get('url')]
218
+
219
+
220
def parse_and_expand_urls(urls):
    """Normalize a list of media URLs, expanding YouTube playlists into their entries.

    Handling per URL:
      * host contains ``youtube.com`` and the query has a ``list`` parameter:
        replaced by the URLs returned by ``get_youtube_playlist_urls``;
      * host contains ``youtu.be``: rewritten to the ``watch?v=`` form;
      * host contains ``vimeo.com``: rebuilt as ``https://vimeo.com/<path>``;
      * anything else: kept unchanged.

    A URL whose processing raises is logged and left out of the result.

    Args:
        urls: Iterable of URL strings.

    Returns:
        The list of expanded/normalized URLs, in input order.
    """
    logging.info(f"Starting parse_and_expand_urls with input: {urls}")
    expanded_urls = []

    for url in urls:
        try:
            logging.info(f"Processing URL: {url}")
            parsed_url = urlparse(url)
            logging.debug(f"Parsed URL components: {parsed_url}")

            host = parsed_url.netloc.lower()
            # Look the parameter up in the parsed query. A plain substring test
            # on the raw query also fires for e.g. "feature=playlist" and then
            # fails on the missing 'list' key, dropping a perfectly good URL.
            playlist_ids = parse_qs(parsed_url.query).get('list')

            # YouTube playlist handling
            if 'youtube.com' in host and playlist_ids:
                playlist_id = playlist_ids[0]
                logging.info(f"Detected YouTube playlist with ID: {playlist_id}")
                playlist_urls = get_youtube_playlist_urls(playlist_id)
                logging.info(f"Expanded playlist URLs: {playlist_urls}")
                expanded_urls.extend(playlist_urls)

            # YouTube short URL handling
            elif 'youtu.be' in host:
                video_id = parsed_url.path.lstrip('/')
                full_url = f'https://www.youtube.com/watch?v={video_id}'
                logging.info(f"Expanded YouTube short URL to: {full_url}")
                expanded_urls.append(full_url)

            # Vimeo handling
            elif 'vimeo.com' in host:
                video_id = parsed_url.path.lstrip('/')
                full_url = f'https://vimeo.com/{video_id}'
                logging.info(f"Processed Vimeo URL: {full_url}")
                expanded_urls.append(full_url)

            # Add more platform-specific handling here

            else:
                logging.info(f"URL not recognized as special case, adding as-is: {url}")
                expanded_urls.append(url)

        except Exception as e:
            # Best effort: a URL that cannot be processed is reported and skipped.
            logging.error(f"Error processing URL {url}: {str(e)}", exc_info=True)

    logging.info(f"Final expanded URLs: {expanded_urls}")
    return expanded_urls
265
+
266
+
267
def extract_metadata(url, use_cookies=False, cookies=None):
    """Collect a fixed set of metadata fields for ``url`` via yt-dlp.

    Args:
        url: Address handed to ``YoutubeDL.extract_info``.
        use_cookies: When truthy (and ``cookies`` is non-empty) the cookies are
            sent with the requests.
        cookies: JSON text encoding an object of cookie name/value pairs, or an
            already-parsed mapping of the same shape.

    Returns:
        A dict with the keys ``title``, ``uploader``, ``upload_date``,
        ``view_count``, ``like_count``, ``duration``, ``tags`` and
        ``description`` (each ``None`` if yt-dlp did not report it), or
        ``None`` when extraction fails; the failure is logged.
    """
    ydl_opts = {
        'quiet': True,
        'no_warnings': True,
        'extract_flat': True,
        'skip_download': True,
    }

    if use_cookies and cookies:
        try:
            if isinstance(cookies, (str, bytes, bytearray)):
                cookie_dict = json.loads(cookies)
            else:
                cookie_dict = cookies
            cookie_header = '; '.join(f'{name}={value}' for name, value in cookie_dict.items())
        except (json.JSONDecodeError, AttributeError, TypeError):
            # Not valid JSON, or valid JSON that is not an object of name/value pairs.
            logging.warning("Invalid cookie format. Proceeding without cookies.")
        else:
            # 'cookiefile' expects a cookie file, not parsed name/value pairs,
            # so the pairs are sent as a Cookie request header instead.
            # NOTE(review): confirm 'http_headers' cookie handling against the
            # installed yt-dlp version.
            if cookie_header:
                ydl_opts['http_headers'] = {'Cookie': cookie_header}

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            info = ydl.extract_info(url, download=False)
            metadata = {
                'title': info.get('title'),
                'uploader': info.get('uploader'),
                'upload_date': info.get('upload_date'),
                'view_count': info.get('view_count'),
                'like_count': info.get('like_count'),
                'duration': info.get('duration'),
                'tags': info.get('tags'),
                'description': info.get('description'),
            }

            # Create a safe subset of metadata to log. The keys always exist in
            # `metadata`, so the placeholders are applied with `or`; a
            # .get() default would never be used.
            safe_metadata = {
                'title': metadata['title'] or 'No title',
                'duration': metadata['duration'] or 'Unknown duration',
                'upload_date': metadata['upload_date'] or 'Unknown upload date',
                'uploader': metadata['uploader'] or 'Unknown uploader',
            }

            logging.info(f"Successfully extracted metadata for {url}: {safe_metadata}")
            return metadata
        except Exception as e:
            logging.error(f"Error extracting metadata for {url}: {str(e)}", exc_info=True)
            return None
309
+
310
+
311
def generate_timestamped_url(url, hours, minutes, seconds):
    """Build a YouTube watch URL that starts playback at the given offset.

    The video ID is taken, in order of preference, from the ``v`` query
    parameter, from the first path segment of a ``youtu.be`` link, or from the
    segment following ``embed``/``shorts``/``live``/``v`` in the path. If none
    of these yields an 11-character ID, the first 11-character run of ID
    characters in ``url`` is used (this keeps bare IDs working).

    Args:
        url: YouTube URL (or bare video ID).
        hours: Hours part of the offset; anything ``int()`` accepts, with
            ``None``/empty treated as 0.
        minutes: Minutes part of the offset, same rules as ``hours``.
        seconds: Seconds part of the offset, same rules as ``hours``.

    Returns:
        ``https://www.youtube.com/watch?v=<id>&t=<total>s``, or the string
        ``"Invalid YouTube URL"`` when no video ID can be found.

    Raises:
        ValueError: If a time component is not convertible with ``int()``.
    """
    id_pattern = r'[0-9A-Za-z_-]{11}'

    # Extract video ID from the URL by parsing it. Scanning the raw text for
    # the first 11-character run picks up fragments of other parameters or of
    # the host name (e.g. "time_continue", "youtube-nocookie").
    parsed = urlparse(url)
    candidates = list(parse_qs(parsed.query).get('v', []))
    segments = [segment for segment in parsed.path.split('/') if segment]
    host = (parsed.hostname or '').lower()
    if (host == 'youtu.be' or host.endswith('.youtu.be')) and segments:
        candidates.append(segments[0])
    elif len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
        candidates.append(segments[1])

    video_id = next((c for c in candidates if re.fullmatch(id_pattern, c)), None)
    if video_id is None:
        # Fallback: the original loose match.
        video_id_match = re.search(r'(?:v=|)([0-9A-Za-z_-]{11}).*', url)
        if not video_id_match:
            return "Invalid YouTube URL"
        video_id = video_id_match.group(1)

    # Calculate total seconds
    total_seconds = int(hours or 0) * 3600 + int(minutes or 0) * 60 + int(seconds or 0)

    # Generate the new URL
    return f"https://www.youtube.com/watch?v={video_id}&t={total_seconds}s"
326
+
327
+
328
+
329
+ #
330
+ #
331
+ #######################################################################################################################