Spaces:
Sleeping
Sleeping
img
Browse files
app.py
CHANGED
@@ -457,25 +457,24 @@ def process_transcript_and_screenshots_on_gcs(video_id):
|
|
457 |
# print("===確認其他衍生文件 end ===")
|
458 |
|
459 |
# 處理截圖
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
|
468 |
# 更新逐字稿文件
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
return transcript
|
479 |
|
480 |
def process_youtube_link(link):
|
481 |
# 使用 YouTube API 获取逐字稿
|
@@ -501,8 +500,8 @@ def process_youtube_link(link):
|
|
501 |
start_time = format_seconds_to_time(entry['start'])
|
502 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
503 |
embed_url = get_embedded_youtube_link(video_id, entry['start'])
|
504 |
-
|
505 |
-
img_file_id =""
|
506 |
# 先取消 Google Drive 的图片
|
507 |
# screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
|
508 |
screenshot_path = img_file_id
|
@@ -534,8 +533,8 @@ def process_youtube_link(link):
|
|
534 |
summary = summary_json["summary"]
|
535 |
html_content = format_transcript_to_html(formatted_transcript)
|
536 |
simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
|
537 |
-
|
538 |
-
first_image = "https://www.nameslook.com/names/dfsadf-nameslook.png"
|
539 |
first_text = formatted_transcript[0]['text']
|
540 |
mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
|
541 |
mind_map = mind_map_json["mind_map"]
|
|
|
457 |
# print("===確認其他衍生文件 end ===")
|
458 |
|
459 |
# 處理截圖
|
460 |
+
for entry in transcript:
|
461 |
+
if 'img_file_id' not in entry:
|
462 |
+
screenshot_path = screenshot_youtube_video(video_id, entry['start'])
|
463 |
+
screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
|
464 |
+
img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
|
465 |
+
entry['img_file_id'] = img_file_id
|
466 |
+
print(f"截图已上传到GCS: {img_file_id}")
|
467 |
|
468 |
# 更新逐字稿文件
|
469 |
+
print("===更新逐字稿文件===")
|
470 |
+
print(transcript)
|
471 |
+
print("===更新逐字稿文件===")
|
472 |
+
updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
473 |
+
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
|
474 |
+
print("逐字稿已更新,包括截图链接")
|
475 |
+
updated_transcript_json = json.loads(updated_transcript_text)
|
476 |
+
|
477 |
+
return updated_transcript_json
|
|
|
478 |
|
479 |
def process_youtube_link(link):
|
480 |
# 使用 YouTube API 获取逐字稿
|
|
|
500 |
start_time = format_seconds_to_time(entry['start'])
|
501 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
502 |
embed_url = get_embedded_youtube_link(video_id, entry['start'])
|
503 |
+
img_file_id = entry['img_file_id']
|
504 |
+
# img_file_id =""
|
505 |
# 先取消 Google Drive 的图片
|
506 |
# screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
|
507 |
screenshot_path = img_file_id
|
|
|
533 |
summary = summary_json["summary"]
|
534 |
html_content = format_transcript_to_html(formatted_transcript)
|
535 |
simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
|
536 |
+
first_image = formatted_transcript[0]['screenshot_path']
|
537 |
+
# first_image = "https://www.nameslook.com/names/dfsadf-nameslook.png"
|
538 |
first_text = formatted_transcript[0]['text']
|
539 |
mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
|
540 |
mind_map = mind_map_json["mind_map"]
|