youngtsai committed on
Commit
1e6cfb0
·
1 Parent(s): 5c4e35b
Files changed (1) hide show
  1. app.py +20 -21
app.py CHANGED
@@ -457,25 +457,24 @@ def process_transcript_and_screenshots_on_gcs(video_id):
457
  # print("===確認其他衍生文件 end ===")
458
 
459
  # 處理截圖
460
- # for entry in transcript:
461
- # if 'img_file_id' not in entry:
462
- # screenshot_path = screenshot_youtube_video(video_id, entry['start'])
463
- # screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
464
- # img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
465
- # entry['img_file_id'] = img_file_id
466
- # print(f"截图已上传到GCS: {img_file_id}")
467
 
468
  # 更新逐字稿文件
469
- # print("===更新逐字稿文件===")
470
- # print(transcript)
471
- # print("===更新逐字稿文件===")
472
- # updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
473
- # upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
474
- # print("逐字稿已更新,包括截图链接")
475
- # updated_transcript_json = json.loads(updated_transcript_text)
476
-
477
- # return updated_transcript_json
478
- return transcript
479
 
480
  def process_youtube_link(link):
481
  # 使用 YouTube API 获取逐字稿
@@ -501,8 +500,8 @@ def process_youtube_link(link):
501
  start_time = format_seconds_to_time(entry['start'])
502
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
503
  embed_url = get_embedded_youtube_link(video_id, entry['start'])
504
- # img_file_id = entry['img_file_id']
505
- img_file_id =""
506
  # 先取消 Google Drive 的图片
507
  # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
508
  screenshot_path = img_file_id
@@ -534,8 +533,8 @@ def process_youtube_link(link):
534
  summary = summary_json["summary"]
535
  html_content = format_transcript_to_html(formatted_transcript)
536
  simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
537
- # first_image = formatted_transcript[0]['screenshot_path']
538
- first_image = "https://www.nameslook.com/names/dfsadf-nameslook.png"
539
  first_text = formatted_transcript[0]['text']
540
  mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
541
  mind_map = mind_map_json["mind_map"]
 
457
  # print("===確認其他衍生文件 end ===")
458
 
459
  # 處理截圖
460
+ for entry in transcript:
461
+ if 'img_file_id' not in entry:
462
+ screenshot_path = screenshot_youtube_video(video_id, entry['start'])
463
+ screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
464
+ img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
465
+ entry['img_file_id'] = img_file_id
466
+ print(f"截图已上传到GCS: {img_file_id}")
467
 
468
  # 更新逐字稿文件
469
+ print("===更新逐字稿文件===")
470
+ print(transcript)
471
+ print("===更新逐字稿文件===")
472
+ updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
473
+ upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
474
+ print("逐字稿已更新,包括截图链接")
475
+ updated_transcript_json = json.loads(updated_transcript_text)
476
+
477
+ return updated_transcript_json
 
478
 
479
  def process_youtube_link(link):
480
  # 使用 YouTube API 获取逐字稿
 
500
  start_time = format_seconds_to_time(entry['start'])
501
  end_time = format_seconds_to_time(entry['start'] + entry['duration'])
502
  embed_url = get_embedded_youtube_link(video_id, entry['start'])
503
+ img_file_id = entry['img_file_id']
504
+ # img_file_id =""
505
  # 先取消 Google Drive 的图片
506
  # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
507
  screenshot_path = img_file_id
 
533
  summary = summary_json["summary"]
534
  html_content = format_transcript_to_html(formatted_transcript)
535
  simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
536
+ first_image = formatted_transcript[0]['screenshot_path']
537
+ # first_image = "https://www.nameslook.com/names/dfsadf-nameslook.png"
538
  first_text = formatted_transcript[0]['text']
539
  mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
540
  mind_map = mind_map_json["mind_map"]