Spaces:

JunyiAcademy
/

vaitor2

Running

App Files Files Community

youngtsai committed on Mar 1, 2024

Commit

1e6cfb0

1 Parent(s): 5c4e35b

img

Browse files

Files changed (1) hide show

app.py +20 -21

app.py CHANGED Viewed

@@ -457,25 +457,24 @@ def process_transcript_and_screenshots_on_gcs(video_id):
     # print("===確認其他衍生文件 end ===")
     # 處理截圖
-    # for entry in transcript:
-    #     if 'img_file_id' not in entry:
-    #         screenshot_path = screenshot_youtube_video(video_id, entry['start'])
-    #         screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
-    #         img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
-    #         entry['img_file_id'] = img_file_id
-    #         print(f"截图已上传到GCS: {img_file_id}")
     # 更新逐字稿文件
-    # print("===更新逐字稿文件===")
-    # print(transcript)
-    # print("===更新逐字稿文件===")
-    # updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
-    # upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
-    # print("逐字稿已更新，包括截图链接")
-    # updated_transcript_json = json.loads(updated_transcript_text)
-    # return updated_transcript_json
-    return transcript
 def process_youtube_link(link):
     # 使用 YouTube API 获取逐字稿
@@ -501,8 +500,8 @@ def process_youtube_link(link):
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
         embed_url = get_embedded_youtube_link(video_id, entry['start'])
-        # img_file_id = entry['img_file_id']
-        img_file_id =""
         # 先取消 Google Drive 的图片
         # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
         screenshot_path = img_file_id
@@ -534,8 +533,8 @@ def process_youtube_link(link):
     summary = summary_json["summary"]
     html_content = format_transcript_to_html(formatted_transcript)
     simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
-    # first_image = formatted_transcript[0]['screenshot_path']
-    first_image = "https://www.nameslook.com/names/dfsadf-nameslook.png"
     first_text = formatted_transcript[0]['text']
     mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
     mind_map = mind_map_json["mind_map"]

     # print("===確認其他衍生文件 end ===")
     # 處理截圖
+    for entry in transcript:
+        if 'img_file_id' not in entry:
+            screenshot_path = screenshot_youtube_video(video_id, entry['start'])
+            screenshot_blob_name = f"{video_id}/{video_id}_{entry['start']}.jpg"
+            img_file_id = upload_img_and_get_public_url(gcs_client, bucket_name, screenshot_blob_name, screenshot_path)
+            entry['img_file_id'] = img_file_id
+            print(f"截图已上传到GCS: {img_file_id}")
     # 更新逐字稿文件
+    print("===更新逐字稿文件===")
+    print(transcript)
+    print("===更新逐字稿文件===")
+    updated_transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
+    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, transcript_blob_name, updated_transcript_text)
+    print("逐字稿已更新，包括截图链接")
+    updated_transcript_json = json.loads(updated_transcript_text)
+    return updated_transcript_json
 def process_youtube_link(link):
     # 使用 YouTube API 获取逐字稿
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
         embed_url = get_embedded_youtube_link(video_id, entry['start'])
+        img_file_id = entry['img_file_id']
+        # img_file_id =""
         # 先取消 Google Drive 的图片
         # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
         screenshot_path = img_file_id
     summary = summary_json["summary"]
     html_content = format_transcript_to_html(formatted_transcript)
     simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
+    first_image = formatted_transcript[0]['screenshot_path']
+    # first_image = "https://www.nameslook.com/names/dfsadf-nameslook.png"
     first_text = formatted_transcript[0]['text']
     mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
     mind_map = mind_map_json["mind_map"]