Spaces:

JunyiAcademy
/

vaitor2

Sleeping

App Files Community

youngtsai committed on Apr 6, 2024

Commit

6d0cfe0

1 Parent(s): a7204e6

elif kind == "key_moments":

Browse files

Files changed (1) hide show

app.py +75 -4

app.py CHANGED Viewed

@@ -610,7 +610,6 @@ def process_youtube_link(password, link):
     # 基于逐字稿生成其他所需的输出
     source = "gcs"
     questions = get_questions(video_id, formatted_simple_transcript, source)
-    formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
     summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
     summary = summary_json["summary"]
     key_moments_json = get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source)
@@ -650,6 +649,37 @@ def process_youtube_link(password, link):
         subject, \
         grade
 def format_transcript_to_html(formatted_transcript):
     html_content = ""
     for entry in formatted_transcript:
@@ -1407,6 +1437,17 @@ def update_LLM_content(video_id, new_content, kind):
         mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
         updated_content = mind_map_text
     elif kind == "transcript":
         if isinstance(new_content, str):
             transcript_json = json.loads(new_content)
@@ -1432,6 +1473,16 @@ def create_LLM_content(video_id, df_string, kind):
     elif kind == "mind_map":
         content = generate_mind_map(df_string)
         update_LLM_content(video_id, content, kind)
     elif kind == "transcript":
         content = process_transcript_and_screenshots_on_gcs(video_id)
         update_LLM_content(video_id, content, kind)
@@ -2086,8 +2137,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
                     ai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath")
                 with gr.Row():
                     ai_msg = gr.Textbox(label="Message",scale=3)
-                    ai_send_button = gr.Button("Send", variant="primary",scale=1)
     with gr.Tab("文章模式"):
         with gr.Row() as reading_passage_admin:
             reading_passage_kind = gr.Textbox(value="reading_passage", show_label=False)
@@ -2111,7 +2161,6 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
     with gr.Tab("關鍵時刻"):
         with gr.Row():
             key_moments_html = gr.HTML(value="")
     with gr.Tab("教學備課"):
         with gr.Row():
             content_subject = gr.Dropdown(label="選擇主題", choices=["數學", "自然", "國文", "英文", "社會","物理", "化學", "生物", "地理", "歷史", "公民"], value="", visible=False)
@@ -2429,6 +2478,28 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         inputs=[video_id, df_string_output, transcript_kind],
         outputs=[df_string_output]
     )
     # 教師版
     worksheet_content_btn.click(

     # 基于逐字稿生成其他所需的输出
     source = "gcs"
     questions = get_questions(video_id, formatted_simple_transcript, source)
     summary_json = get_video_id_summary(video_id, formatted_simple_transcript, source)
     summary = summary_json["summary"]
     key_moments_json = get_key_moments(video_id, formatted_simple_transcript, formatted_transcript, source)
         subject, \
         grade
+def create_formatted_simple_transcript(transcript):
+    formatted_simple_transcript = []
+    for entry in transcript:
+        start_time = format_seconds_to_time(entry['start'])
+        end_time = format_seconds_to_time(entry['start'] + entry['duration'])
+        line = {
+            "start_time": start_time,
+            "end_time": end_time,
+            "text": entry['text']
+        }
+        formatted_simple_transcript.append(line)
+    return formatted_simple_transcript
+def create_formatted_transcript(transcript):
+    formatted_transcript = []
+    for entry in transcript:
+        start_time = format_seconds_to_time(entry['start'])
+        end_time = format_seconds_to_time(entry['start'] + entry['duration'])
+        embed_url = get_embedded_youtube_link(VIDEO_ID, entry['start'])
+        img_file_id = entry['img_file_id']
+        screenshot_path = img_file_id
+        line = {
+            "start_time": start_time,
+            "end_time": end_time,
+            "text": entry['text'],
+            "embed_url": embed_url,
+            "screenshot_path": screenshot_path
+        }
+        formatted_transcript.append(line)
+    return formatted_transcript
 def format_transcript_to_html(formatted_transcript):
     html_content = ""
     for entry in formatted_transcript:
         mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
         updated_content = mind_map_text
+    elif kind == "key_moments":
+        # from update_LLM_btn  -> new_content is a string
+        # create_LLM_content -> new_content is a list
+        if isinstance(new_content, str):
+            key_moments_list = json.loads(new_content)
+        else:
+            key_moments_list = new_content
+        key_moments_json = {"key_moments": key_moments_list}
+        key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
+        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
+        updated_content = key_moments_text
     elif kind == "transcript":
         if isinstance(new_content, str):
             transcript_json = json.loads(new_content)
     elif kind == "mind_map":
         content = generate_mind_map(df_string)
         update_LLM_content(video_id, content, kind)
+    elif kind == "key_moments":
+        if isinstance(df_string, str):
+            transcript = json.loads(df_string)
+        else:
+            transcript = df_string
+        formatted_simple_transcript = create_formatted_simple_transcript(transcript)
+        formatted_transcript = create_formatted_transcript(transcript)
+        content = generate_key_moments(formatted_simple_transcript, formatted_transcript)
+        update_LLM_content(video_id, content, kind)
+        content = json.dumps(content, ensure_ascii=False, indent=2)
     elif kind == "transcript":
         content = process_transcript_and_screenshots_on_gcs(video_id)
         update_LLM_content(video_id, content, kind)
                     ai_chatbot_audio_input = gr.Audio(sources=["microphone"], type="filepath")
                 with gr.Row():
                     ai_msg = gr.Textbox(label="Message",scale=3)
+                    ai_send_button = gr.Button("Send", variant="primary",scale=1)
     with gr.Tab("文章模式"):
         with gr.Row() as reading_passage_admin:
             reading_passage_kind = gr.Textbox(value="reading_passage", show_label=False)
     with gr.Tab("關鍵時刻"):
         with gr.Row():
             key_moments_html = gr.HTML(value="")
     with gr.Tab("教學備課"):
         with gr.Row():
             content_subject = gr.Dropdown(label="選擇主題", choices=["數學", "自然", "國文", "英文", "社會","物理", "化學", "生物", "地理", "歷史", "公民"], value="", visible=False)
         inputs=[video_id, df_string_output, transcript_kind],
         outputs=[df_string_output]
     )
+    # key_moments event
+    key_moments_create_button.click(
+        create_LLM_content,
+        inputs=[video_id, df_string_output, key_moments_kind],
+        outputs=[key_moments]
+    )
+    key_moments_delete_button.click(
+        delete_LLM_content,
+        inputs=[video_id, key_moments_kind],
+        outputs=[key_moments]
+    )
+    key_moments_edit_button.click(
+        enable_edit_mode,
+        inputs=[],
+        outputs=[key_moments]
+    )
+    key_moments_update_button.click(
+        update_LLM_content,
+        inputs=[video_id, key_moments, key_moments_kind],
+        outputs=[key_moments]
+    )
     # 教師版
     worksheet_content_btn.click(