Spaces:
Running
Running
def get_questions(video_id, df_string):
Browse files
app.py
CHANGED
@@ -51,6 +51,7 @@ from urllib.parse import urlparse, parse_qs
|
|
51 |
OUTPUT_PATH = 'videos'
|
52 |
TRANSCRIPTS = []
|
53 |
CURRENT_INDEX = 0
|
|
|
54 |
|
55 |
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
|
56 |
client = OpenAI(api_key=OPEN_AI_KEY)
|
@@ -295,6 +296,10 @@ def process_youtube_link(link):
|
|
295 |
# 使用 YouTube API 获取逐字稿
|
296 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
297 |
video_id = extract_youtube_id(link)
|
|
|
|
|
|
|
|
|
298 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
299 |
|
300 |
try:
|
@@ -335,8 +340,7 @@ def process_youtube_link(link):
|
|
335 |
TRANSCRIPTS = formatted_transcript
|
336 |
|
337 |
# 基于逐字稿生成其他所需的输出
|
338 |
-
|
339 |
-
questions = ["", "", ""]
|
340 |
formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
341 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
342 |
summary = summary_json["summary"]
|
@@ -566,8 +570,27 @@ def generate_questions(df_string):
|
|
566 |
|
567 |
return questions
|
568 |
|
569 |
-
def get_questions(df_string):
|
570 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
571 |
q1 = questions[0] if len(questions) > 0 else ""
|
572 |
q2 = questions[1] if len(questions) > 1 else ""
|
573 |
q3 = questions[2] if len(questions) > 2 else ""
|
@@ -724,7 +747,7 @@ with gr.Blocks() as demo:
|
|
724 |
btn_2.click(respond, inputs=[btn_2, df_string_output, chatbot], outputs=[msg, chatbot])
|
725 |
btn_3.click(respond, inputs=[btn_3, df_string_output, chatbot], outputs=[msg, chatbot])
|
726 |
|
727 |
-
btn_create_question.click(get_questions, inputs = [df_string_output], outputs = [btn_1, btn_2, btn_3])
|
728 |
|
729 |
# file_upload.change(process_file, inputs=file_upload, outputs=df_string_output)
|
730 |
file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|
|
|
51 |
OUTPUT_PATH = 'videos'
|
52 |
TRANSCRIPTS = []
|
53 |
CURRENT_INDEX = 0
|
54 |
+
VIDEO_ID = ""
|
55 |
|
56 |
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
|
57 |
client = OpenAI(api_key=OPEN_AI_KEY)
|
|
|
296 |
# 使用 YouTube API 获取逐字稿
|
297 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
298 |
video_id = extract_youtube_id(link)
|
299 |
+
global VIDEO_ID
|
300 |
+
VIDEO_ID = video_id
|
301 |
+
|
302 |
+
|
303 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
304 |
|
305 |
try:
|
|
|
340 |
TRANSCRIPTS = formatted_transcript
|
341 |
|
342 |
# 基于逐字稿生成其他所需的输出
|
343 |
+
questions = get_questions(video_id, formatted_simple_transcript)
|
|
|
344 |
formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
345 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
346 |
summary = summary_json["summary"]
|
|
|
570 |
|
571 |
return questions
|
572 |
|
573 |
+
def get_questions(video_id, df_string):
|
574 |
+
# Check Google Drive for an existing {video_id}_questions.json file
|
575 |
+
print("===get_questions===")
|
576 |
+
service = init_drive_service()
|
577 |
+
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
|
578 |
+
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
579 |
+
file_name = f'{video_id}_questions.json'
|
580 |
+
|
581 |
+
# 检查檔案是否存在
|
582 |
+
exists, file_id = check_file_exists(service, folder_id, file_name)
|
583 |
+
if not exists:
|
584 |
+
questions = generate_questions(df_string)
|
585 |
+
questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
|
586 |
+
upload_content_directly(service, file_name, folder_id, questions_text)
|
587 |
+
print("questions已上傳到Google Drive")
|
588 |
+
else:
|
589 |
+
# The questions file already exists; download its contents
|
590 |
+
print("questions已存在于Google Drive中")
|
591 |
+
questions_text = download_file_as_string(service, file_id)
|
592 |
+
questions = json.loads(questions_text)
|
593 |
+
|
594 |
q1 = questions[0] if len(questions) > 0 else ""
|
595 |
q2 = questions[1] if len(questions) > 1 else ""
|
596 |
q3 = questions[2] if len(questions) > 2 else ""
|
|
|
747 |
btn_2.click(respond, inputs=[btn_2, df_string_output, chatbot], outputs=[msg, chatbot])
|
748 |
btn_3.click(respond, inputs=[btn_3, df_string_output, chatbot], outputs=[msg, chatbot])
|
749 |
|
750 |
+
# Gradio event `inputs` must be components. VIDEO_ID is a plain module-level
# string that process_youtube_link reassigns, so listing it in `inputs` would
# pass a non-component (and, at best, the empty value present at build time).
# Read the global when the button is clicked instead.
btn_create_question.click(
    lambda df_string: get_questions(VIDEO_ID, df_string),
    inputs=[df_string_output],
    outputs=[btn_1, btn_2, btn_3],
)
|
751 |
|
752 |
# file_upload.change(process_file, inputs=file_upload, outputs=df_string_output)
|
753 |
file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|