Spaces:
Sleeping
Sleeping
def summary_add_markdown_version(video_id):
Browse files
app.py
CHANGED
@@ -1549,6 +1549,132 @@ def create_LLM_content(video_id, df_string, kind):
|
|
1549 |
|
1550 |
return gr.update(value=content, interactive=False)
|
1551 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1552 |
# AI 生成教學素材
|
1553 |
def get_meta_data(video_id, source="gcs"):
|
1554 |
if source == "gcs":
|
@@ -2440,22 +2566,30 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
2440 |
df_string_output = gr.Textbox(lines=40, label="Data Text", interactive=False, show_copy_button=True)
|
2441 |
with gr.Tab("文章本文"):
|
2442 |
with gr.Row() as reading_passage_admin:
|
2443 |
-
|
2444 |
-
|
2445 |
-
|
2446 |
-
|
2447 |
-
|
2448 |
-
|
|
|
|
|
|
|
|
|
2449 |
with gr.Row():
|
2450 |
reading_passage_text = gr.Textbox(label="reading_passage", lines=40, interactive=False, show_copy_button=True)
|
2451 |
with gr.Tab("重點摘要本文"):
|
2452 |
with gr.Row() as summary_admmin:
|
2453 |
-
|
2454 |
-
|
2455 |
-
|
2456 |
-
|
2457 |
-
|
2458 |
-
|
|
|
|
|
|
|
|
|
2459 |
with gr.Row():
|
2460 |
summary_text = gr.Textbox(label="Summary", lines=40, interactive=False, show_copy_button=True)
|
2461 |
with gr.Tab("關鍵時刻本文"):
|
@@ -2656,6 +2790,11 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
2656 |
# web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|
2657 |
|
2658 |
# reading_passage event
|
|
|
|
|
|
|
|
|
|
|
2659 |
reading_passage_get_button.click(
|
2660 |
get_LLM_content,
|
2661 |
inputs=[video_id, reading_passage_kind],
|
@@ -2683,6 +2822,11 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
2683 |
)
|
2684 |
|
2685 |
# summary event
|
|
|
|
|
|
|
|
|
|
|
2686 |
summary_get_button.click(
|
2687 |
get_LLM_content,
|
2688 |
inputs=[video_id, summary_kind],
|
|
|
1549 |
|
1550 |
return gr.update(value=content, interactive=False)
|
1551 |
|
1552 |
+
# ---- LLM refresh CRUD ----
|
1553 |
+
def reading_passage_add_latex_version(video_id):
    """Create a LaTeX-annotated copy of a video's stored reading passage.

    Downloads ``{video_id}/{video_id}_reading_passage.json`` from the
    ``video_ai_assistant`` bucket, asks the ``gpt-4-turbo`` chat model to wrap
    mathematical and technical terms in ``$...$``, and uploads the result as
    ``{video_id}/{video_id}_reading_passage_latex.json``. The source blob is
    only read; the rewritten JSON is uploaded under the new blob name.

    Args:
        video_id: Identifier used to build the source and target blob names.

    Returns:
        The rewritten reading passage text returned by the model.

    Raises:
        gr.Error: If the source blob does not exist, if it has no usable
            "reading_passage" entry, or if the model returns no text.
    """
    print("===reading_passage_convert_to_latex===")
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_reading_passage.json'
    blob_name = f"{video_id}/{file_name}"
    print(f"blob_name: {blob_name}")

    # The conversion needs an existing reading passage to start from.
    is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
    if not is_file_exists:
        raise gr.Error("reading_passage 不存在!")

    # The reading passage exists: download it and pull out the text to convert.
    print("reading_passage 已存在于GCS中,轉換 Latex 模式")
    reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
    reading_passage_json = json.loads(reading_passage_text)
    original_reading_passage = reading_passage_json.get("reading_passage")
    if not original_reading_passage:
        # Surface a readable UI error instead of a KeyError / an empty prompt.
        raise gr.Error("reading_passage 內容為空,無法轉換!")

    sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
    # Prompt lines are written flush-left so that no source-code indentation
    # ends up inside the text sent to the model.
    user_content = f"""
請根據 {original_reading_passage}
敘述中,請把數學或是專業術語,用 Latex 包覆($...$),並且不要去改原本的文章
加減乘除、根號、次方、化學符號、物理符號等等的運算式口語也換成 LATEX 符號
請一定要使用繁體中文 zh-TW,並用台灣人的口語
產生的結果不要前後文解釋,也不要敘述這篇文章怎麼產生的
只需要專注提供 Reading Passage,字數在 200~500 字以內
"""
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content},
    ]

    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 4000,
    }

    response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
    # The message content may be None (e.g. refusals); never call .strip() on
    # it blindly and never persist an empty result.
    raw_content = response.choices[0].message.content
    new_reading_passage = raw_content.strip() if raw_content else ""
    if not new_reading_passage:
        raise gr.Error("LLM 沒有回傳內容,請再試一次!")

    print("=====new_reading_passage=====")
    print(new_reading_passage)
    print("=====new_reading_passage=====")

    # Keep every other field of the stored JSON and swap in the new passage.
    reading_passage_json["reading_passage"] = new_reading_passage
    reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)

    # Save as a separate *_reading_passage_latex.json blob.
    new_file_name = f'{video_id}_reading_passage_latex.json'
    new_blob_name = f"{video_id}/{new_file_name}"
    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, reading_passage_text)

    return new_reading_passage
|
1605 |
+
|
1606 |
+
def summary_add_markdown_version(video_id):
    """Create a Markdown-formatted copy of a video's stored summary.

    Downloads ``{video_id}/{video_id}_summary.json`` from the
    ``video_ai_assistant`` bucket, asks the ``gpt-4-turbo`` chat model to
    reformat the summary into the fixed Markdown layout given in the prompt,
    and uploads the result as ``{video_id}/{video_id}_summary_markdown.json``.
    The source blob is only read; the rewritten JSON is uploaded under the new
    blob name.

    Args:
        video_id: Identifier used to build the source and target blob names.

    Returns:
        The Markdown summary text returned by the model.

    Raises:
        gr.Error: If the source blob does not exist, if it has no usable
            "summary" entry, or if the model returns no text.
    """
    print("===summary_convert_to_markdown===")
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_summary.json'
    blob_name = f"{video_id}/{file_name}"
    print(f"blob_name: {blob_name}")

    # The conversion needs an existing summary to start from.
    is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
    if not is_file_exists:
        raise gr.Error("summary 不存在!")

    # The summary exists: download it and pull out the text to convert.
    print("summary 已存在于GCS中,轉換 Markdown 模式")
    summary_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
    summary_json = json.loads(summary_text)
    original_summary = summary_json.get("summary")
    if not original_summary:
        # Surface a readable UI error instead of a KeyError / an empty prompt.
        raise gr.Error("summary 內容為空,無法轉換!")

    sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
    # Prompt lines are written flush-left so that no source-code indentation
    # ends up inside the text sent to the model.
    user_content = f"""
請根據 {original_summary}
轉換格式為 Markdown
只保留:📚 整體摘要、🔖 重點概念、💡 為什麼我們要學這個、❓ 延伸小問題
其他的不要保留
整體摘要在一百字以內
重點概念轉成 bullet points
以及可能的結論與結尾延伸小問題提供學生作反思
敘述中,請把數學或是專業術語,用 Latex 包覆($...$)
加減乘除、根號、次方等等的運算式口語也換成 LATEX 數學符號

整體格式為:
## 📚 整體摘要
- (一個 bullet point....)

## 🔖 重點概念
- xxx
- xxx
- xxx

## 💡 為什麼我們要學這個?
- (一個 bullet point....)

## ❓ 延伸小問題
- (一個 bullet point....)
"""
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content},
    ]
    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 4000,
    }

    response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
    # The message content may be None (e.g. refusals); never call .strip() on
    # it blindly and never persist an empty result.
    raw_content = response.choices[0].message.content
    new_summary = raw_content.strip() if raw_content else ""
    if not new_summary:
        raise gr.Error("LLM 沒有回傳內容,請再試一次!")

    print("=====new_summary=====")
    print(new_summary)
    print("=====new_summary=====")

    # Keep every other field of the stored JSON and swap in the new summary.
    summary_json["summary"] = new_summary
    summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)

    # Save as a separate *_summary_markdown.json blob.
    new_file_name = f'{video_id}_summary_markdown.json'
    new_blob_name = f"{video_id}/{new_file_name}"
    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, summary_text)

    return new_summary
|
1674 |
+
|
1675 |
+
|
1676 |
+
|
1677 |
+
|
1678 |
# AI 生成教學素材
|
1679 |
def get_meta_data(video_id, source="gcs"):
|
1680 |
if source == "gcs":
|
|
|
2566 |
df_string_output = gr.Textbox(lines=40, label="Data Text", interactive=False, show_copy_button=True)
|
2567 |
with gr.Tab("文章本文"):
|
2568 |
with gr.Row() as reading_passage_admin:
|
2569 |
+
with gr.Column():
|
2570 |
+
with gr.Row():
|
2571 |
+
reading_passage_kind = gr.Textbox(value="reading_passage", show_label=False)
|
2572 |
+
with gr.Row():
|
2573 |
+
reading_passage_text_to_latex = gr.Button("新增 LaTeX", size="sm", variant="primary")
|
2574 |
+
reading_passage_get_button = gr.Button("取得", size="sm", variant="primary")
|
2575 |
+
reading_passage_edit_button = gr.Button("編輯", size="sm", variant="primary")
|
2576 |
+
reading_passage_update_button = gr.Button("更新", size="sm", variant="primary")
|
2577 |
+
reading_passage_delete_button = gr.Button("刪除", size="sm", variant="primary")
|
2578 |
+
reading_passage_create_button = gr.Button("建立", size="sm", variant="primary")
|
2579 |
with gr.Row():
|
2580 |
reading_passage_text = gr.Textbox(label="reading_passage", lines=40, interactive=False, show_copy_button=True)
|
2581 |
with gr.Tab("重點摘要本文"):
|
2582 |
with gr.Row() as summary_admmin:
|
2583 |
+
with gr.Column():
|
2584 |
+
with gr.Row():
|
2585 |
+
summary_kind = gr.Textbox(value="summary", show_label=False)
|
2586 |
+
with gr.Row():
|
2587 |
+
summary_to_markdown = gr.Button("新增 Markdown", size="sm", variant="primary")
|
2588 |
+
summary_get_button = gr.Button("取得", size="sm", variant="primary")
|
2589 |
+
summary_edit_button = gr.Button("編輯", size="sm", variant="primary")
|
2590 |
+
summary_update_button = gr.Button("更新", size="sm", variant="primary")
|
2591 |
+
summary_delete_button = gr.Button("刪除", size="sm", variant="primary")
|
2592 |
+
summary_create_button = gr.Button("建立", size="sm", variant="primary")
|
2593 |
with gr.Row():
|
2594 |
summary_text = gr.Textbox(label="Summary", lines=40, interactive=False, show_copy_button=True)
|
2595 |
with gr.Tab("關鍵時刻本文"):
|
|
|
2790 |
# web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|
2791 |
|
2792 |
# reading_passage event
|
2793 |
+
reading_passage_text_to_latex.click(
|
2794 |
+
reading_passage_add_latex_version,
|
2795 |
+
inputs=[video_id],
|
2796 |
+
outputs=[reading_passage_text]
|
2797 |
+
)
|
2798 |
reading_passage_get_button.click(
|
2799 |
get_LLM_content,
|
2800 |
inputs=[video_id, reading_passage_kind],
|
|
|
2822 |
)
|
2823 |
|
2824 |
# summary event
|
2825 |
+
summary_to_markdown.click(
|
2826 |
+
summary_add_markdown_version,
|
2827 |
+
inputs=[video_id],
|
2828 |
+
outputs=[summary_text]
|
2829 |
+
)
|
2830 |
summary_get_button.click(
|
2831 |
get_LLM_content,
|
2832 |
inputs=[video_id, summary_kind],
|