Siyuan0730 committed • commit 9abc2ca • parent ae8b477
Update the tested code (OpenAI API update)

Files changed:
- app.py (+46 -34)
- requirements.txt (+1 -1)
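The commit migrates app.py from the pre-1.0 `openai` module to the 1.x client interface (`from openai import OpenAI`) and threads a `client` object through the generation helpers. A minimal sketch of the call pattern the new code relies on — the key string and prompt below are placeholders, the model name is the one the diff pins:

    from openai import OpenAI

    # the app builds the client from st.session_state["OPENAI_API_KEY"]
    client = OpenAI(api_key="sk-...")  # placeholder key
    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[{"role": "user", "content": "Say hello in one word."}],
        temperature=0,
    )
    print(completion.choices[0].message.content)

This is the same pattern wrapped by the new get_completion_from_messages(client, messages, ...) helper in the diff below.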
app.py
CHANGED
@@ -1,7 +1,7 @@
 import pandas as pd
 import numpy as np
 import faiss
-import openai
+from openai import OpenAI
 import tempfile
 from PyPDF2 import PdfReader
 import io
@@ -16,8 +16,6 @@ from collections import Counter
 #import jieba.analyse
 import nltk
 
-
-
 @st.cache_data
 def download_nltk():
     nltk.download('punkt')
@@ -65,15 +63,16 @@ def get_keywords(file_paths): #这里的重点是,对每一个file做尽可能
     return keywords_list
 
 
-def get_completion_from_messages(messages, model="gpt-4", temperature=0):
-
-
-
-
-
-
+def get_completion_from_messages(client, messages, model="gpt-4-1106-preview", temperature=0):
+    client = client
+    completion = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        temperature=temperature,
+    )
+    return completion.choices[0].message.content
 
-def genarating_outline(keywords, num_lessons,language):
+def genarating_outline(client, keywords, num_lessons,language):
     system_message = 'You are a great AI teacher and linguist, skilled at create course outline based on summarized knowledge materials.'
     user_message = f"""You are a great AI teacher and linguist,
     skilled at generating course outline based on keywords of the course.
@@ -96,7 +95,7 @@ def genarating_outline(keywords, num_lessons,language):
          'content': user_message},
     ]
 
-    response = get_completion_from_messages(messages)
+    response = get_completion_from_messages(client, messages)
 
     list_response = ['nothing in the answers..']
 
@@ -107,9 +106,9 @@ def genarating_outline(keywords, num_lessons,language):
 
     return list_response
 
-def courseOutlineGenerating(file_paths, num_lessons, language):
+def courseOutlineGenerating(client, file_paths, num_lessons, language):
     summarized_materials = get_keywords(file_paths)
-    course_outline = genarating_outline(summarized_materials, num_lessons, language)
+    course_outline = genarating_outline(client, summarized_materials, num_lessons, language)
     return course_outline
 
 def constructVDB(file_paths):
@@ -171,7 +170,7 @@ def searchVDB(search_sentence, paraphrase_embeddings_df, index):
 
     return retrieved_chunks_list
 
-def generateCourse(topic, materials, language, style_options):
+def generateCourse(client, topic, materials, language, style_options):
     system_message = 'You are a great AI teacher and linguist, skilled at writing informative and easy-to-understand course script based on given lesson topic and knowledge materials.'
 
     user_message = f"""You are a great AI teacher and linguist,
@@ -199,7 +198,7 @@ def generateCourse(topic, materials, language, style_options):
          'content': user_message},
     ]
 
-    response = get_completion_from_messages(messages)
+    response = get_completion_from_messages(client, messages)
     return response
 
 def decorate_user_question(user_question, retrieved_chunks_for_user):
@@ -236,9 +235,9 @@ def initialize_vdb(temp_file_paths):
     st.success("Constructing vector database from provided materials...Done")
     return embeddings_df, faiss_index
 
-def initialize_outline(temp_file_paths, num_lessons, language):
+def initialize_outline(client, temp_file_paths, num_lessons, language):
     with st.spinner('Generating Course Outline...'):
-        course_outline_list = courseOutlineGenerating(temp_file_paths, num_lessons, language)
+        course_outline_list = courseOutlineGenerating(client, temp_file_paths, num_lessons, language)
     st.success("Generating Course Outline...Done")
     course_outline_string = ''
     lessons_count = 0
@@ -251,14 +250,14 @@ def initialize_outline(temp_file_paths, num_lessons, language):
 
     return course_outline_list
 
-def initialize_content(course_outline_list, embeddings_df, faiss_index, language, style_options):
+def initialize_content(client, course_outline_list, embeddings_df, faiss_index, language, style_options):
     count_generating_content = 0
     course_content_list = []
     for lesson in course_outline_list:
         count_generating_content += 1
         with st.spinner(f"Writing content for lesson {count_generating_content}..."):
             retrievedChunksList = searchVDB(lesson, embeddings_df, faiss_index)
-            courseContent = generateCourse(lesson, retrievedChunksList, language, style_options)
+            courseContent = generateCourse(client, lesson, retrievedChunksList, language, style_options)
         course_content_list.append(courseContent)
         st.success(f"Writing content for lesson {count_generating_content}...Done")
         with st.expander(f"Learn the lesson {count_generating_content} ", expanded=False):
@@ -364,9 +363,12 @@ def app():
 
     if "OPENAI_API_KEY" not in st.session_state:
         st.session_state["OPENAI_API_KEY"] = ''
+    #if "client" not in st.session_state:
+    #    st.session_state["client"] = ''
     if "openai_model" not in st.session_state:
-        st.session_state["openai_model"] = "gpt-
-
+        st.session_state["openai_model"] = "gpt-4-1106-preview"
+    if "messages_ui" not in st.session_state:
+        st.session_state.messages_ui = []
     if "messages" not in st.session_state:
         st.session_state.messages = []
 
@@ -425,7 +427,6 @@ def app():
 
     ''', unsafe_allow_html=True)
 
-
    if btn:
        if api_key != "sk-..." and api_key !="" and api_key.startswith("sk-"):
            st.session_state.start_col1.empty()
@@ -438,10 +439,10 @@
            #initialize app
            temp_file_paths = initialize_file(added_files)
            st.session_state["OPENAI_API_KEY"] = api_key
-
+            client = OpenAI(api_key = st.session_state["OPENAI_API_KEY"])
            st.session_state.embeddings_df, st.session_state.faiss_index = initialize_vdb(temp_file_paths)
-            st.session_state.course_outline_list = initialize_outline(temp_file_paths, num_lessons, language)
-            st.session_state.course_content_list = initialize_content(st.session_state.course_outline_list, st.session_state.embeddings_df, st.session_state.faiss_index, language, style_options)
+            st.session_state.course_outline_list = initialize_outline(client, temp_file_paths, num_lessons, language)
+            st.session_state.course_content_list = initialize_content(client, st.session_state.course_outline_list, st.session_state.embeddings_df, st.session_state.faiss_index, language, style_options)
 
            st.markdown('''
            > 🤔 <font color = 'grey'> **Not satisfied with this course?** Simply click "Generate my course!" button to regenerate a new one! </font>
@@ -457,7 +458,7 @@
            st.session_state.case_pay.empty()
            announce.empty()
            divider.empty()
-            warning = st.write("请输入正确的API Key令牌")
+            warning = st.write("请输入正确的OpenAI API Key令牌")
 
 
    col1, col2 = st.columns([0.6,0.4])
@@ -485,10 +486,12 @@
            st.write("Hello👋, how can I help you today? 😄")
 
        # Display chat messages from history on app rerun
-        for message in st.session_state.messages:
+        for message in st.session_state.messages_ui:
            with st.chat_message(message["role"]):
-                st.markdown(message["content"]
+                st.markdown(message["content"])
 
+        #更新ui上显示的聊天记录
+        st.session_state.messages_ui.append({"role": "user", "content": user_question})
        # Display new user question.
        with st.chat_message("user"):
            st.markdown(user_question)
@@ -496,21 +499,30 @@
        #这里的session.state就是保存了这个对话会话的一些基本信息和设置
        retrieved_chunks_for_user = searchVDB(user_question, st.session_state.embeddings_df, st.session_state.faiss_index)
        prompt = decorate_user_question(user_question, retrieved_chunks_for_user)
-        st.session_state.messages.append({"role": "user", "content":
+        st.session_state.messages.append({"role": "user", "content": prompt})
 
        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
-
+            client = OpenAI(api_key = st.session_state["OPENAI_API_KEY"])
+            for response in client.chat.completions.create(
                model=st.session_state["openai_model"],
-                messages=[
+                messages=[
+                    {"role": m["role"], "content": m["content"]}
+                    for m in st.session_state.messages #用chatbot那边的隐藏消息记录
+                ],
                stream=True,
            ):
-
+                try:
+                    full_response += response.choices[0].delta.content
+                except:
+                    full_response += ""
                message_placeholder.markdown(full_response + "▌")
            message_placeholder.markdown(full_response)
-        st.session_state.messages.append({"role": "assistant", "content":
+        st.session_state.messages.append({"role": "assistant", "content": full_response})
+        st.session_state.messages_ui.append({"role": "assistant", "content": full_response})
+
 
 
 if __name__ == "__main__":
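Beyond the client migration, the chat handler above now keeps two histories: st.session_state.messages holds the decorated prompts (question plus retrieved chunks) actually sent to the model, while st.session_state.messages_ui holds only the plain questions and answers rendered in the chat window. A condensed sketch of the streaming pattern the new handler uses — the key and prompt are placeholders, names follow the diff:

    from openai import OpenAI

    client = OpenAI(api_key="sk-...")  # placeholder key
    full_response = ""
    for chunk in client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[{"role": "user", "content": "Explain FAISS in one sentence."}],
        stream=True,
    ):
        delta = chunk.choices[0].delta.content  # can be None on chunks that carry no text
        if delta:
            full_response += delta
    print(full_response)

The explicit None check is one way to handle content-less chunks; the diff reaches the same result with its try/except around the string concatenation.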
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
 faiss_cpu==1.7.3
 nltk==3.8.1
 numpy==1.25.0
-openai==
+openai==1.6.1
 pandas==2.0.2
 PyPDF2==3.0.1
 sentence_transformers==2.2.2
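The bump to openai==1.6.1 is what makes `from openai import OpenAI` available to app.py. A quick sanity check for the pinned environment (a sketch, not part of the commit):

    # confirm the installed SDK is the 1.x line that app.py now targets
    import openai
    from openai import OpenAI  # only present in openai>=1.0

    assert openai.__version__.startswith("1."), openai.__version__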