Spaces:
Runtime error
Runtime error
quincyqiang
committed on
Commit
·
65d97b1
1
Parent(s):
57f12d9
feature@添加问答模式选择
Browse files
- README.md +4 -0
- app.py +63 -35
- assets/custom.css +3 -3
- clc/config.py +2 -2
- clc/langchain_application.py +30 -18
- clc/source_service.py +10 -7
- main.py +62 -34
README.md
CHANGED
@@ -8,6 +8,7 @@ colorTo: yellow
|
|
8 |
pinned: true
|
9 |
app_file: app.py
|
10 |
---
|
|
|
11 |
# Chinese-LangChain
|
12 |
|
13 |
> Chinese-LangChain:中文langchain项目,基于ChatGLM-6b+langchain实现本地化知识库检索与智能答案生成
|
@@ -55,6 +56,8 @@ python main.py
|
|
55 |
|
56 |
## 🚀 特性
|
57 |
|
|
|
|
|
58 |
- 📝 2023/04/19 发布45万Wikipedia的文本预处理语料以及FAISS索引向量
|
59 |
- 🐯 2023/04/19 引入ChuanhuChatGPT皮肤
|
60 |
- 📱 2023/04/19 增加web search功能,需要确保网络畅通!(感谢[@wanghao07456](https://github.com/wanghao07456),提供的idea)
|
@@ -87,6 +90,7 @@ python main.py
|
|
87 |
* [x] 支持加载不同知识库
|
88 |
* [x] 支持检索结果与LLM生成结果对比
|
89 |
* [ ] 支持检索生成结果与原始LLM生成结果对比
|
|
|
90 |
* [ ] 检索结果过滤与排序
|
91 |
* [x] 互联网检索结果接入
|
92 |
* [ ] 模型初始化有问题
|
|
|
8 |
pinned: true
|
9 |
app_file: app.py
|
10 |
---
|
11 |
+
|
12 |
# Chinese-LangChain
|
13 |
|
14 |
> Chinese-LangChain:中文langchain项目,基于ChatGLM-6b+langchain实现本地化知识库检索与智能答案生成
|
|
|
56 |
|
57 |
## 🚀 特性
|
58 |
|
59 |
+
- 📝 2023/04/20 支持模型问答与检索问答模式切换
|
60 |
+
- 📝 2023/04/20 感谢HF官方提供免费算力,添加HuggingFace Spaces在线体验 [🤗 DEMO](https://huggingface.co/spaces/ChallengeHub/Chinese-LangChain)
|
61 |
- 📝 2023/04/19 发布45万Wikipedia的文本预处理语料以及FAISS索引向量
|
62 |
- 🐯 2023/04/19 引入ChuanhuChatGPT皮肤
|
63 |
- 📱 2023/04/19 增加web search功能,需要确保网络畅通!(感谢[@wanghao07456](https://github.com/wanghao07456),提供的idea)
|
|
|
90 |
* [x] 支持加载不同知识库
|
91 |
* [x] 支持检索结果与LLM生成结果对比
|
92 |
* [ ] 支持检索生成结果与原始LLM生成结果对比
|
93 |
+
* [ ] 支持模型问答与检索问答
|
94 |
* [ ] 检索结果过滤与排序
|
95 |
* [x] 互联网检索结果接入
|
96 |
* [ ] 模型初始化有问题
|
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import os
|
2 |
import shutil
|
3 |
|
|
|
4 |
from app_modules.presets import *
|
5 |
from clc.langchain_application import LangChainApplication
|
6 |
|
@@ -8,15 +9,16 @@ from clc.langchain_application import LangChainApplication
|
|
8 |
# 修改成自己的配置!!!
|
9 |
class LangChainCFG:
|
10 |
llm_model_name = 'THUDM/chatglm-6b-int4-qe' # 本地模型文件 or huggingface远程仓库
|
11 |
-
embedding_model_name = 'GanymedeNil/text2vec-
|
12 |
vector_store_path = './cache'
|
13 |
docs_path = './docs'
|
14 |
kg_vector_stores = {
|
15 |
'中文维基百科': './cache/zh_wikipedia',
|
16 |
-
'
|
17 |
-
'
|
18 |
} # 可以替换成自己的知识库,如果没有需要设置为None
|
19 |
# kg_vector_stores=None
|
|
|
20 |
|
21 |
|
22 |
config = LangChainCFG()
|
@@ -61,6 +63,7 @@ def predict(input,
|
|
61 |
embedding_model,
|
62 |
top_k,
|
63 |
use_web,
|
|
|
64 |
history=None):
|
65 |
# print(large_language_model, embedding_model)
|
66 |
print(input)
|
@@ -71,24 +74,31 @@ def predict(input,
|
|
71 |
web_content = application.source_service.search_web(query=input)
|
72 |
else:
|
73 |
web_content = ''
|
74 |
-
resp = application.get_knowledge_based_answer(
|
75 |
-
query=input,
|
76 |
-
history_len=1,
|
77 |
-
temperature=0.1,
|
78 |
-
top_p=0.9,
|
79 |
-
top_k=top_k,
|
80 |
-
web_content=web_content,
|
81 |
-
chat_history=history
|
82 |
-
)
|
83 |
-
history.append((input, resp['result']))
|
84 |
search_text = ''
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
|
94 |
with open("assets/custom.css", "r", encoding="utf-8") as f:
|
@@ -121,28 +131,35 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
121 |
step=1,
|
122 |
label="检索top-k文档",
|
123 |
interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
kg_name = gr.Radio(['中文维基百科',
|
125 |
'大规模金融研报知识图谱',
|
126 |
'初始化知识库'
|
127 |
],
|
128 |
label="知识库",
|
129 |
-
value=
|
|
|
130 |
interactive=True)
|
131 |
-
set_kg_btn = gr.Button("
|
132 |
-
|
133 |
-
use_web = gr.Radio(["使用", "不使用"], label="web search",
|
134 |
-
info="是否使用网络搜索,使用时确保网络通常",
|
135 |
-
value="不使用"
|
136 |
-
)
|
137 |
|
138 |
file = gr.File(label="将文件上传到知识库库,内容要尽量匹配",
|
139 |
visible=True,
|
140 |
file_types=['.txt', '.md', '.docx', '.pdf']
|
141 |
)
|
142 |
|
143 |
-
file.upload(upload_file,
|
144 |
-
inputs=file,
|
145 |
-
outputs=None)
|
146 |
with gr.Column(scale=4):
|
147 |
with gr.Row():
|
148 |
chatbot = gr.Chatbot(label='Chinese-LangChain').style(height=400)
|
@@ -159,6 +176,10 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
159 |
with gr.Column(scale=2):
|
160 |
search = gr.Textbox(label='搜索结果')
|
161 |
|
|
|
|
|
|
|
|
|
162 |
set_kg_btn.click(
|
163 |
set_knowledge,
|
164 |
show_progress=True,
|
@@ -168,9 +189,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
168 |
# 发送按钮 提交
|
169 |
send.click(predict,
|
170 |
inputs=[
|
171 |
-
message,
|
172 |
-
|
173 |
-
|
|
|
|
|
|
|
174 |
state
|
175 |
],
|
176 |
outputs=[message, chatbot, state, search])
|
@@ -184,8 +208,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
184 |
# 输入框 回车
|
185 |
message.submit(predict,
|
186 |
inputs=[
|
187 |
-
message,
|
188 |
-
|
|
|
|
|
|
|
|
|
189 |
state
|
190 |
],
|
191 |
outputs=[message, chatbot, state, search])
|
|
|
1 |
import os
|
2 |
import shutil
|
3 |
|
4 |
+
from app_modules.overwrites import postprocess
|
5 |
from app_modules.presets import *
|
6 |
from clc.langchain_application import LangChainApplication
|
7 |
|
|
|
9 |
# 修改成自己的配置!!!
|
10 |
class LangChainCFG:
|
11 |
llm_model_name = 'THUDM/chatglm-6b-int4-qe' # 本地模型文件 or huggingface远程仓库
|
12 |
+
embedding_model_name = 'GanymedeNil/text2vec-large-chinese' # 检索模型文件 or huggingface远程仓库
|
13 |
vector_store_path = './cache'
|
14 |
docs_path = './docs'
|
15 |
kg_vector_stores = {
|
16 |
'中文维基百科': './cache/zh_wikipedia',
|
17 |
+
'大规模金融研报': './cache/financial_research_reports',
|
18 |
+
'初始化': './cache',
|
19 |
} # 可以替换成自己的知识库,如果没有需要设置为None
|
20 |
# kg_vector_stores=None
|
21 |
+
patterns = ['模型问答', '知识库问答'] #
|
22 |
|
23 |
|
24 |
config = LangChainCFG()
|
|
|
63 |
embedding_model,
|
64 |
top_k,
|
65 |
use_web,
|
66 |
+
use_pattern,
|
67 |
history=None):
|
68 |
# print(large_language_model, embedding_model)
|
69 |
print(input)
|
|
|
74 |
web_content = application.source_service.search_web(query=input)
|
75 |
else:
|
76 |
web_content = ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
search_text = ''
|
78 |
+
if use_pattern == '模型问答':
|
79 |
+
result = application.get_llm_answer(query=input, web_content=web_content)
|
80 |
+
history.append((input, result))
|
81 |
+
search_text += web_content
|
82 |
+
return '', history, history, search_text
|
83 |
+
|
84 |
+
else:
|
85 |
+
resp = application.get_knowledge_based_answer(
|
86 |
+
query=input,
|
87 |
+
history_len=1,
|
88 |
+
temperature=0.1,
|
89 |
+
top_p=0.9,
|
90 |
+
top_k=top_k,
|
91 |
+
web_content=web_content,
|
92 |
+
chat_history=history
|
93 |
+
)
|
94 |
+
history.append((input, resp['result']))
|
95 |
+
for idx, source in enumerate(resp['source_documents'][:4]):
|
96 |
+
sep = f'----------【搜索结果{idx + 1}:】---------------\n'
|
97 |
+
search_text += f'{sep}\n{source.page_content}\n\n'
|
98 |
+
print(search_text)
|
99 |
+
search_text += "----------【网络检索内容】-----------\n"
|
100 |
+
search_text += web_content
|
101 |
+
return '', history, history, search_text
|
102 |
|
103 |
|
104 |
with open("assets/custom.css", "r", encoding="utf-8") as f:
|
|
|
131 |
step=1,
|
132 |
label="检索top-k文档",
|
133 |
interactive=True)
|
134 |
+
|
135 |
+
use_web = gr.Radio(["使用", "不使用"], label="web search",
|
136 |
+
info="是否使用网络搜索,使用时确保网络通常",
|
137 |
+
value="不使用"
|
138 |
+
)
|
139 |
+
use_pattern = gr.Radio(
|
140 |
+
[
|
141 |
+
'模型问答',
|
142 |
+
'知识库问答',
|
143 |
+
],
|
144 |
+
label="模式",
|
145 |
+
value='模型问答',
|
146 |
+
interactive=True)
|
147 |
+
|
148 |
kg_name = gr.Radio(['中文维基百科',
|
149 |
'大规模金融研报知识图谱',
|
150 |
'初始化知识库'
|
151 |
],
|
152 |
label="知识库",
|
153 |
+
value=None,
|
154 |
+
info="使用知识库问答,请加载知识库",
|
155 |
interactive=True)
|
156 |
+
set_kg_btn = gr.Button("加载知识库")
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
file = gr.File(label="将文件上传到知识库库,内容要尽量匹配",
|
159 |
visible=True,
|
160 |
file_types=['.txt', '.md', '.docx', '.pdf']
|
161 |
)
|
162 |
|
|
|
|
|
|
|
163 |
with gr.Column(scale=4):
|
164 |
with gr.Row():
|
165 |
chatbot = gr.Chatbot(label='Chinese-LangChain').style(height=400)
|
|
|
176 |
with gr.Column(scale=2):
|
177 |
search = gr.Textbox(label='搜索结果')
|
178 |
|
179 |
+
# ============= 触发动作=============
|
180 |
+
file.upload(upload_file,
|
181 |
+
inputs=file,
|
182 |
+
outputs=None)
|
183 |
set_kg_btn.click(
|
184 |
set_knowledge,
|
185 |
show_progress=True,
|
|
|
189 |
# 发送按钮 提交
|
190 |
send.click(predict,
|
191 |
inputs=[
|
192 |
+
message,
|
193 |
+
large_language_model,
|
194 |
+
embedding_model,
|
195 |
+
top_k,
|
196 |
+
use_web,
|
197 |
+
use_pattern,
|
198 |
state
|
199 |
],
|
200 |
outputs=[message, chatbot, state, search])
|
|
|
208 |
# 输入框 回车
|
209 |
message.submit(predict,
|
210 |
inputs=[
|
211 |
+
message,
|
212 |
+
large_language_model,
|
213 |
+
embedding_model,
|
214 |
+
top_k,
|
215 |
+
use_web,
|
216 |
+
use_pattern,
|
217 |
state
|
218 |
],
|
219 |
outputs=[message, chatbot, state, search])
|
assets/custom.css
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
:root {
|
2 |
-
--chatbot-color-light:
|
3 |
--chatbot-color-dark: #121111;
|
4 |
}
|
5 |
|
@@ -40,7 +40,7 @@ ol:not(.options), ul:not(.options) {
|
|
40 |
color: #000000 !important;
|
41 |
}
|
42 |
[data-testid = "bot"] {
|
43 |
-
background-color:
|
44 |
}
|
45 |
[data-testid = "user"] {
|
46 |
background-color: #95EC69 !important;
|
@@ -49,7 +49,7 @@ ol:not(.options), ul:not(.options) {
|
|
49 |
/* Dark mode */
|
50 |
.dark #chuanhu_chatbot {
|
51 |
background-color: var(--chatbot-color-dark) !important;
|
52 |
-
color:
|
53 |
}
|
54 |
.dark [data-testid = "bot"] {
|
55 |
background-color: #2C2C2C !important;
|
|
|
1 |
:root {
|
2 |
+
--chatbot-color-light: rgba(255, 255, 255, 0.08);
|
3 |
--chatbot-color-dark: #121111;
|
4 |
}
|
5 |
|
|
|
40 |
color: #000000 !important;
|
41 |
}
|
42 |
[data-testid = "bot"] {
|
43 |
+
background-color: rgba(255, 255, 255, 0.08) !important;
|
44 |
}
|
45 |
[data-testid = "user"] {
|
46 |
background-color: #95EC69 !important;
|
|
|
49 |
/* Dark mode */
|
50 |
.dark #chuanhu_chatbot {
|
51 |
background-color: var(--chatbot-color-dark) !important;
|
52 |
+
color: rgba(255, 255, 255, 0.08) !important;
|
53 |
}
|
54 |
.dark [data-testid = "bot"] {
|
55 |
background-color: #2C2C2C !important;
|
clc/config.py
CHANGED
@@ -12,7 +12,7 @@
|
|
12 |
|
13 |
|
14 |
class LangChainCFG:
|
15 |
-
llm_model_name = 'chatglm-6b' # 本地模型文件 or huggingface远程仓库
|
16 |
-
embedding_model_name = 'text2vec-large-chinese' # 检索模型文件 or huggingface远程仓库
|
17 |
vector_store_path = '.'
|
18 |
docs_path = './docs'
|
|
|
12 |
|
13 |
|
14 |
class LangChainCFG:
|
15 |
+
llm_model_name = 'THUDM/chatglm-6b-int4-qe' # 本地模型文件 or huggingface远程仓库
|
16 |
+
embedding_model_name = 'GanymedeNil/text2vec-large-chinese' # 检索模型文件 or huggingface远程仓库
|
17 |
vector_store_path = '.'
|
18 |
docs_path = './docs'
|
clc/langchain_application.py
CHANGED
@@ -9,10 +9,10 @@
|
|
9 |
@software: PyCharm
|
10 |
@description: coding..
|
11 |
"""
|
12 |
-
|
13 |
from langchain.chains import RetrievalQA
|
14 |
from langchain.prompts.prompt import PromptTemplate
|
15 |
|
|
|
16 |
from clc.gpt_service import ChatGLMService
|
17 |
from clc.source_service import SourceService
|
18 |
|
@@ -23,15 +23,16 @@ class LangChainApplication(object):
|
|
23 |
self.llm_service = ChatGLMService()
|
24 |
self.llm_service.load_model(model_name_or_path=self.config.llm_model_name)
|
25 |
self.source_service = SourceService(config)
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
35 |
|
36 |
def get_knowledge_based_answer(self, query,
|
37 |
history_len=5,
|
@@ -75,11 +76,22 @@ class LangChainApplication(object):
|
|
75 |
result = knowledge_chain({"query": query})
|
76 |
return result
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
@software: PyCharm
|
10 |
@description: coding..
|
11 |
"""
|
|
|
12 |
from langchain.chains import RetrievalQA
|
13 |
from langchain.prompts.prompt import PromptTemplate
|
14 |
|
15 |
+
from clc.config import LangChainCFG
|
16 |
from clc.gpt_service import ChatGLMService
|
17 |
from clc.source_service import SourceService
|
18 |
|
|
|
23 |
self.llm_service = ChatGLMService()
|
24 |
self.llm_service.load_model(model_name_or_path=self.config.llm_model_name)
|
25 |
self.source_service = SourceService(config)
|
26 |
+
|
27 |
+
# if self.config.kg_vector_stores is None:
|
28 |
+
# print("init a source vector store")
|
29 |
+
# self.source_service.init_source_vector()
|
30 |
+
# else:
|
31 |
+
# print("load zh_wikipedia source vector store ")
|
32 |
+
# try:
|
33 |
+
# self.source_service.load_vector_store(self.config.kg_vector_stores['初始化知识库'])
|
34 |
+
# except Exception as e:
|
35 |
+
# self.source_service.init_source_vector()
|
36 |
|
37 |
def get_knowledge_based_answer(self, query,
|
38 |
history_len=5,
|
|
|
76 |
result = knowledge_chain({"query": query})
|
77 |
return result
|
78 |
|
79 |
+
def get_llm_answer(self, query='', web_content=''):
|
80 |
+
if web_content:
|
81 |
+
prompt = f'基于网络检索内容:{web_content},回答以下问题{query}'
|
82 |
+
else:
|
83 |
+
prompt = query
|
84 |
+
result = self.llm_service._call(prompt)
|
85 |
+
return result
|
86 |
+
|
87 |
+
|
88 |
+
if __name__ == '__main__':
|
89 |
+
config = LangChainCFG()
|
90 |
+
application = LangChainApplication(config)
|
91 |
+
# result = application.get_knowledge_based_answer('马保国是谁')
|
92 |
+
# print(result)
|
93 |
+
# application.source_service.add_document('/home/searchgpt/yq/Knowledge-ChatGLM/docs/added/马保国.txt')
|
94 |
+
# result = application.get_knowledge_based_answer('马保国是谁')
|
95 |
+
# print(result)
|
96 |
+
result = application.get_llm_answer('马保国是谁')
|
97 |
+
print(result)
|
clc/source_service.py
CHANGED
@@ -13,7 +13,6 @@
|
|
13 |
import os
|
14 |
|
15 |
from duckduckgo_search import ddg
|
16 |
-
from duckduckgo_search.utils import SESSION
|
17 |
from langchain.document_loaders import UnstructuredFileLoader
|
18 |
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
19 |
from langchain.vectorstores import FAISS
|
@@ -61,12 +60,16 @@ class SourceService(object):
|
|
61 |
# "http": f"socks5h://localhost:7890",
|
62 |
# "https": f"socks5h://localhost:7890"
|
63 |
# }
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
70 |
# if __name__ == '__main__':
|
71 |
# config = LangChainCFG()
|
72 |
# source_service = SourceService(config)
|
|
|
13 |
import os
|
14 |
|
15 |
from duckduckgo_search import ddg
|
|
|
16 |
from langchain.document_loaders import UnstructuredFileLoader
|
17 |
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
|
18 |
from langchain.vectorstores import FAISS
|
|
|
60 |
# "http": f"socks5h://localhost:7890",
|
61 |
# "https": f"socks5h://localhost:7890"
|
62 |
# }
|
63 |
+
try:
|
64 |
+
results = ddg(query)
|
65 |
+
web_content = ''
|
66 |
+
if results:
|
67 |
+
for result in results:
|
68 |
+
web_content += result['body']
|
69 |
+
return web_content
|
70 |
+
except Exception as e:
|
71 |
+
print(f"网络检索异常:{query}")
|
72 |
+
return ''
|
73 |
# if __name__ == '__main__':
|
74 |
# config = LangChainCFG()
|
75 |
# source_service = SourceService(config)
|
main.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import os
|
2 |
import shutil
|
3 |
|
|
|
4 |
from app_modules.presets import *
|
5 |
from clc.langchain_application import LangChainApplication
|
6 |
|
@@ -13,10 +14,11 @@ class LangChainCFG:
|
|
13 |
docs_path = './docs'
|
14 |
kg_vector_stores = {
|
15 |
'中文维基百科': './cache/zh_wikipedia',
|
16 |
-
'
|
17 |
-
'
|
18 |
} # 可以替换成自己的知识库,如果没有需要设置为None
|
19 |
# kg_vector_stores=None
|
|
|
20 |
|
21 |
|
22 |
config = LangChainCFG()
|
@@ -61,6 +63,7 @@ def predict(input,
|
|
61 |
embedding_model,
|
62 |
top_k,
|
63 |
use_web,
|
|
|
64 |
history=None):
|
65 |
# print(large_language_model, embedding_model)
|
66 |
print(input)
|
@@ -71,24 +74,31 @@ def predict(input,
|
|
71 |
web_content = application.source_service.search_web(query=input)
|
72 |
else:
|
73 |
web_content = ''
|
74 |
-
resp = application.get_knowledge_based_answer(
|
75 |
-
query=input,
|
76 |
-
history_len=1,
|
77 |
-
temperature=0.1,
|
78 |
-
top_p=0.9,
|
79 |
-
top_k=top_k,
|
80 |
-
web_content=web_content,
|
81 |
-
chat_history=history
|
82 |
-
)
|
83 |
-
history.append((input, resp['result']))
|
84 |
search_text = ''
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
|
94 |
with open("assets/custom.css", "r", encoding="utf-8") as f:
|
@@ -121,28 +131,35 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
121 |
step=1,
|
122 |
label="检索top-k文档",
|
123 |
interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
kg_name = gr.Radio(['中文维基百科',
|
125 |
'大规模金融研报知识图谱',
|
126 |
'初始化知识库'
|
127 |
],
|
128 |
label="知识库",
|
129 |
-
value=
|
|
|
130 |
interactive=True)
|
131 |
-
set_kg_btn = gr.Button("
|
132 |
-
|
133 |
-
use_web = gr.Radio(["使用", "不使用"], label="web search",
|
134 |
-
info="是否使用网络搜索,使用时确保网络通常",
|
135 |
-
value="不使用"
|
136 |
-
)
|
137 |
|
138 |
file = gr.File(label="将文件上传到知识库库,内容要尽量匹配",
|
139 |
visible=True,
|
140 |
file_types=['.txt', '.md', '.docx', '.pdf']
|
141 |
)
|
142 |
|
143 |
-
file.upload(upload_file,
|
144 |
-
inputs=file,
|
145 |
-
outputs=None)
|
146 |
with gr.Column(scale=4):
|
147 |
with gr.Row():
|
148 |
chatbot = gr.Chatbot(label='Chinese-LangChain').style(height=400)
|
@@ -159,6 +176,10 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
159 |
with gr.Column(scale=2):
|
160 |
search = gr.Textbox(label='搜索结果')
|
161 |
|
|
|
|
|
|
|
|
|
162 |
set_kg_btn.click(
|
163 |
set_knowledge,
|
164 |
show_progress=True,
|
@@ -168,9 +189,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
168 |
# 发送按钮 提交
|
169 |
send.click(predict,
|
170 |
inputs=[
|
171 |
-
message,
|
172 |
-
|
173 |
-
|
|
|
|
|
|
|
174 |
state
|
175 |
],
|
176 |
outputs=[message, chatbot, state, search])
|
@@ -184,8 +208,12 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
|
|
184 |
# 输入框 回车
|
185 |
message.submit(predict,
|
186 |
inputs=[
|
187 |
-
message,
|
188 |
-
|
|
|
|
|
|
|
|
|
189 |
state
|
190 |
],
|
191 |
outputs=[message, chatbot, state, search])
|
|
|
1 |
import os
|
2 |
import shutil
|
3 |
|
4 |
+
from app_modules.overwrites import postprocess
|
5 |
from app_modules.presets import *
|
6 |
from clc.langchain_application import LangChainApplication
|
7 |
|
|
|
14 |
docs_path = './docs'
|
15 |
kg_vector_stores = {
|
16 |
'中文维基百科': './cache/zh_wikipedia',
|
17 |
+
'大规模金融研报': './cache/financial_research_reports',
|
18 |
+
'初始化': './cache',
|
19 |
} # 可以替换成自己的知识库,如果没有需要设置为None
|
20 |
# kg_vector_stores=None
|
21 |
+
patterns = ['模型问答', '知识库问答'] #
|
22 |
|
23 |
|
24 |
config = LangChainCFG()
|
|
|
63 |
embedding_model,
|
64 |
top_k,
|
65 |
use_web,
|
66 |
+
use_pattern,
|
67 |
history=None):
|
68 |
# print(large_language_model, embedding_model)
|
69 |
print(input)
|
|
|
74 |
web_content = application.source_service.search_web(query=input)
|
75 |
else:
|
76 |
web_content = ''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
search_text = ''
|
78 |
+
if use_pattern == '模型问答':
|
79 |
+
result = application.get_llm_answer(query=input, web_content=web_content)
|
80 |
+
history.append((input, result))
|
81 |
+
search_text += web_content
|
82 |
+
return '', history, history, search_text
|
83 |
+
|
84 |
+
else:
|
85 |
+
resp = application.get_knowledge_based_answer(
|
86 |
+
query=input,
|
87 |
+
history_len=1,
|
88 |
+
temperature=0.1,
|
89 |
+
top_p=0.9,
|
90 |
+
top_k=top_k,
|
91 |
+
web_content=web_content,
|
92 |
+
chat_history=history
|
93 |
+
)
|
94 |
+
history.append((input, resp['result']))
|
95 |
+
for idx, source in enumerate(resp['source_documents'][:4]):
|
96 |
+
sep = f'----------【搜索结果{idx + 1}:】---------------\n'
|
97 |
+
search_text += f'{sep}\n{source.page_content}\n\n'
|
98 |
+
print(search_text)
|
99 |
+
search_text += "----------【网络检索内容】-----------\n"
|
100 |
+
search_text += web_content
|
101 |
+
return '', history, history, search_text
|
102 |
|
103 |
|
104 |
with open("assets/custom.css", "r", encoding="utf-8") as f:
|
|
|
131 |
step=1,
|
132 |
label="检索top-k文档",
|
133 |
interactive=True)
|
134 |
+
|
135 |
+
use_web = gr.Radio(["使用", "不使用"], label="web search",
|
136 |
+
info="是否使用网络搜索,使用时确保网络通常",
|
137 |
+
value="不使用"
|
138 |
+
)
|
139 |
+
use_pattern = gr.Radio(
|
140 |
+
[
|
141 |
+
'模型问答',
|
142 |
+
'知识库问答',
|
143 |
+
],
|
144 |
+
label="模式",
|
145 |
+
value='模型问答',
|
146 |
+
interactive=True)
|
147 |
+
|
148 |
kg_name = gr.Radio(['中文维基百科',
|
149 |
'大规模金融研报知识图谱',
|
150 |
'初始化知识库'
|
151 |
],
|
152 |
label="知识库",
|
153 |
+
value=None,
|
154 |
+
info="使用知识库问答,请加载知识库",
|
155 |
interactive=True)
|
156 |
+
set_kg_btn = gr.Button("加载知识库")
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
file = gr.File(label="将文件上传到知识库库,内容要尽量匹配",
|
159 |
visible=True,
|
160 |
file_types=['.txt', '.md', '.docx', '.pdf']
|
161 |
)
|
162 |
|
|
|
|
|
|
|
163 |
with gr.Column(scale=4):
|
164 |
with gr.Row():
|
165 |
chatbot = gr.Chatbot(label='Chinese-LangChain').style(height=400)
|
|
|
176 |
with gr.Column(scale=2):
|
177 |
search = gr.Textbox(label='搜索结果')
|
178 |
|
179 |
+
# ============= 触发动作=============
|
180 |
+
file.upload(upload_file,
|
181 |
+
inputs=file,
|
182 |
+
outputs=None)
|
183 |
set_kg_btn.click(
|
184 |
set_knowledge,
|
185 |
show_progress=True,
|
|
|
189 |
# 发送按钮 提交
|
190 |
send.click(predict,
|
191 |
inputs=[
|
192 |
+
message,
|
193 |
+
large_language_model,
|
194 |
+
embedding_model,
|
195 |
+
top_k,
|
196 |
+
use_web,
|
197 |
+
use_pattern,
|
198 |
state
|
199 |
],
|
200 |
outputs=[message, chatbot, state, search])
|
|
|
208 |
# 输入框 回车
|
209 |
message.submit(predict,
|
210 |
inputs=[
|
211 |
+
message,
|
212 |
+
large_language_model,
|
213 |
+
embedding_model,
|
214 |
+
top_k,
|
215 |
+
use_web,
|
216 |
+
use_pattern,
|
217 |
state
|
218 |
],
|
219 |
outputs=[message, chatbot, state, search])
|