qingxu99 commited on
Commit
e6cf553
·
1 Parent(s): 91d07c3

提升稳定性

Browse files
crazy_functional.py CHANGED
@@ -29,7 +29,7 @@ def get_crazy_functions():
29
  "Color": "stop", # 按钮颜色
30
  "Function": HotReload(解析一个C项目的头文件)
31
  },
32
- "解析整个C++项目(.cpp/.h)": {
33
  "Color": "stop", # 按钮颜色
34
  "AsButton": False, # 加入下拉菜单中
35
  "Function": HotReload(解析一个C项目)
 
29
  "Color": "stop", # 按钮颜色
30
  "Function": HotReload(解析一个C项目的头文件)
31
  },
32
+ "解析整个C++项目(.cpp/.hpp/.c/.h)": {
33
  "Color": "stop", # 按钮颜色
34
  "AsButton": False, # 加入下拉菜单中
35
  "Function": HotReload(解析一个C项目)
crazy_functions/Latex全文润色.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from request_llm.bridge_chatgpt import predict_no_ui
2
+ from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
3
+ fast_debug = False
4
+
5
+
6
def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
    """
    Summarise each file of a paper with GPT, then request an overall abstract.

    This is a generator: it yields ``(chatbot, history, status)`` tuples so
    the caller can refresh the UI after every step.

    Args:
        file_manifest: paths of the files to summarise, processed in order.
        project_folder: base folder used to print paths relative to the project.
        top_p, temperature: sampling parameters forwarded to the GPT request.
        chatbot: conversation list shown in the UI; appended to and updated in place.
        history: flat list of alternating prompts/answers; extended in place.
        systemPromptTxt: accepted for signature compatibility; not used here.
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm that everything after each request sits under the `fast_debug` guard.
    import glob
    import os
    import time

    waiting_note = "[Local Message] waiting gpt response."
    print('begin analysis on:', file_manifest)

    for idx, path in enumerate(file_manifest):
        with open(path, 'r', encoding='utf-8') as handle:
            file_content = handle.read()

        # Only the very first request carries the introductory sentence.
        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if idx == 0 else ""
        rel_path = os.path.relpath(path, project_folder)
        i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{rel_path},文章内容是 ```{file_content}```'
        # The UI gets a short prompt instead of the full file content.
        i_say_show_user = prefix + f'[{idx}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(path)}'
        chatbot.append((i_say_show_user, waiting_note))
        print('[1] yield chatbot, history')
        yield chatbot, history, '正常'

        if fast_debug:
            continue

        msg = '正常'
        # GPT request with a timeout countdown; each file is sent with an empty history.
        gpt_say = yield from predict_no_ui_but_counting_down(
            i_say, i_say_show_user, chatbot, top_p, temperature, history=[])

        print('[2] end gpt req')
        chatbot[-1] = (i_say_show_user, gpt_say)
        history.extend([i_say_show_user, gpt_say])
        print('[3] yield chatbot, history')
        yield chatbot, history, msg
        print('[4] next')
        if not fast_debug:
            time.sleep(2)

    all_file = ', '.join(os.path.relpath(path, project_folder) for path in file_manifest)
    i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
    chatbot.append((i_say, waiting_note))
    yield chatbot, history, '正常'

    if fast_debug:
        return

    msg = '正常'
    # Final request: the accumulated per-file summaries are passed as history.
    gpt_say = yield from predict_no_ui_but_counting_down(
        i_say, i_say, chatbot, top_p, temperature, history=history)

    chatbot[-1] = (i_say, gpt_say)
    history.extend([i_say, gpt_say])
    yield chatbot, history, msg
    res = write_results_to_file(history)
    chatbot.append(("完成了吗?", res))
    yield chatbot, history, msg
49
+
50
+
51
+
52
@CatchException
def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """
    Entry point: collect every ``.tex`` file under ``txt`` and summarise them.

    Generator yielding ``(chatbot, history, status)`` tuples. When ``txt`` is
    not an existing path, or no ``.tex`` file is found beneath it, an error is
    reported through ``report_execption`` and the generator stops.

    Args:
        txt: path of the project folder, as typed by the user.
        top_p, temperature: sampling parameters forwarded to the GPT request.
        chatbot: conversation list shown in the UI.
        history: ignored; a fresh empty history is always used.
        systemPromptTxt: forwarded unchanged to ``解析Paper``.
        WEB_PORT: accepted for signature compatibility; not used here.
    """
    import glob
    import os

    history = []  # start from a clean history so the prompt cannot overflow

    if not os.path.exists(txt):
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield chatbot, history, '正常'
        return

    project_folder = txt
    file_manifest = list(glob.glob(f'{project_folder}/**/*.tex', recursive=True))
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield chatbot, history, '正常'
        return

    yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
crazy_functions/crazy_utils.py CHANGED
@@ -1,19 +1,115 @@
1
  import traceback
 
2
 
3
- def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_p, temperature, chatbot, history, sys_prompt, refresh_interval=0.2):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import time
5
  from concurrent.futures import ThreadPoolExecutor
6
  from request_llm.bridge_chatgpt import predict_no_ui_long_connection
7
  # 用户反馈
8
  chatbot.append([inputs_show_user, ""])
9
  msg = '正常'
10
- yield chatbot, [], msg
11
  executor = ThreadPoolExecutor(max_workers=16)
12
  mutable = ["", time.time()]
13
- future = executor.submit(lambda:
14
- predict_no_ui_long_connection(
15
- inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable)
16
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  while True:
18
  # yield一次以刷新前端页面
19
  time.sleep(refresh_interval)
@@ -27,8 +123,42 @@ def request_gpt_model_in_new_thread_with_ui_alive(inputs, inputs_show_user, top_
27
  return future.result()
28
 
29
 
30
- def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inputs_array, inputs_show_user_array, top_p, temperature, chatbot, history_array, sys_prompt_array, refresh_interval=0.2, max_workers=10, scroller_max_len=30):
31
- import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  from concurrent.futures import ThreadPoolExecutor
33
  from request_llm.bridge_chatgpt import predict_no_ui_long_connection
34
  assert len(inputs_array) == len(history_array)
@@ -40,20 +170,61 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
40
  msg = '正常'
41
  yield chatbot, [], msg
42
  # 异步原子
43
- mutable = [["", time.time()] for _ in range(n_frag)]
44
 
45
  def _req_gpt(index, inputs, history, sys_prompt):
46
- try:
47
- gpt_say = predict_no_ui_long_connection(
48
- inputs=inputs, top_p=top_p, temperature=temperature, history=history, sys_prompt=sys_prompt, observe_window=mutable[index]
49
- )
50
- except:
51
- # 收拾残局
52
- tb_str = '```\n' + traceback.format_exc() + '```'
53
- gpt_say = f"[Local Message] 线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
54
- if len(mutable[index][0]) > 0:
55
- gpt_say += "此线程失败前收到的回答:" + mutable[index][0]
56
- return gpt_say
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # 异步任务开始
58
  futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
59
  range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
@@ -68,6 +239,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
68
  break
69
  # 更好的UI视觉效果
70
  observe_win = []
 
71
  # 每个线程都要“喂狗”(看门狗)
72
  for thread_index, _ in enumerate(worker_done):
73
  mutable[thread_index][1] = time.time()
@@ -77,10 +249,10 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
77
  replace('\n', '').replace('```', '...').replace(
78
  ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
79
  observe_win.append(print_something_really_funny)
80
- stat_str = ''.join([f'执行中: {obs}\n\n' if not done else '已完成\n\n' for done, obs in zip(
81
- worker_done, observe_win)])
82
- chatbot[-1] = [chatbot[-1][0],
83
- f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
84
  msg = "正常"
85
  yield chatbot, [], msg
86
  # 异步任务结束
@@ -88,9 +260,38 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(inp
88
  for inputs_show_user, f in zip(inputs_show_user_array, futures):
89
  gpt_res = f.result()
90
  gpt_response_collection.extend([inputs_show_user, gpt_res])
 
 
 
 
 
 
 
91
  return gpt_response_collection
92
 
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
95
  def cut(txt_tocut, must_break_at_empty_line): # 递归
96
  if get_token_fn(txt_tocut) <= limit:
 
1
  import traceback
2
+ from toolbox import update_ui
3
 
4
def input_clipping(inputs, history, max_token_limit):
    """
    Truncate ``inputs`` and/or ``history`` so they fit in a token budget.

    When ``inputs`` uses less than half of the budget it is kept intact and
    only ``history`` is clipped against the remaining budget; otherwise both
    are candidates. Clipping repeatedly trims the tail of whichever piece
    currently has the most tokens.

    Args:
        inputs (str): the current prompt.
        history (list[str]): earlier conversation entries.
        max_token_limit (int): total token budget.

    Returns:
        tuple: ``(inputs, history)`` after clipping; ``history`` is a new list.
    """
    import tiktoken
    from toolbox import get_conf

    enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))

    def get_token_num(txt):
        return len(enc.encode(txt))

    mode = 'input-and-history'
    # If the input takes less than half of the budget, clip the history only.
    input_token_num = get_token_num(inputs)
    if input_token_num < max_token_limit // 2:
        mode = 'only-history'
        max_token_limit = max_token_limit - input_token_num

    # Slot 0 is the input, or an empty placeholder when the input is protected.
    everything = [inputs] if mode == 'input-and-history' else ['']
    everything.extend(history)
    n_token = get_token_num('\n'.join(everything))
    everything_token = [get_token_num(e) for e in everything]
    # Clipping granularity; never 0, so every pass removes at least one token.
    delta = max(max(everything_token) // 16, 1)

    while n_token > max_token_limit:
        # First index of the longest piece (same tie-breaking as argmax).
        where = everything_token.index(max(everything_token))
        if everything_token[where] == 0:
            # Nothing left to clip: the budget cannot be met (it may be zero
            # or negative). Stop instead of spinning forever.
            break
        encoded = enc.encode(everything[where])
        # Clamp at 0 so a short piece is emptied rather than sliced with a
        # negative index that wraps around.
        clipped_encoded = encoded[:max(len(encoded) - delta, 0)]
        # Drop the last character: it may be a broken one after token truncation.
        everything[where] = enc.decode(clipped_encoded)[:-1]
        everything_token[where] = get_token_num(everything[where])
        n_token = get_token_num('\n'.join(everything))

    if mode == 'input-and-history':
        inputs = everything[0]
    history = everything[1:]
    return inputs, history
38
+
39
+ def request_gpt_model_in_new_thread_with_ui_alive(
40
+ inputs, inputs_show_user, top_p, temperature,
41
+ chatbot, history, sys_prompt, refresh_interval=0.2,
42
+ handle_token_exceed=True,
43
+ retry_times_at_unknown_error=2,
44
+ ):
45
+ """
46
+ Request GPT model,请求GPT模型同时维持用户界面活跃。
47
+
48
+ 输入参数 Args (以_array结尾的输入变量都是列表,列表长度为子任务的数量,执行时,会把列表拆解,放到每个子线程中分别执行):
49
+ inputs (string): List of inputs (输入)
50
+ inputs_show_user (string): List of inputs to show user(展现在报告中的输入,借助此参数,在汇总报告中隐藏啰嗦的真实输入,增强报告的可读性)
51
+ top_p (float): Top p value for sampling from model distribution (GPT参数,浮点数)
52
+ temperature (float): Temperature value for sampling from model distribution(GPT参数,浮点数)
53
+ chatbot: chatbot inputs and outputs (用户界面对话窗口句柄,用于数据流可视化)
54
+ history (list): List of chat history (历史,对话历史列表)
55
+ sys_prompt (string): List of system prompts (系统输入,列表,用于输入给GPT的前提提示,比如你是翻译官怎样怎样)
56
+ refresh_interval (float, optional): Refresh interval for UI (default: 0.2) (刷新时间间隔频率,建议低于1,不可高于3,仅仅服务于视觉效果)
57
+ handle_token_exceed:是否自动处理token溢出的情况,如果选择自动处理,则会在溢出时暴力截断,默认开启
58
+ retry_times_at_unknown_error:失败时的重试次数
59
+
60
+ 输出 Returns:
61
+ future: 输出,GPT返回的结果
62
+ """
63
  import time
64
  from concurrent.futures import ThreadPoolExecutor
65
  from request_llm.bridge_chatgpt import predict_no_ui_long_connection
66
  # 用户反馈
67
  chatbot.append([inputs_show_user, ""])
68
  msg = '正常'
69
+ yield from update_ui(chatbot=chatbot, history=[])
70
  executor = ThreadPoolExecutor(max_workers=16)
71
  mutable = ["", time.time()]
72
+ def _req_gpt(inputs, history, sys_prompt):
73
+ retry_op = retry_times_at_unknown_error
74
+ exceeded_cnt = 0
75
+ while True:
76
+ try:
77
+ # 【第一种情况】:顺利完成
78
+ result = predict_no_ui_long_connection(
79
+ inputs=inputs, top_p=top_p, temperature=temperature,
80
+ history=history, sys_prompt=sys_prompt, observe_window=mutable)
81
+ return result
82
+ except ConnectionAbortedError as token_exceeded_error:
83
+ # 【第二种情况】:Token溢出,
84
+ if handle_token_exceed:
85
+ exceeded_cnt += 1
86
+ # 【选择处理】 尝试计算比例,尽可能多地保留文本
87
+ from toolbox import get_reduce_token_percent
88
+ p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
89
+ MAX_TOKEN = 4096
90
+ EXCEED_ALLO = 512 + 512 * exceeded_cnt
91
+ inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
92
+ mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
93
+ continue # 返回重试
94
+ else:
95
+ # 【选择放弃】
96
+ tb_str = '```\n' + traceback.format_exc() + '```'
97
+ mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
98
+ return mutable[0] # 放弃
99
+ except:
100
+ # 【第三种情况】:其他错误
101
+ tb_str = '```\n' + traceback.format_exc() + '```'
102
+ mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
103
+ if retry_op > 0:
104
+ retry_op -= 1
105
+ mutable[0] += f"[Local Message] 重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
106
+ time.sleep(5)
107
+ continue # 返回重试
108
+ else:
109
+ time.sleep(5)
110
+ return mutable[0] # 放弃
111
+
112
+ future = executor.submit(_req_gpt, inputs, history, sys_prompt)
113
  while True:
114
  # yield一次以刷新前端页面
115
  time.sleep(refresh_interval)
 
123
  return future.result()
124
 
125
 
126
+ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
127
+ inputs_array, inputs_show_user_array, top_p, temperature,
128
+ chatbot, history_array, sys_prompt_array,
129
+ refresh_interval=0.2, max_workers=10, scroller_max_len=30,
130
+ handle_token_exceed=True, show_user_at_complete=False,
131
+ retry_times_at_unknown_error=2,
132
+ ):
133
+ """
134
+ Request GPT model using multiple threads with UI and high efficiency
135
+ 请求GPT模型的[多线程]版。
136
+ 具备以下功能:
137
+ 实时在UI上反馈远程数据流
138
+ 使用线程池,可调节线程池的大小避免openai的流量限制错误
139
+ 处理中途中止的情况
140
+ 网络等出问题时,会把traceback和已经接收的数据转入输出
141
+
142
+ 输入参数 Args (以_array结尾的输入变量都是列表,列表长度为子任务的数量,执行时,会把列表拆解,放到每个子线程中分别执行):
143
+ inputs_array (list): List of inputs (每个子任务的输入)
144
+ inputs_show_user_array (list): List of inputs to show user(每个子任务展现在报告中的输入,借助此参数,在汇总报告中隐藏啰嗦的真实输入,增强报告的可读性)
145
+ top_p (float): Top p value for sampling from model distribution (GPT参数,浮点数)
146
+ temperature (float): Temperature value for sampling from model distribution(GPT参数,浮点数)
147
+ chatbot: chatbot (用户界面对话窗口句柄,用于数据流可视化)
148
+ history_array (list): List of chat history (历史对话输入,双层列表,第一层列表是子任务分解,第二层列表是对话历史)
149
+ sys_prompt_array (list): List of system prompts (系统输入,列表,用于输入给GPT的前提提示,比如你是翻译官怎样怎样)
150
+ refresh_interval (float, optional): Refresh interval for UI (default: 0.2) (刷新时间间隔频率,建议低于1,不可高于3,仅仅服务于视觉效果)
151
+ max_workers (int, optional): Maximum number of threads (default: 10) (最大线程数,如果子任务非常多,需要用此选项防止高频地请求openai导致错误)
152
+ scroller_max_len (int, optional): Maximum length for scroller (default: 30)(数据流的显示最后收到的多少个字符,仅仅服务于视觉效果)
153
+ handle_token_exceed (bool, optional): (是否在输入过长时,自动缩减文本)
154
+ handle_token_exceed:是否自动处理token溢出的情况,如果选择自动处理,则会在溢出时暴力截断,默认开启
155
+ show_user_at_complete (bool, optional): (在结束时,把完整输入-输出结果显示在聊天框)
156
+ retry_times_at_unknown_error:子任务失败时的重试次数
157
+
158
+ 输出 Returns:
159
+ list: List of GPT model responses (每个子任务的输出汇总,如果某个子任务出错,response中会携带traceback报错信息,方便调试和定位问题。)
160
+ """
161
+ import time, random
162
  from concurrent.futures import ThreadPoolExecutor
163
  from request_llm.bridge_chatgpt import predict_no_ui_long_connection
164
  assert len(inputs_array) == len(history_array)
 
170
  msg = '正常'
171
  yield chatbot, [], msg
172
  # 异步原子
173
+ mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
174
 
175
  def _req_gpt(index, inputs, history, sys_prompt):
176
+ gpt_say = ""
177
+ retry_op = retry_times_at_unknown_error
178
+ exceeded_cnt = 0
179
+ mutable[index][2] = "执行中"
180
+ while True:
181
+ try:
182
+ # 【第一种情况】:顺利完成
183
+ # time.sleep(10); raise RuntimeError("测试")
184
+ gpt_say = predict_no_ui_long_connection(
185
+ inputs=inputs, top_p=top_p, temperature=temperature, history=history,
186
+ sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
187
+ )
188
+ mutable[index][2] = "已成功"
189
+ return gpt_say
190
+ except ConnectionAbortedError as token_exceeded_error:
191
+ # 【第二种情况】:Token溢出,
192
+ if handle_token_exceed:
193
+ exceeded_cnt += 1
194
+ # 【选择处理】 尝试计算比例,尽可能多地保留文本
195
+ from toolbox import get_reduce_token_percent
196
+ p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
197
+ MAX_TOKEN = 4096
198
+ EXCEED_ALLO = 512 + 512 * exceeded_cnt
199
+ inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
200
+ gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
201
+ mutable[index][2] = f"截断重试"
202
+ continue # 返回重试
203
+ else:
204
+ # 【选择放弃】
205
+ tb_str = '```\n' + traceback.format_exc() + '```'
206
+ gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
207
+ if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
208
+ mutable[index][2] = "输入过长已放弃"
209
+ return gpt_say # 放弃
210
+ except:
211
+ # 【第三种情况】:其他错误
212
+ tb_str = '```\n' + traceback.format_exc() + '```'
213
+ gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
214
+ if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
215
+ if retry_op > 0:
216
+ retry_op -= 1
217
+ wait = random.randint(5, 20)
218
+ for i in range(wait):# 也许等待十几秒后,情况会好转
219
+ mutable[index][2] = f"等待重试 {wait-i}"; time.sleep(1)
220
+ mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
221
+ continue # 返回重试
222
+ else:
223
+ mutable[index][2] = "已失败"
224
+ wait = 5
225
+ time.sleep(5)
226
+ return gpt_say # 放弃
227
+
228
  # 异步任务开始
229
  futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
230
  range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
 
239
  break
240
  # 更好的UI视觉效果
241
  observe_win = []
242
+ # print([mutable[thread_index][2] for thread_index, _ in enumerate(worker_done)])
243
  # 每个线程都要“喂狗”(看门狗)
244
  for thread_index, _ in enumerate(worker_done):
245
  mutable[thread_index][1] = time.time()
 
249
  replace('\n', '').replace('```', '...').replace(
250
  ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
251
  observe_win.append(print_something_really_funny)
252
+ stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
253
+ if not done else f'`{mutable[thread_index][2]}`\n\n'
254
+ for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
255
+ chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
256
  msg = "正常"
257
  yield chatbot, [], msg
258
  # 异步任务结束
 
260
  for inputs_show_user, f in zip(inputs_show_user_array, futures):
261
  gpt_res = f.result()
262
  gpt_response_collection.extend([inputs_show_user, gpt_res])
263
+
264
+ if show_user_at_complete:
265
+ for inputs_show_user, f in zip(inputs_show_user_array, futures):
266
+ gpt_res = f.result()
267
+ chatbot.append([inputs_show_user, gpt_res])
268
+ yield chatbot, [], msg
269
+ time.sleep(1)
270
  return gpt_response_collection
271
 
272
 
273
def WithRetry(f):
    """
    Decorator that automatically retries a generator function on failure.

    The decorated generator takes two extra leading arguments:

        decorated(retry, res_when_fail, *args, **kwargs)

    Args:
        retry (int): number of extra attempts allowed after the first failure
            (must be >= 0).
        res_when_fail: value returned once all attempts have failed.
        *args, **kwargs: forwarded to ``f`` on every attempt.

    Returns:
        The generator's return value is ``f``'s return value on the first
        successful attempt, otherwise ``res_when_fail``. Items yielded by
        failed attempts have already been passed to the consumer.
    """
    def decorated(retry, res_when_fail, *args, **kwargs):
        assert retry >= 0
        while True:
            try:
                return (yield from f(*args, **kwargs))
            except Exception:
                # Only ordinary errors trigger a retry. GeneratorExit and
                # KeyboardInterrupt must propagate: retrying after the consumer
                # closes the generator raises "generator ignored GeneratorExit".
                retry -= 1
                if retry < 0:
                    print("达到最大重试次数")
                    break
                print("重试中……")
        return res_when_fail
    return decorated
293
+
294
+
295
  def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
296
  def cut(txt_tocut, must_break_at_empty_line): # 递归
297
  if get_token_fn(txt_tocut) <= limit:
crazy_functions/代码重写为全英文_多线程.py CHANGED
@@ -58,11 +58,10 @@ def 全项目切换英文(txt, top_p, temperature, chatbot, history, sys_prompt,
58
 
59
  # 第5步:Token限制下的截断与处理
60
  MAX_TOKEN = 3000
61
- from transformers import GPT2TokenizerFast
62
- print('加载tokenizer中')
63
- tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
64
- get_token_fn = lambda txt: len(tokenizer(txt)["input_ids"])
65
- print('加载tokenizer结束')
66
 
67
 
68
  # 第6步:任务函数
 
58
 
59
  # 第5步:Token限制下的截断与处理
60
  MAX_TOKEN = 3000
61
+ import tiktoken
62
+ from toolbox import get_conf
63
+ enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
64
+ def get_token_fn(txt): return len(enc.encode(txt))
 
65
 
66
 
67
  # 第6步:任务函数
crazy_functions/批量翻译PDF文档_多线程.py CHANGED
@@ -148,7 +148,8 @@ def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, histor
148
  file_content, page_one = read_and_clean_pdf_text(fp)
149
  # 递归地切割PDF文件
150
  from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
151
- enc = tiktoken.get_encoding("gpt2")
 
152
  def get_token_num(txt): return len(enc.encode(txt))
153
  # 分解文本
154
  paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
 
148
  file_content, page_one = read_and_clean_pdf_text(fp)
149
  # 递归地切割PDF文件
150
  from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
151
+ from toolbox import get_conf
152
+ enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))
153
  def get_token_num(txt): return len(enc.encode(txt))
154
  # 分解文本
155
  paper_fragments = breakdown_txt_to_satisfy_token_limit_for_pdf(
crazy_functions/解析项目源代码.py CHANGED
@@ -2,92 +2,96 @@ from request_llm.bridge_chatgpt import predict_no_ui
2
  from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
3
  fast_debug = False
4
 
5
- def 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
6
- import time, glob, os
7
- print('begin analysis on:', file_manifest)
 
 
 
 
 
 
 
 
 
 
8
  for index, fp in enumerate(file_manifest):
9
  with open(fp, 'r', encoding='utf-8') as f:
10
  file_content = f.read()
11
-
12
  prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
13
  i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
14
  i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
15
- chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
16
- yield chatbot, history, '正常'
17
-
18
- if not fast_debug:
19
- msg = '正常'
20
-
21
- # ** gpt request **
22
- gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[]) # 带超时倒计时
23
-
24
- chatbot[-1] = (i_say_show_user, gpt_say)
25
- history.append(i_say_show_user); history.append(gpt_say)
26
- yield chatbot, history, msg
27
- if not fast_debug: time.sleep(2)
28
-
29
- all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
30
- i_say = f'根据以上你自己的分析,对程序的整体功能和构架做出概括。然后用一张markdown表格整理每个文件的功能(包括{all_file})。'
31
- chatbot.append((i_say, "[Local Message] waiting gpt response."))
32
- yield chatbot, history, '正常'
33
-
34
- if not fast_debug:
35
- msg = '正常'
36
- # ** gpt request **
37
- gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history) # 带超时倒计时
38
-
39
- chatbot[-1] = (i_say, gpt_say)
40
- history.append(i_say); history.append(gpt_say)
41
- yield chatbot, history, msg
42
- res = write_results_to_file(history)
43
- chatbot.append(("完成了吗?", res))
44
- yield chatbot, history, msg
45
-
46
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
 
49
  @CatchException
50
  def 解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
51
  history = [] # 清空历史,以免输入溢出
52
- import time, glob, os
53
  file_manifest = [f for f in glob.glob('./*.py') if ('test_project' not in f) and ('gpt_log' not in f)] + \
54
- [f for f in glob.glob('./crazy_functions/*.py') if ('test_project' not in f) and ('gpt_log' not in f)]
55
- for index, fp in enumerate(file_manifest):
56
- # if 'test_project' in fp: continue
57
- with open(fp, 'r', encoding='utf-8') as f:
58
- file_content = f.read()
59
-
60
- prefix = "接下来请你分析自己的程序构成,别紧张," if index==0 else ""
61
- i_say = prefix + f'请对下面的程序文件做一个概述文件名是{fp},文件代码是 ```{file_content}```'
62
- i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
63
- chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
64
- yield chatbot, history, '正常'
65
-
66
- if not fast_debug:
67
- # ** gpt request **
68
- # gpt_say = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature)
69
- gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[], long_connection=True) # 带超时倒计时
70
-
71
- chatbot[-1] = (i_say_show_user, gpt_say)
72
- history.append(i_say_show_user); history.append(gpt_say)
73
- yield chatbot, history, '正常'
74
- time.sleep(2)
75
-
76
- i_say = f'根据以上你自己的分析,对程序的整体功能和构架做出概括。然后用一张markdown表格整理每个文件的功能(包括{file_manifest})。'
77
- chatbot.append((i_say, "[Local Message] waiting gpt response."))
78
- yield chatbot, history, '正常'
79
-
80
- if not fast_debug:
81
- # ** gpt request **
82
- # gpt_say = predict_no_ui(inputs=i_say, top_p=top_p, temperature=temperature, history=history)
83
- gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history, long_connection=True) # 带超时倒计时
84
-
85
- chatbot[-1] = (i_say, gpt_say)
86
- history.append(i_say); history.append(gpt_say)
87
- yield chatbot, history, '正常'
88
- res = write_results_to_file(history)
89
- chatbot.append(("完成了吗?", res))
90
  yield chatbot, history, '正常'
 
 
91
 
92
  @CatchException
93
  def 解析一个Python项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
@@ -105,7 +109,7 @@ def 解析一个Python项目(txt, top_p, temperature, chatbot, history, systemPr
105
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
106
  yield chatbot, history, '正常'
107
  return
108
- yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
109
 
110
 
111
  @CatchException
@@ -126,7 +130,7 @@ def 解析一个C项目的头文件(txt, top_p, temperature, chatbot, history, s
126
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
127
  yield chatbot, history, '正常'
128
  return
129
- yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
130
 
131
  @CatchException
132
  def 解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
@@ -147,7 +151,7 @@ def 解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptT
147
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
148
  yield chatbot, history, '正常'
149
  return
150
- yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
151
 
152
 
153
  @CatchException
@@ -169,7 +173,7 @@ def 解析一个Java项目(txt, top_p, temperature, chatbot, history, systemProm
169
  report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何java文件: {txt}")
170
  yield chatbot, history, '正常'
171
  return
172
- yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
173
 
174
 
175
  @CatchException
@@ -192,7 +196,7 @@ def 解析一个Rect项目(txt, top_p, temperature, chatbot, history, systemProm
192
  report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何Rect文件: {txt}")
193
  yield chatbot, history, '正常'
194
  return
195
- yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
196
 
197
 
198
  @CatchException
@@ -211,4 +215,4 @@ def 解析一个Golang项目(txt, top_p, temperature, chatbot, history, systemPr
211
  report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
212
  yield chatbot, history, '正常'
213
  return
214
- yield from 解析源代码(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
2
  from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
3
  fast_debug = False
4
 
5
+
6
def 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
    """
    Analyse a source project in two stages and write the report to file.

    Stage 1 sends one request per file through the multi-threaded helper.
    Stage 2 summarises the per-file answers in groups of ``batchsize`` files,
    feeding each group together with the earlier group summaries to a single
    request.

    Generator yielding ``(chatbot, history, status)`` tuples.

    Args:
        file_manifest: paths of the source files to analyse.
        project_folder: base folder used to print paths relative to the project.
        top_p, temperature: sampling parameters forwarded to the GPT requests.
        chatbot: conversation list shown in the UI; appended to in place.
        history: not read by this function.
        systemPromptTxt: not read by this function.
    """
    import copy
    import os
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, WithRetry

    msg = '正常'
    inputs_array = []
    inputs_show_user_array = []
    history_array = []
    sys_prompt_array = []

    # ---- Stage 1: per-file analysis, multi-threaded ----
    for index, fp in enumerate(file_manifest):
        with open(fp, 'r', encoding='utf-8') as f:
            file_content = f.read()
        prefix = "接下来请你逐文件分析下面的工程" if index == 0 else ""
        rel_path = os.path.relpath(fp, project_folder)
        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{rel_path},文件代码是 ```{file_content}```'
        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
        # Queue the request; every file starts with an empty history.
        inputs_array.append(i_say)
        inputs_show_user_array.append(i_say_show_user)
        history_array.append([])
        sys_prompt_array.append("你是一个程序架构分析师,正在分析一个源代码项目。你的回答必须简单明了。")

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        history_array=history_array,
        sys_prompt_array=sys_prompt_array,
        top_p=top_p,
        temperature=temperature,
        chatbot=chatbot,
        show_user_at_complete=True
    )

    # The stage-1 report is also the list handed back to the UI as history.
    report_part_1 = copy.deepcopy(gpt_response_collection)
    history_to_return = report_part_1
    res = write_results_to_file(report_part_1)
    chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
    yield chatbot, history_to_return, msg

    # ---- Stage 2: single-threaded summary, in groups, iteratively ----
    batchsize = 16  # number of files per group
    report_part_2 = []
    previous_iteration_files = []
    while len(file_manifest) > 0:
        batch_files = file_manifest[:batchsize]
        # Responses are stored as alternating [prompt, answer] pairs.
        batch_responses = gpt_response_collection[:batchsize*2]
        file_rel_path = [os.path.relpath(fp, project_folder) for fp in batch_files]
        # Replace each verbose prompt (even index) with a short "file name" tag.
        for index in range(0, len(batch_responses), 2):
            batch_responses[index] = f"文件名:{file_rel_path[index//2]}"
        previous_iteration_files.extend(file_rel_path)
        previous_iteration_files_string = ', '.join(previous_iteration_files)
        current_iteration_focus = ', '.join(file_rel_path)
        i_say = f'根据以上分析,对程序的整体功能和构架重新做出概括。然后用一张markdown表格整理每个文件的功能(包括{previous_iteration_files_string})。'
        inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
        # History = this group's per-file answers + summaries of earlier groups.
        this_iteration_history = copy.deepcopy(batch_responses)
        this_iteration_history.extend(report_part_2)
        result = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=inputs_show_user, top_p=top_p, temperature=temperature, chatbot=chatbot,
            history=this_iteration_history,
            sys_prompt="你是一个程序架构分析师,正在分析一个源代码项目。")
        report_part_2.extend([i_say, result])

        # Move on to the next group.
        file_manifest = file_manifest[batchsize:]
        gpt_response_collection = gpt_response_collection[batchsize*2:]

    # ---- Done: persist the full report ----
    history_to_return.extend(report_part_2)
    res = write_results_to_file(history_to_return)
    chatbot.append(("完成了吗?", res))
    yield chatbot, history_to_return, msg
80
 
81
 
82
@CatchException
def 解析项目本身(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """
    Entry point: analyse this project's own Python sources.

    Collects ``*.py`` files from the current directory, ``crazy_functions/``
    and ``request_llm/`` (skipping paths containing ``test_project`` or
    ``gpt_log``) and hands them to ``解析源代码新``. Generator yielding
    ``(chatbot, history, status)`` tuples.

    Args:
        txt: user input; only echoed in the error message.
        top_p, temperature: sampling parameters forwarded to the analysis.
        chatbot: conversation list shown in the UI.
        history: ignored; a fresh empty history is always used.
        systemPromptTxt: forwarded unchanged to ``解析源代码新``.
        WEB_PORT: accepted for signature compatibility; not used here.
    """
    import glob

    history = []  # start from a clean history so the prompt cannot overflow

    skip_markers = ('test_project', 'gpt_log')
    file_manifest = []
    for pattern in ('./*.py', './crazy_functions/*.py', './request_llm/*.py'):
        file_manifest.extend(
            path for path in glob.glob(pattern)
            if not any(marker in path for marker in skip_markers)
        )

    project_folder = './'
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何python文件: {txt}")
        yield chatbot, history, '正常'
        return

    yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
95
 
96
  @CatchException
97
  def 解析一个Python项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
 
109
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
110
  yield chatbot, history, '正常'
111
  return
112
+ yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
113
 
114
 
115
  @CatchException
 
130
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
131
  yield chatbot, history, '正常'
132
  return
133
+ yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
134
 
135
  @CatchException
136
  def 解析一个C项目(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
 
151
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
152
  yield chatbot, history, '正常'
153
  return
154
+ yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
155
 
156
 
157
  @CatchException
 
173
  report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何java文件: {txt}")
174
  yield chatbot, history, '正常'
175
  return
176
+ yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
177
 
178
 
179
  @CatchException
 
196
  report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何Rect文件: {txt}")
197
  yield chatbot, history, '正常'
198
  return
199
+ yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
200
 
201
 
202
  @CatchException
 
215
  report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
216
  yield chatbot, history, '正常'
217
  return
218
+ yield from 解析源代码新(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
objdump.tmp ADDED
Binary file (26.6 kB). View file
 
request_llm/bridge_chatgpt.py CHANGED
@@ -72,7 +72,7 @@ def predict_no_ui(inputs, top_p, temperature, history=[], sys_prompt=""):
72
  raise ConnectionAbortedError("Json解析不合常规,可能是文本过长" + response.text)
73
 
74
 
75
- def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_prompt="", observe_window=None):
76
  """
77
  发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
78
  inputs:
@@ -121,7 +121,7 @@ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_pr
121
  if "role" in delta: continue
122
  if "content" in delta:
123
  result += delta["content"]
124
- print(delta["content"], end='')
125
  if observe_window is not None:
126
  # 观测窗,把已经获取的数据显示出去
127
  if len(observe_window) >= 1: observe_window[0] += delta["content"]
@@ -264,8 +264,7 @@ def generate_payload(inputs, top_p, temperature, history, system_prompt, stream)
264
  "presence_penalty": 0,
265
  "frequency_penalty": 0,
266
  }
267
-
268
- print(f" {LLM_MODEL} : {conversation_cnt} : {inputs}")
269
  return headers,payload
270
 
271
 
 
72
  raise ConnectionAbortedError("Json解析不合常规,可能是文本过长" + response.text)
73
 
74
 
75
+ def predict_no_ui_long_connection(inputs, top_p, temperature, history=[], sys_prompt="", observe_window=None, console_slience=False):
76
  """
77
  发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
78
  inputs:
 
121
  if "role" in delta: continue
122
  if "content" in delta:
123
  result += delta["content"]
124
+ if not console_slience: print(delta["content"], end='')
125
  if observe_window is not None:
126
  # 观测窗,把已经获取的数据显示出去
127
  if len(observe_window) >= 1: observe_window[0] += delta["content"]
 
264
  "presence_penalty": 0,
265
  "frequency_penalty": 0,
266
  }
267
+ print(f" {LLM_MODEL} : {conversation_cnt} : {inputs[:100]}")
 
268
  return headers,payload
269
 
270
 
toolbox.py CHANGED
@@ -21,6 +21,8 @@ def ArgsGeneralWrapper(f):
21
  yield from f(txt_passon, *args, **kwargs)
22
  return decorated
23
 
 
 
24
 
25
  def get_reduce_token_percent(text):
26
  try:
 
21
  yield from f(txt_passon, *args, **kwargs)
22
  return decorated
23
 
24
+ def update_ui(chatbot, history, msg='正常', *args, **kwargs):
25
+ yield chatbot, history, msg
26
 
27
  def get_reduce_token_percent(text):
28
  try:
version CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "version": 2.5,
3
  "show_feature": true,
4
- "new_feature": "新增一键更新程序<->高亮代码<->高亮公式<->新增垂直布局选项"
5
  }
 
1
  {
2
+ "version": 2.6,
3
  "show_feature": true,
4
+ "new_feature": "增强多线程稳定性(涉及代码解析、PDF翻译等)<->修复Token计数错误(解决PDF翻译的分割不合理的问题)"
5
  }