binary-husky committed on
Commit
da8cb77
·
2 Parent(s): dde672c a87ce5b

Merge pull request #147 from JasonGuo1/master

Browse files

feat(toolbox.py,总结word文档.py): 支持rar格式与7z格式解压;word读取

crazy_functions/总结word文档.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from predict import predict_no_ui
2
+ from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
3
+ fast_debug = False
4
+
5
+
6
def 解析docx(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
    """Ask GPT for a summary of every Word document in ``file_manifest``.

    This is a generator: it yields ``(chatbot, history, status)`` tuples so the
    caller can refresh the UI while the work is in progress.

    Args:
        file_manifest: paths of the ``.docx`` / ``.doc`` files to process.
        project_folder: folder the paths are reported relative to in the prompt.
        top_p, temperature: sampling parameters forwarded to the GPT request.
        chatbot: list of ``(question, answer)`` pairs shown to the user;
            mutated in place.
        history: flat list of alternating questions/answers; mutated in place
            and written to a results file at the end.
        systemPromptTxt: accepted for signature compatibility; not used here.

    Notes:
        ``.docx`` files are read with python-docx (cross-platform). Any other
        file is opened through the Word COM interface (pywin32), which only
        works on Windows with Word installed.
    """
    import time, os
    # pip install python-docx : needed for .docx, cross-platform
    # pip install pywin32     : needed for .doc, Windows only

    print('begin analysis on:', file_manifest)
    for index, fp in enumerate(file_manifest):
        # Compare the real extension case-insensitively so "X.DOCX" is not
        # routed to the Windows-only COM branch.
        if os.path.splitext(fp)[1].lower() == ".docx":
            from docx import Document
            doc = Document(fp)
            file_content = "\n".join([para.text for para in doc.paragraphs])
        else:
            import win32com.client
            word = win32com.client.Dispatch("Word.Application")
            try:
                word.visible = False
                # abspath() handles both relative and already-absolute paths;
                # blindly prefixing the cwd corrupts an absolute path.
                doc_path = os.path.abspath(fp)
                print('fp', doc_path)
                doc = word.Documents.Open(doc_path)
                try:
                    file_content = doc.Range().Text
                finally:
                    doc.Close()
            finally:
                # Always shut Word down, otherwise a failed read leaves a
                # hidden Word process running.
                word.Quit()

        print(file_content)

        prefix = "接下来请你逐文件分析下面的论文文件," if index == 0 else ""
        # File names under private_upload are often garbled after unzipping a
        # zip archive (rar and 7z are fine), so the file name could be left out
        # of the prompt and only the content analysed.
        i_say = prefix + f'请对下面的文章片段用中英文做概述,文件名是{os.path.relpath(fp, project_folder)},' \
                         f'文章内容是 ```{file_content}```'
        i_say_show_user = prefix + f'[{index+1}/{len(file_manifest)}] 假设你是论文审稿专家,请对下面的文章片段做概述: {os.path.abspath(fp)}'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        yield chatbot, history, '正常'

        if not fast_debug:
            msg = '正常'
            # ** gpt request ** (with timeout countdown); each file is sent
            # with an empty history so the documents do not accumulate.
            gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature,
                                                                 history=[])
            chatbot[-1] = (i_say_show_user, gpt_say)
            history.append(i_say_show_user)
            history.append(gpt_say)
            yield chatbot, history, msg
            time.sleep(2)

    # Optional follow-up request over the whole conversation (disabled).
    # To enable, pick ONE prompt, append it to the chatbot and send it with the
    # accumulated history:
    #
    # i_say = f'根据你上述的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一篇英文的。'
    # or
    # i_say = f'我想让你做一个论文写作导师。您的任务是使用人工智能工具(例如自然语言处理)提供有关如何改进其上述文章的反馈。' \
    #         f'您还应该利用您在有效写作技巧方面的修辞知识和经验来建议作者可以更好地以书面形式表达他们的想法和想法的方法。' \
    #         f'根据你之前的分析,提出建议'
    #
    # chatbot.append((i_say, "[Local Message] waiting gpt response."))
    # yield chatbot, history, '正常'
    # gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature,
    #                                                      history=history)
    # chatbot[-1] = (i_say, gpt_say)
    # history.append(i_say)
    # history.append(gpt_say)
    # yield chatbot, history, '正常'
    #
    # While no follow-up prompt is active, no extra request is issued: sending
    # one would re-submit the last file's prompt and overwrite its summary.

    if not fast_debug:
        msg = '正常'
        res = write_results_to_file(history)
        chatbot.append(("完成了吗?", res))
        yield chatbot, history, msg
79
+
80
+
81
@CatchException
def 总结word文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """Plugin entry point: batch-summarise the Word documents found under ``txt``.

    This is a generator yielding ``(chatbot, history, status)`` tuples for the UI.

    Args:
        txt: path of a local file or folder that is searched recursively for
            ``.docx`` and ``.doc`` files.
        top_p, temperature: sampling parameters forwarded to the GPT requests.
        chatbot: list of question/answer pairs shown to the user; mutated in place.
        history: incoming conversation history; discarded (reset to ``[]``)
            before processing to keep the prompt short.
        systemPromptTxt: forwarded unchanged to the worker.
        WEB_PORT: part of the plugin signature; not used here.
    """
    import glob, os

    # Basic information: what the plugin does and who contributed it.
    chatbot.append([
        "函数插件功能?",
        "批量总结Word文档。函数插件贡献者: JasonGuo1"])
    yield chatbot, history, '正常'

    # Probe the required dependency and give installation advice if missing.
    # Only ImportError is handled so unrelated failures are not mislabelled as
    # a missing package.
    try:
        from docx import Document  # noqa: F401  (availability check only)
    except ImportError:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade python-docx pywin32```。")
        yield chatbot, history, '正常'
        return

    # Clear the history so the input does not overflow.
    history = []

    # Validate the input path; bail out when nothing usable was given.
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield chatbot, history, '正常'
        return

    # Collect the files to process: all .docx first, then all .doc.
    # Names starting with "~$" are Word owner/lock files, not real documents,
    # and would make the reader fail, so they are skipped.
    file_manifest = [
        f
        for pattern in ('*.docx', '*.doc')
        for f in glob.glob(f'{project_folder}/**/{pattern}', recursive=True)
        if not os.path.basename(f).startswith('~$')
    ]

    # Nothing to do if no document was found.
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.docx或doc文件: {txt}")
        yield chatbot, history, '正常'
        return

    # Start the actual work.
    yield from 解析docx(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
functional_crazy.py CHANGED
@@ -56,6 +56,7 @@ def get_crazy_functionals():
56
  if UserVisibleLevel >= 1:
57
  from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
58
  from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
 
59
  function_plugins.update({
60
  "[仅供开发调试] 批量总结PDF文档": {
61
  "Color": "stop",
@@ -66,6 +67,10 @@ def get_crazy_functionals():
66
  "Color": "stop",
67
  "Function": HotReload(批量总结PDF文档pdfminer)
68
  },
 
 
 
 
69
  })
70
 
71
  # VisibleLevel=2 尚未充分测试的函数插件,放在这里
 
56
  if UserVisibleLevel >= 1:
57
  from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
58
  from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
59
+ from crazy_functions.总结word文档 import 总结word文档
60
  function_plugins.update({
61
  "[仅供开发调试] 批量总结PDF文档": {
62
  "Color": "stop",
 
67
  "Color": "stop",
68
  "Function": HotReload(批量总结PDF文档pdfminer)
69
  },
70
+ "[仅供开发调试] 批量总结Word文档": {
71
+ "Color": "stop",
72
+ "Function": HotReload(总结word文档)
73
+ },
74
  })
75
 
76
  # VisibleLevel=2 尚未充分测试的函数插件,放在这里
requirements.txt CHANGED
@@ -2,4 +2,4 @@ gradio>=3.23
2
  requests[socks]
3
  mdtex2html
4
  Markdown
5
- latex2mathml
 
2
  requests[socks]
3
  mdtex2html
4
  Markdown
5
+ latex2mathml
toolbox.py CHANGED
@@ -176,8 +176,32 @@ def extract_archive(file_path, dest_dir):
176
  with tarfile.open(file_path, 'r:*') as tarobj:
177
  tarobj.extractall(path=dest_dir)
178
  print("Successfully extracted tar archive to {}".format(dest_dir))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  else:
180
- return
 
181
 
182
  def find_recent_files(directory):
183
  """
@@ -209,16 +233,19 @@ def on_file_uploaded(files, chatbot, txt):
209
  except: pass
210
  time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
211
  os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
 
212
  for file in files:
213
  file_origin_name = os.path.basename(file.orig_name)
214
  shutil.copy(file.name, f'private_upload/{time_tag}/{file_origin_name}')
215
- extract_archive(f'private_upload/{time_tag}/{file_origin_name}',
216
  dest_dir=f'private_upload/{time_tag}/{file_origin_name}.extract')
217
  moved_files = [fp for fp in glob.glob('private_upload/**/*', recursive=True)]
218
  txt = f'private_upload/{time_tag}'
219
  moved_files_str = '\t\n\n'.join(moved_files)
220
- chatbot.append(['我上传了文件,请查收',
221
- f'[Local Message] 收到以下文件: \n\n{moved_files_str}\n\n调用路径参数已自动修正到: \n\n{txt}\n\n现在您点击任意实验功能时,以上文件将被作为输入参数'])
 
 
222
  return chatbot, txt
223
 
224
 
 
176
  with tarfile.open(file_path, 'r:*') as tarobj:
177
  tarobj.extractall(path=dest_dir)
178
  print("Successfully extracted tar archive to {}".format(dest_dir))
179
+
180
+ # 第三方库,需要预先pip install rarfile
181
+ # 此外,Windows上还需要安装winrar软件,配置其Path环境变量,如"C:\Program Files\WinRAR"才可以
182
+ elif file_extension == '.rar':
183
+ try:
184
+ import rarfile
185
+ with rarfile.RarFile(file_path) as rf:
186
+ rf.extractall(path=dest_dir)
187
+ print("Successfully extracted rar archive to {}".format(dest_dir))
188
+ except:
189
+ print("Rar format requires additional dependencies to install")
190
+ return '\n\n需要安装pip install rarfile来解压rar文件'
191
+
192
+ # 第三方库,需要预先pip install py7zr
193
+ elif file_extension == '.7z':
194
+ try:
195
+ import py7zr
196
+ with py7zr.SevenZipFile(file_path, mode='r') as f:
197
+ f.extractall(path=dest_dir)
198
+ print("Successfully extracted 7z archive to {}".format(dest_dir))
199
+ except:
200
+ print("7z format requires additional dependencies to install")
201
+ return '\n\n需要安装pip install py7zr来解压7z文件'
202
  else:
203
+ return ''
204
+ return ''
205
 
206
  def find_recent_files(directory):
207
  """
 
233
  except: pass
234
  time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
235
  os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
236
+ err_msg = ''
237
  for file in files:
238
  file_origin_name = os.path.basename(file.orig_name)
239
  shutil.copy(file.name, f'private_upload/{time_tag}/{file_origin_name}')
240
+ err_msg += extract_archive(f'private_upload/{time_tag}/{file_origin_name}',
241
  dest_dir=f'private_upload/{time_tag}/{file_origin_name}.extract')
242
  moved_files = [fp for fp in glob.glob('private_upload/**/*', recursive=True)]
243
  txt = f'private_upload/{time_tag}'
244
  moved_files_str = '\t\n\n'.join(moved_files)
245
+ chatbot.append(['我上传了文件,请查收',
246
+ f'[Local Message] 收到以下文件: \n\n{moved_files_str}'+
247
+ f'\n\n调用路径参数已自动修正到: \n\n{txt}'+
248
+ f'\n\n现在您点击任意实验功能时,以上文件将被作为输入参数'+err_msg])
249
  return chatbot, txt
250
 
251