qqqwt committed on
Commit
a5cc116
·
1 Parent(s): b8d0233

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +260 -148
app.py CHANGED
@@ -194,6 +194,23 @@ class Paper:
194
  text_list = []
195
  section_dict = {}
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
  # 再处理其他章节:
199
  text_list = [page.get_text() for page in self.pdf]
@@ -260,27 +277,198 @@ class Reader:
260
  self.filter_keys = filter_keys # 用于在摘要中筛选的关键词
261
  self.root_path = root_path
262
  self.file_format = 'md' # or 'txt',如果为图片,则必须为'md'
263
-
264
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  def validateTitle(self, title):
266
  # 将论文的乱七八糟的路径格式修正
267
  rstr = r"[\/\\\:\*\?\"\<\>\|]" # '/ \ : * ? " < > |'
268
  new_title = re.sub(rstr, "_", title) # 替换为下划线
269
  return new_title
270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
  def summary_with_chat(self, paper_list, key):
273
  htmls = []
274
  for paper_index, paper in enumerate(paper_list):
275
  # 第一步先用title,abs,和introduction进行总结。
276
-
 
 
 
 
 
 
 
277
  chat_summary_text = self.chat_summary(text=text, key=str(key))
278
  htmls.append(chat_summary_text)
279
-
280
- chat_review_text = self.chat_review(text=text, key=str(key))
281
- htmls.append(chat_review_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  htmls.append("\n")
283
-
284
  md_text = "\n".join(htmls)
285
 
286
  return markdown.markdown(md_text)
@@ -289,31 +477,25 @@ class Reader:
289
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
290
  stop=tenacity.stop_after_attempt(5),
291
  reraise=True)
292
- def chat_conclusion(self, text):
293
- openai.api_key = self.chat_api_list[self.cur_api]
294
- self.cur_api += 1
295
- self.cur_api = 0 if self.cur_api >= len(self.chat_api_list) - 1 else self.cur_api
296
  response = openai.ChatCompletion.create(
297
  model="gpt-3.5-turbo",
298
  # prompt需要用英语替换,少占用token。
299
  messages=[
300
- {"role": "system",
301
- "content": "You are a reviewer in the field of [" + self.key_word + "] and you need to critically review this article"},
302
- # chatgpt 角色
303
- {"role": "assistant",
304
- "content": "This is the <summary> and <conclusion> part of an English literature, where <summary> you have already summarized, but <conclusion> part, I need your help to summarize the following questions:" + text},
305
- # 背景知识,可以参考OpenReview的审稿流程
306
  {"role": "user", "content": """
307
- 8. Make the following summary.Be sure to use Chinese answers (proper nouns need to be marked in English).
308
- - (1):What is the significance of this piece of work?
309
- - (2):Summarize the strengths and weaknesses of this article in three dimensions: innovation point, performance, and workload.
310
  .......
311
- Follow the format of the output later:
312
- 8. Conclusion: \n\n
313
- - (1):xxx;\n
314
- - (2):Innovation point: xxx; Performance: xxx; Workload: xxx;\n
315
-
316
- Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not repeat the content of the previous <summary>, the value of the use of the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed, ....... means fill in according to the actual requirements, if not, you can not write.
317
  """},
318
  ]
319
  )
@@ -321,38 +503,32 @@ class Reader:
321
  for choice in response.choices:
322
  result += choice.message.content
323
  print("conclusion_result:\n", result)
324
- return result
325
-
326
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
327
  stop=tenacity.stop_after_attempt(5),
328
  reraise=True)
329
- def chat_method(self, text):
330
- openai.api_key = self.chat_api_list[self.cur_api]
331
- self.cur_api += 1
332
- self.cur_api = 0 if self.cur_api >= len(self.chat_api_list) - 1 else self.cur_api
333
  response = openai.ChatCompletion.create(
334
  model="gpt-3.5-turbo",
335
  messages=[
336
- {"role": "system",
337
- "content": "You are a researcher in the field of [" + self.key_word + "] who is good at summarizing papers using concise statements"},
338
- # chatgpt 角色
339
- {"role": "assistant",
340
- "content": "This is the <summary> and <Method> part of an English document, where <summary> you have summarized, but the <Methods> part, I need your help to read and summarize the following questions." + text},
341
- # 背景知识
342
  {"role": "user", "content": """
343
- 7. Describe in detail the methodological idea of this article. Be sure to use Chinese answers (proper nouns need to be marked in English). For example, its steps are.
344
  - (1):...
345
  - (2):...
346
  - (3):...
347
  - .......
348
- Follow the format of the output that follows:
349
- 7. Methods: \n\n
350
- - (1):xxx;\n
351
- - (2):xxx;\n
352
- - (3):xxx;\n
353
- ....... \n\n
354
-
355
- Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not repeat the content of the previous <summary>, the value of the use of the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed, ....... means fill in according to the actual requirements, if not, you can not write.
356
  """},
357
  ]
358
  )
@@ -365,44 +541,37 @@ class Reader:
365
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
366
  stop=tenacity.stop_after_attempt(5),
367
  reraise=True)
368
- def chat_summary(self, text):
369
- openai.api_key = self.chat_api_list[self.cur_api]
370
- self.cur_api += 1
371
- self.cur_api = 0 if self.cur_api >= len(self.chat_api_list) - 1 else self.cur_api
372
-
373
  response = openai.ChatCompletion.create(
374
  model="gpt-3.5-turbo",
375
  messages=[
376
- {"role": "system",
377
- "content": "You are a researcher in the field of [" + self.key_word + "] who is good at summarizing papers using concise statements"},
378
- # chatgpt 角色
379
- {"role": "assistant",
380
- "content": "This is the title, author, link, abstract and introduction of an English document. I need your help to read and summarize the following questions: " + text},
381
- # 背景知识
382
  {"role": "user", "content": """
383
- 1. Mark the title of the paper (with Chinese translation)
384
- 2. list all the authors' names (use English)
385
- 3. mark the first author's affiliation (output Chinese translation only)
386
- 4. mark the keywords of this article (use English)
387
- 5. link to the paper, Github code link (if available, fill in Github:None if not)
388
- 6. summarize according to the following four points.Be sure to use Chinese answers (proper nouns need to be marked in English)
389
- - (1):What is the research background of this article?
390
- - (2):What are the past methods? What are the problems with them? Is the approach well motivated?
391
- - (3):What is the research methodology proposed in this paper?
392
- - (4):On what task and what performance is achieved by the methods in this paper? Can the performance support their goals?
393
- Follow the format of the output that follows:
394
- 1. Title: xxx\n\n
395
- 2. Authors: xxx\n\n
396
- 3. Affiliation: xxx\n\n
397
- 4. Keywords: xxx\n\n
398
- 5. Urls: xxx or xxx , xxx \n\n
399
- 6. Summary: \n\n
400
- - (1):xxx;\n
401
- - (2):xxx;\n
402
- - (3):xxx;\n
403
- - (4):xxx.\n\n
404
-
405
- Be sure to use Chinese answers (proper nouns need to be marked in English), statements as concise and academic as possible, do not have too much repetitive information, numerical values using the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed.
406
  """},
407
  ]
408
  )
@@ -410,69 +579,8 @@ class Reader:
410
  for choice in response.choices:
411
  result += choice.message.content
412
  print("summary_result:\n", result)
413
- return result
414
- @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
415
- stop=tenacity.stop_after_attempt(5),
416
- reraise=True)
417
- def chat_review(self, text):
418
- openai.api_key = self.chat_api_list[self.cur_api]
419
- self.cur_api += 1
420
- self.cur_api = 0 if self.cur_api >= len(self.chat_api_list) - 1 else self.cur_api
421
-
422
- response = openai.ChatCompletion.create(
423
- model="gpt-3.5-turbo",
424
- messages=[
425
- {"role": "system",
426
- "content": "You are a researcher in the field of [" + self.key_word + "] who is good at reviewing papers using concise statements"},
427
- # chatgpt 角色
428
- {"role": "assistant",
429
- "content": "This is the title, author, link, abstract, introduction, method, experiments, and conclusions of an English document. I need your help to read and summarize the following questions: " + text},
430
- # 背景知识
431
- {"role": "user", "content": """
432
- 1. Mark the title of the paper (use English)
433
- 2. list all the authors' names (use English)
434
- 3. mark the first author's affiliation (use English)
435
- 4. mark the keywords of this article (use English)
436
- 5. link to the paper, Github code link (if available, fill in Github:None if not)
437
- 6. summarize according to the following four points.Be sure to use Chinese answers (proper nouns need to be marked in English)
438
- - (1):What is the research background of this article?
439
- - (2):What are the past methods? What are the problems with them? Is the approach well motivated?
440
- - (3):What is the research methodology proposed in this paper?
441
- - (4):On what task and what performance is achieved by the methods in this paper? Can the performance support their goals?
442
- Follow the format of the output that follows:
443
- 1. Title: xxx\n\n
444
- 2. Summary: \n\n
445
- - (1):xxx;\n
446
- - (2):xxx;\n
447
- - (3):xxx;\n
448
- - (4):xxx.\n\n
449
- 3. Strength: \n\n
450
- - (1):xxx;\n
451
- - (2):xxx;\n
452
- - (3):xxx;\n
453
- - (4):xxx.\n\n
454
- 4. Weakness: \n\n
455
- - (1):xxx;\n
456
- - (2):xxx;\n
457
- - (3):xxx;\n
458
- - (4):xxx.\n\n
459
- 5. Other questions: \n\n
460
- - (1):grammars;\n
461
- - (2):figures;\n
462
- - (3):tables;\n
463
- - (4):other detailed questions.\n\n
464
- Be sure to use English answers, statements as concise and academic as possible, do not have too much repetitive information, numerical values using the original numbers, be sure to strictly follow the format, the corresponding content output to xxx, in accordance with \n line feed.
465
- """},
466
- ]
467
- )
468
- result = ''
469
- for choice in response.choices:
470
- result += choice.message.content
471
- print("Review_result:\n", result)
472
- return result
473
-
474
-
475
-
476
  def export_to_markdown(self, text, file_name, mode='w'):
477
  # 使用markdown模块的convert方法,将文本转换为html格式
478
  # html = markdown.markdown(text)
@@ -487,8 +595,10 @@ class Reader:
487
  print(f"Query: {self.query}")
488
  print(f"Sort: {self.sort}")
489
 
490
- def upload_pdf(key,file):
491
-
 
 
492
  # 判断PDF文件
493
  if file and file.name.split(".")[-1].lower() != "pdf":
494
  return '请勿上传非 PDF 文件!'
@@ -501,18 +611,20 @@ def upload_pdf(key,file):
501
  return sum_info
502
 
503
  # 标题
504
- title = "ChatPaperPlus"
505
  # 描述
506
  description = '''<div align='center'>
507
 
508
- Use ChatGPT to summary and Review the papers.
509
 
 
510
 
511
  </div>
512
  '''
513
  # 创建Gradio界面
514
  ip = [
515
  gradio.inputs.Textbox(label="请输入你的API-key(必填)", default=""),
 
516
  gradio.inputs.File(label="请上传论文PDF(必填)")
517
  ]
518
 
 
194
  text_list = []
195
  section_dict = {}
196
 
197
+ # # 先处理Abstract章节
198
+ # for page_index, page in enumerate(self.pdf):
199
+ # cur_text = page.get_text()
200
+ # # 如果该页面是Abstract章节所在页面
201
+ # if page_index == list(self.section_page_dict.values())[0]:
202
+ # abs_str = "Abstract"
203
+ # # 获取Abstract章节的起始位置
204
+ # first_index = cur_text.find(abs_str)
205
+ # # 查找下一个章节的关键词,这里是Introduction
206
+ # intro_str = "Introduction"
207
+ # if intro_str in cur_text:
208
+ # second_index = cur_text.find(intro_str)
209
+ # elif intro_str.upper() in cur_text:
210
+ # second_index = cur_text.find(intro_str.upper())
211
+ # # 将Abstract章节内容加入字典中
212
+ # section_dict[abs_str] = cur_text[first_index+len(abs_str)+1:second_index].replace('-\n',
213
+ # '').replace('\n', ' ').split('I.')[0].split("II.")[0]
214
 
215
  # 再处理其他章节:
216
  text_list = [page.get_text() for page in self.pdf]
 
277
  self.filter_keys = filter_keys # 用于在摘要中筛选的关键词
278
  self.root_path = root_path
279
  self.file_format = 'md' # or 'txt',如果为图片,则必须为'md'
280
+ self.save_image = False
281
+ if self.save_image:
282
+ self.gitee_key = self.config.get('Gitee', 'api')
283
+ else:
284
+ self.gitee_key = ''
285
+
286
def get_arxiv(self, max_results=30):
    """Build the arXiv search described by this reader's settings.

    The search uses ``self.query``, is sorted by ``self.sort`` in descending
    order and is capped at ``max_results`` entries. The ``arxiv.Search``
    object is returned as-is; this method does not iterate its results.
    """
    search_options = {
        "query": self.query,
        "max_results": max_results,
        "sort_by": self.sort,
        "sort_order": arxiv.SortOrder.Descending,
    }
    return arxiv.Search(**search_options)
293
+
294
def filter_arxiv(self, max_results=30):
    """Search arXiv and keep the papers whose abstract mentions every filter key.

    ``self.filter_keys`` is split on single spaces; a paper is kept only when
    each resulting key occurs (case-insensitively) in its abstract. Before
    matching, the abstract has hyphenated line breaks joined and remaining
    newlines turned into spaces.

    :param max_results: upper bound forwarded to ``self.get_arxiv``.
    :return: list of the matching result objects, in search order.
    """
    # Pull the results exactly once. The previous version iterated
    # ``search.results()`` twice (once to print, once to filter), which
    # consumes the result stream a second time for no benefit.
    all_results = list(self.get_arxiv(max_results=max_results).results())

    print("all search:")
    for index, result in enumerate(all_results):
        print(index, result.title, result.updated)

    print("filter_keys:", self.filter_keys)
    # Split and lower-case the keys once instead of once per paper.
    wanted_keys = [f_key.lower() for f_key in self.filter_keys.split(" ")]

    filter_results = []
    for result in all_results:
        abs_text = result.summary.replace('-\n', '-').replace('\n', ' ').lower()
        # Every key must be present in the abstract for the paper to count.
        if all(f_key in abs_text for f_key in wanted_keys):
            filter_results.append(result)

    print("filter_results:", len(filter_results))
    print("filter_papers:")
    for index, result in enumerate(filter_results):
        print(index, result.title, result.updated)
    return filter_results
319
+
320
  def validateTitle(self, title):
321
  # 将论文的乱七八糟的路径格式修正
322
  rstr = r"[\/\\\:\*\?\"\<\>\|]" # '/ \ : * ? " < > |'
323
  new_title = re.sub(rstr, "_", title) # 替换为下划线
324
  return new_title
325
 
326
def download_pdf(self, filter_results):
    """Download and parse the PDF of every search result.

    PDFs are stored under ``<root_path>pdf_files/<query>-<date>``, where
    ``<query>`` is ``self.query`` with its ``au: `` / ``title: `` / ``ti: ``
    prefixes removed, colons turned into spaces and cut to 25 characters,
    and ``<date>`` is the current date and hour.

    Downloading is best-effort: a paper that fails to download or parse is
    reported on stdout and left out of the returned list.

    :param filter_results: iterable of result objects (title, summary,
        authors, entry_id) as produced by ``filter_arxiv``.
    :return: list of parsed ``Paper`` objects.
    """
    # Create the target folder first.
    date_str = str(datetime.datetime.now())[:13].replace(' ', '-')
    query_tag = self.query.replace('au: ', '').replace('title: ', '').replace('ti: ', '').replace(':', ' ')[:25]
    path = self.root_path + 'pdf_files/' + query_tag + '-' + date_str
    try:
        # exist_ok replaces the old bare ``except: pass``, which also
        # swallowed KeyboardInterrupt/SystemExit.
        os.makedirs(path, exist_ok=True)
    except OSError as e:
        # Stay best-effort, but report the failure; the per-paper handler
        # below will report the downloads that fail as a consequence.
        print("makedirs_error:", e)
    print("All_paper:", len(filter_results))

    # Start downloading.
    paper_list = []
    for result in filter_results:
        try:
            pdf_name = self.validateTitle(result.title) + '.pdf'
            self.try_download_pdf(result, path, pdf_name)
            paper_path = os.path.join(path, pdf_name)
            print("paper_path:", paper_path)
            paper = Paper(path=paper_path,
                          url=result.entry_id,
                          title=result.title,
                          abs=result.summary.replace('-\n', '-').replace('\n', ' '),
                          authers=[str(aut) for aut in result.authors],
                          )
            # Download finished; parse the PDF.
            paper.parse_pdf()
            paper_list.append(paper)
        except Exception as e:
            # One bad paper must not abort the whole batch.
            print("download_error:", e)
    return paper_list
359
+
360
@tenacity.retry(
    wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
    stop=tenacity.stop_after_attempt(5),
    reraise=True,
)
def try_download_pdf(self, result, path, pdf_name):
    """Download one result's PDF into ``path`` under the name ``pdf_name``.

    The tenacity decorator retries a failing download: at most 5 attempts,
    exponential wait bounded to 4-10 seconds, and the last exception is
    re-raised to the caller.
    """
    result.download_pdf(path, filename=pdf_name)
365
+
366
@tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                stop=tenacity.stop_after_attempt(5),
                reraise=True)
def upload_gitee(self, image_path, image_name='', ext='png'):
    """Upload an image file to Gitee and return a URL for it.

    The file is base64-encoded and POSTed to the Gitee v5 "contents"
    endpoint of the owner/repo/path configured in the ``Gitee`` section of
    ``self.config``. The remote file name is
    ``<image_name>-<timestamp>.<ext>``.

    :param image_path: local path of the image to upload.
    :param image_name: prefix of the remote file name.
    :param ext: extension of the remote file name, without the dot.
    :return: ``content.download_url`` from the response when present,
        otherwise the API URL that was posted to.
    :raises: whatever the final attempt raised; tenacity retries up to 5
        times with a 4-10 second exponential wait.
    """
    with open(image_path, 'rb') as f:
        base64_content = base64.b64encode(f.read()).decode()

    date_str = str(datetime.datetime.now())[:19].replace(':', '-').replace(' ', '-') + '.' + ext
    path = image_name + '-' + date_str

    # Read the Gitee account, repository and folder from the config once.
    owner = self.config.get('Gitee', 'owner')
    repo = self.config.get('Gitee', 'repo')
    folder = self.config.get('Gitee', 'path')

    payload = {
        "access_token": self.gitee_key,
        "owner": owner,
        "repo": repo,
        "path": folder,
        "content": base64_content,
        "message": "upload image"
    }
    url = 'https://gitee.com/api/v5/repos/' + owner + '/' + repo + '/contents/' + folder + '/' + path
    # Without a timeout a stalled connection would block forever and the
    # retry policy above would never get a chance to fire.
    rep = requests.post(url, json=payload, timeout=30).json()
    print("rep:", rep)
    if 'content' in rep:
        image_url = rep['content']['download_url']
    else:
        # No download URL in the response: fall back to the API URL.
        image_url = url

    return image_url
399
 
400
  def summary_with_chat(self, paper_list, key):
401
  htmls = []
402
  for paper_index, paper in enumerate(paper_list):
403
  # 第一步先用title,abs,和introduction进行总结。
404
+ text = ''
405
+ text += 'Title:' + paper.title
406
+ text += 'Url:' + paper.url
407
+ text += 'Abstrat:' + paper.abs
408
+ # intro
409
+ text += list(paper.section_text_dict.values())[0]
410
+ max_token = 2500 * 4
411
+ text = text[:max_token]
412
  chat_summary_text = self.chat_summary(text=text, key=str(key))
413
  htmls.append(chat_summary_text)
414
+
415
+ # TODO 往md文档中插入论文里的像素最大的一张图片,这个方案可以弄的更加智能一些:
416
+ first_image, ext = paper.get_image_path()
417
+ if first_image is None or self.gitee_key == '':
418
+ pass
419
+ else:
420
+ image_title = self.validateTitle(paper.title)
421
+ image_url = self.upload_gitee(image_path=first_image, image_name=image_title, ext=ext)
422
+ htmls.append("\n")
423
+ htmls.append("![Fig]("+image_url+")")
424
+ htmls.append("\n")
425
+ # 第二步总结方法:
426
+ # TODO,由于有些文章的方法章节名是算法名,所以简单的通过关键词来筛选,很难获取,后面需要用其他的方案去优化。
427
+ method_key = ''
428
+ for parse_key in paper.section_text_dict.keys():
429
+ if 'method' in parse_key.lower() or 'approach' in parse_key.lower():
430
+ method_key = parse_key
431
+ break
432
+
433
+ if method_key != '':
434
+ text = ''
435
+ method_text = ''
436
+ summary_text = ''
437
+ summary_text += "<summary>" + chat_summary_text
438
+ # methods
439
+ method_text += paper.section_text_dict[method_key]
440
+ # TODO 把这个变成tenacity的自动判别!
441
+ max_token = 2500 * 4
442
+ text = summary_text + "\n <Methods>:\n" + method_text
443
+ text = text[:max_token]
444
+ chat_method_text = self.chat_method(text=text, key=str(key))
445
+ htmls.append(chat_method_text)
446
+ else:
447
+ chat_method_text = ''
448
+ htmls.append("\n")
449
+
450
+ # 第三步总结全文,并打分:
451
+ conclusion_key = ''
452
+ for parse_key in paper.section_text_dict.keys():
453
+ if 'conclu' in parse_key.lower():
454
+ conclusion_key = parse_key
455
+ break
456
+
457
+ text = ''
458
+ conclusion_text = ''
459
+ summary_text = ''
460
+ summary_text += "<summary>" + chat_summary_text + "\n <Method summary>:\n" + chat_method_text
461
+ if conclusion_key != '':
462
+ # conclusion
463
+ conclusion_text += paper.section_text_dict[conclusion_key]
464
+ max_token = 2500 * 4
465
+ text = summary_text + "\n <Conclusion>:\n" + conclusion_text
466
+ else:
467
+ text = summary_text
468
+ text = text[:max_token]
469
+ chat_conclusion_text = self.chat_conclusion(text=text, key=str(key))
470
+ htmls.append(chat_conclusion_text)
471
  htmls.append("\n")
 
472
  md_text = "\n".join(htmls)
473
 
474
  return markdown.markdown(md_text)
 
477
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
478
  stop=tenacity.stop_after_attempt(5),
479
  reraise=True)
480
+ def chat_conclusion(self, text, key):
481
+ openai.api_key = key
 
 
482
  response = openai.ChatCompletion.create(
483
  model="gpt-3.5-turbo",
484
  # prompt需要用英语替换,少占用token。
485
  messages=[
486
+ {"role": "system", "content": "你是一个["+self.key_word+"]领域的审稿人,你需要严格评审这篇文章"}, # chatgpt 角色
487
+ {"role": "assistant", "content": "这是一篇英文文献的<summary>和<conclusion>部分内容,其中<summary>你已经总结好了,但是<conclusion>部分,我需要你帮忙归纳下面问题:"+text}, # 背景知识,可以参考OpenReview的审稿流程
 
 
 
 
488
  {"role": "user", "content": """
489
+ 8. 做出如下总结:
490
+ - (1):这篇工作的意义如何?
491
+ - (2):从创新点、性能、工作量这三个维度,总结这篇文章的优点和缺点。
492
  .......
493
+ 按照后面的格式输出:
494
+ 8. Conclusion:
495
+ - (1):xxx;
496
+ - (2):创新点: xxx; 性能: xxx; 工作量: xxx;
497
+
498
+ 务必使用中文回答(专有名词需要用英文标注),语句尽量简洁且学术,不要和之前的<summary>内容重复,数值使用原文数字, 务必严格按照格式,将对应内容输出到xxx中,.......代表按照实际需求填写,如果没有可以不用写.
499
  """},
500
  ]
501
  )
 
503
  for choice in response.choices:
504
  result += choice.message.content
505
  print("conclusion_result:\n", result)
506
+ return result
507
+
508
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
509
  stop=tenacity.stop_after_attempt(5),
510
  reraise=True)
511
+ def chat_method(self, text, key):
512
+ openai.api_key = key
 
 
513
  response = openai.ChatCompletion.create(
514
  model="gpt-3.5-turbo",
515
  messages=[
516
+ {"role": "system", "content": "你是一个["+self.key_word+"]领域的科研人员,善于使用精炼的语句总结论文"}, # chatgpt 角色
517
+ {"role": "assistant", "content": "这是一篇英文文献的<summary>和<Method>部分内容,其中<summary>你已经总结好了,但是<Methods>部分,我需要你帮忙阅读并归纳下面问题:"+text}, # 背景知识
 
 
 
 
518
  {"role": "user", "content": """
519
+ 7. 详细描述这篇文章的方法思路。比如说它的步骤是:
520
  - (1):...
521
  - (2):...
522
  - (3):...
523
  - .......
524
+ 按照后面的格式输出:
525
+ 7. Methods:
526
+ - (1):xxx;
527
+ - (2):xxx;
528
+ - (3):xxx;
529
+ .......
530
+
531
+ 务必使用中文回答(专有名词需要用英文标注),语句尽量简洁且学术,不要和之前的<summary>内容重复,数值使用原文数字, 务必严格按照格式,将对应内容输出到xxx中,按照\n换行,.......代表按照实际需求填写,如果没有可以不用写.
532
  """},
533
  ]
534
  )
 
541
  @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
542
  stop=tenacity.stop_after_attempt(5),
543
  reraise=True)
544
+ def chat_summary(self, text, key):
545
+ openai.api_key = key
 
 
 
546
  response = openai.ChatCompletion.create(
547
  model="gpt-3.5-turbo",
548
  messages=[
549
+ {"role": "system", "content": "你是一个["+self.key_word+"]领域的科研人员,善于使用精炼的语句总结论文"}, # chatgpt 角色
550
+ {"role": "assistant", "content": "这是一篇英文文献的标题,作者,链接,Abstract和Introduction部分内容,我需要你帮忙阅读并归纳下面问题:"+text}, # 背景知识
 
 
 
 
551
  {"role": "user", "content": """
552
+ 1. 标记出这篇文献的标题(加上中文翻译)
553
+ 2. 列举所有的作者姓名 (使用英文)
554
+ 3. 标记第一作者的单位(只输出中文翻译)
555
+ 4. 标记出这篇文章的关键词(使用英文)
556
+ 5. 论文链接,Github代码链接(如果有的话,没有的话请填写Github:None
557
+ 6. 按照下面四个点进行总结:
558
+ - (1):这篇文章的研究背景是什么?
559
+ - (2):过去的方法有哪些?它们存在什么问题?本文和过去的研究有哪些本质的区别?Is the approach well motivated?
560
+ - (3):本文提出的研究方法是什么?
561
+ - (4):本文方法在什么任务上,取得了什么性能?性能能否支持他们的目标?
562
+ 按照后面的格式输出:
563
+ 1. Title: xxx
564
+ 2. Authors: xxx
565
+ 3. Affiliation: xxx
566
+ 4. Keywords: xxx
567
+ 5. Urls: xxx or xxx , xxx
568
+ 6. Summary:
569
+ - (1):xxx;
570
+ - (2):xxx;
571
+ - (3):xxx;
572
+ - (4):xxx.
573
+
574
+ 务必使用中文回答(专有名词需要用英文标注),语句尽量简洁且学术,不要有太多重复的信息,数值使用原文数字, 务必严格按照格式,将对应内容输出到xxx中,按照\n换行.
575
  """},
576
  ]
577
  )
 
579
  for choice in response.choices:
580
  result += choice.message.content
581
  print("summary_result:\n", result)
582
+ return result
583
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  def export_to_markdown(self, text, file_name, mode='w'):
585
  # 使用markdown模块的convert方法,将文本转换为html格式
586
  # html = markdown.markdown(text)
 
595
  print(f"Query: {self.query}")
596
  print(f"Sort: {self.sort}")
597
 
598
+ def upload_pdf(key, text, file):
599
+ # 检查两个输入都不为空
600
+ if not key or not text or not file:
601
+ return "两个输入都不能为空,请输入字符并上传 PDF 文件!"
602
  # 判断PDF文件
603
  if file and file.name.split(".")[-1].lower() != "pdf":
604
  return '请勿上传非 PDF 文件!'
 
611
  return sum_info
612
 
613
  # 标题
614
+ title = "ChatPaper"
615
  # 描述
616
  description = '''<div align='center'>
617
 
618
+ Use ChatGPT to summary the papers.
619
 
620
+ Star our Github [ChatPaper](https://github.com/kaixindelele/ChatPaper)
621
 
622
  </div>
623
  '''
624
  # 创建Gradio界面
625
  ip = [
626
  gradio.inputs.Textbox(label="请输入你的API-key(必填)", default=""),
627
+ gradio.inputs.Textbox(label="请输入论文大标题索引(用英文逗号隔开,必填)", default="'Abstract,Introduction,Related Work,Background,Preliminary,Problem Formulation,Methods,Methodology,Method,Approach,Approaches,Materials and Methods,Experiment Settings,Experiment,Experimental Results,Evaluation,Experiments,Results,Findings,Data Analysis,Discussion,Results and Discussion,Conclusion,References'"),
628
  gradio.inputs.File(label="请上传论文PDF(必填)")
629
  ]
630