|
import numpy as np |
|
import os |
|
import re |
|
import jieba |
|
from io import BytesIO |
|
import datetime |
|
import time |
|
import openai, tenacity |
|
import argparse |
|
import configparser |
|
import json |
|
import tiktoken |
|
import PyPDF2 |
|
import gradio |
|
|
|
|
|
def contains_chinese(text):
    """Tell whether ``text`` has at least one character in U+4E00..U+9FFF.

    Args:
        text: The string to scan.

    Returns:
        ``True`` as soon as one character in that range is found,
        otherwise ``False`` (including for an empty string).
    """
    return any('\u4e00' <= char <= '\u9fff' for char in text)
|
|
|
def insert_sentence(text, sentence, interval):
    """Insert ``sentence`` after every ``interval`` words of each line.

    The text is handled one line at a time (split on newlines) and the word
    counter restarts on every line. A line for which ``contains_chinese``
    is true is tokenized with ``jieba.cut`` and re-joined without a
    separator; any other line is split on whitespace and re-joined with
    single spaces.

    Args:
        text: The text to annotate.
        sentence: The string added after every ``interval``-th word.
        interval: Number of words between two insertions. ``0`` disables
            insertion and ``text`` is returned unchanged.

    Returns:
        The annotated text, with its lines joined by newlines again.
    """
    if interval == 0:
        # ``count % 0`` below would raise ZeroDivisionError; treat a zero
        # interval as "never insert" instead of crashing.
        return text

    annotated_lines = []
    for line in text.split('\n'):
        if contains_chinese(line):
            words = list(jieba.cut(line))
            separator = ''
        else:
            words = line.split()
            separator = ' '

        pieces = []
        for count, word in enumerate(words, start=1):
            pieces.append(word)
            if count % interval == 0:
                pieces.append(sentence)

        annotated_lines.append(separator.join(pieces))

    return '\n'.join(annotated_lines)
|
|
|
|
|
class Reviewer:
    """Produce a ChatGPT-written review for a paper supplied as PDF bytes."""

    def __init__(self, api, api_base, review_format, paper_pdf, language):
        """Store the request settings and prepare the tokenizer.

        Args:
            api: Value assigned to ``openai.api_key`` before each request.
            api_base: Value assigned to ``openai.api_base`` before each request.
            review_format: Review requirements/format text appended to the
                system prompt.
            paper_pdf: PDF content as bytes (it is wrapped in ``BytesIO``
                when read).
            language: Language name interpolated into the prompts.
        """
        self.api = api
        self.review_format = review_format
        self.api_base = api_base
        self.language = language
        self.paper_pdf = paper_pdf
        # Token budget used when truncating the paper text.
        self.max_token_num = 12000
        self.encoding = tiktoken.get_encoding("gpt2")

    def review_by_chatgpt(self, paper_list):
        """Extract the text of ``self.paper_pdf`` and have it reviewed.

        Args:
            paper_list: Unused; the PDF stored on the instance is reviewed.

        Returns:
            The ``(review_text, total_token_used)`` tuple produced by
            ``chat_review``.
        """
        text = self.extract_chapter(self.paper_pdf)
        chat_review_text, total_token_used = self.chat_review(text=text)
        return chat_review_text, total_token_used

    # NOTE: exceptions raised by the API request are caught inside the method
    # and turned into an error message, so they never reach this decorator
    # and are not retried.
    @tenacity.retry(wait=tenacity.wait_exponential(multiplier=1, min=4, max=10),
                    stop=tenacity.stop_after_attempt(5),
                    reraise=True)
    def chat_review(self, text):
        """Ask the chat model for a review of ``text``.

        The paper text is cut to a character prefix whose length is scaled
        by the ratio between the available token budget
        (``self.max_token_num`` minus a fixed prompt allowance) and the
        estimated token count of the full text.

        Args:
            text: Full text of the paper.

        Returns:
            A ``(result, usage)`` tuple. On success ``result`` is the
            watermarked review followed by the ethics statement and
            ``usage`` is ``response.usage.total_tokens``. If the request
            raises, ``result`` is an error message and ``usage`` is the
            placeholder string ``'xxxxx'``.
        """
        openai.api_key = self.api
        openai.api_base = self.api_base
        review_prompt_token = 1000  # allowance reserved for the instructions

        try:
            text_token = len(self.encoding.encode(text))
        except Exception:
            # Tokenization failed: fall back to an estimate above the budget
            # so the text is still truncated.
            text_token = 13000

        input_text_index = int(
            len(text) * (self.max_token_num - review_prompt_token) / (text_token + 1)
        )
        input_text = "This is the paper for your review:" + text[:input_text_index]
        messages = [
            {"role": "system", "content": "You are a professional reviewer. Now I will give you a paper. You need to give a complete review opinion according to the following requirements and format:"+ self.review_format + "Be sure to use {} answers".format(self.language)},
            {"role": "user", "content": input_text + " Translate the output into {}.".format(self.language)},
        ]

        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=messages,
                temperature=0.5
            )
            result = ''.join(choice.message.content for choice in response.choices)
            # Watermark the review every 50 words.
            result = insert_sentence(result, '**Generated by ChatGPT, no copying allowed!**', 50)
            result += "\n\n⚠伦理声明/Ethics statement:\n--禁止直接复制生成的评论用于任何论文审稿工作!\n--Direct copying of generated comments for any paper review work is prohibited!"
            usage = response.usage.total_tokens
        except Exception as e:
            # Report the failure to the caller as text instead of raising.
            result = "⚠:非常抱歉>_<,生了一个错误:"+ str(e)
            usage = 'xxxxx'

        print("********"*10)
        print(result)
        print("********"*10)
        return result, usage

    def extract_chapter(self, pdf_path):
        """Return the concatenated text of every page of the PDF.

        Args:
            pdf_path: Despite the name, the PDF content as bytes; it is
                wrapped in ``BytesIO`` for ``PyPDF2.PdfReader``.

        Returns:
            The page texts joined in page order (an empty string when the
            document has no pages).
        """
        pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_path))
        return "".join(page.extract_text() for page in pdf_reader.pages)
|
|
|
def main(api, api_base, review_format, paper_pdf, language):
    """Gradio callback: review the uploaded paper and report usage.

    Args:
        api: API key entered by the user.
        api_base: API base URL entered by the user.
        review_format: Review requirements/format text.
        paper_pdf: Uploaded PDF content.
        language: Output language name.

    Returns:
        A ``(comments, stats)`` tuple of strings. When the API key, the
        review format or the PDF is missing, both elements are the same
        warning message.
    """
    start_time = time.time()

    if not (api and review_format and paper_pdf):
        warning = "⚠:API-key或审稿要求或论文pdf未输入!请检测!"
        return warning, warning

    reviewer = Reviewer(api, api_base, review_format, paper_pdf, language)
    comments, total_token_used = reviewer.review_by_chatgpt(paper_list=paper_pdf)
    elapsed = round(time.time() - start_time, 2)
    output2 = "使用token数:{}\n花费时间:{}秒".format(total_token_used, elapsed)
    return comments, output2
|
|
|
|
|
|
|
|
|
|
|
# Title passed to the Gradio interface.
title = "🤖ChatReviewer🤖"

# Markdown/HTML description passed to the Gradio interface.
description = '''<div align='left'>

<img align='right' src='http://i.imgtg.com/2023/03/22/94PLN.png' width="220">

<strong>ChatReviewer是一款基于ChatGPT-3.5的API开发的智能论文分析与建议助手。(本系统不会获取上传的pdf内容)</strong>其用途如下:



⭐️对论文的优缺点进行快速总结和分析,提高科研人员的文献阅读和理解的效率,紧跟研究前沿。



⭐️对自己的论文进行分析,根据ChatReviewer生成的改进建议进行查漏补缺,进一步提高自己的论文质量。



如果觉得很卡,可以点击右上角的Duplicate this Space,把ChatReviewer复制到你自己的Space中!(🈲:禁止直接复制生成的评论用于任何论文审稿工作!)



本项目的[Github](https://github.com/nishiwen1214/ChatReviewer),欢迎Star和Fork,也欢迎大佬赞助让本项目快速成长!💗







</div>

'''

# Input widgets, in the same order as the parameters of ``main``:
# API key, API base URL, review format, PDF upload, output language.
inp = [
    gradio.inputs.Textbox(label="请输入你的API-key(sk开头的字符串)",
                          default="",
                          type='password'),
    gradio.inputs.Textbox(label="请输入第三方中转网址(以/v1结尾,使用原始OpenAI的API请跳过这里)",
                          default="https://api.openai.com/v1"),
    gradio.inputs.Textbox(lines=5,
                          label="请输入特定的分析要求和格式(否则为默认格式)",
                          default="""* Overall Review

Please briefly summarize the main points and contributions of this paper.

xxx

* Paper Strength

Please provide a list of the strengths of this paper, including but not limited to: innovative and practical methodology, insightful empirical findings or in-depth theoretical analysis,

well-structured review of relevant literature, and any other factors that may make the paper valuable to readers. (Maximum length: 2,000 characters)

(1) xxx

(2) xxx

(3) xxx

* Paper Weakness

Please provide a numbered list of your main concerns regarding this paper (so authors could respond to the concerns individually).

These may include, but are not limited to: inadequate implementation details for reproducing the study, limited evaluation and ablation studies for the proposed method,

correctness of the theoretical analysis or experimental results, lack of comparisons or discussions with widely-known baselines in the field, lack of clarity in exposition,

or any other factors that may impede the reader's understanding or benefit from the paper. Please kindly refrain from providing a general assessment of the paper's novelty without providing detailed explanations. (Maximum length: 2,000 characters)

(1) xxx

(2) xxx

(3) xxx

* Questions To Authors And Suggestions For Rebuttal

Please provide a numbered list of specific and clear questions that pertain to the details of the proposed method, evaluation setting, or additional results that would aid in supporting the authors' claims.

The questions should be formulated in a manner that, after the authors have answered them during the rebuttal, it would enable a more thorough assessment of the paper's quality. (Maximum length: 2,000 characters)

*Overall score (1-10)

The paper is scored on a scale of 1-10, with 10 being the full mark, and 6 stands for borderline accept. Then give the reason for your rating.

xxx"""
                          ),
    gradio.inputs.File(label="请上传论文PDF文件(请务必等pdf上传完成后再点击Submit!)", type="bytes"),
    # The selected value is interpolated verbatim into the prompts, so the
    # language names must be spelled correctly.
    gradio.inputs.Radio(choices=["English", "Chinese", "French", "German", "Japanese"],
                        default="English",
                        label="选择输出语言"),
]

# ``main`` returns two strings: the review text and the usage statistics.
chat_reviewer_gui = gradio.Interface(fn=main,
                                     inputs=inp,
                                     outputs=[gradio.Textbox(lines=25, label="分析结果"),
                                              gradio.Textbox(lines=2, label="资源统计")],
                                     title=title,
                                     description=description)

chat_reviewer_gui.launch(quiet=True, show_api=False)