maxidl committed on
Commit
8d36e4b
1 Parent(s): 909f699
Files changed (1) hide show
  1. app.py +119 -11
app.py CHANGED
@@ -14,6 +14,7 @@ from surya.settings import settings as surya_settings
14
  import traceback
15
 
16
 
 
17
  configure_logging()
18
  MAX_PAGES = 20
19
  MIN_LENGTH=200
@@ -24,7 +25,7 @@ surya_settings.IN_STREAMLIT = True
24
  model_refs = load_all_models()
25
  metadata = {}
26
 
27
-
28
  model_name = "maxidl/arena-test"
29
  model = AutoModelForCausalLM.from_pretrained(
30
  model_name,
@@ -33,10 +34,111 @@ model = AutoModelForCausalLM.from_pretrained(
33
  )
34
  tokenizer = AutoTokenizer.from_pretrained(model_name)
35
 
36
-
37
- title = "# Placeholder Title"
38
- steps = """Placeholder Description"""
39
- # steps = """1. Converts uploaded pdf file to markdown. You can edit the intermediate markdown output.\n2. Generates a review for the paper"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  @spaces.GPU(duration=60)
42
  def convert_file(filepath):
@@ -69,19 +171,20 @@ def process_file(file):
69
 
70
  @spaces.GPU(duration=60)
71
  def generate(paper_text):
72
- messages = [
73
- {"role": "system", "content": "You are a pirate."},
74
- {"role": "user", "content": paper_text}
75
- ]
 
76
  input_ids = tokenizer.apply_chat_template(
77
  messages,
78
  add_generation_prompt=True,
79
  return_tensors='pt'
80
  ).to(model.device)
81
-
82
  generated_ids = model.generate(
83
  input_ids=input_ids,
84
- max_new_tokens=256
85
  )
86
  generated_ids = [
87
  output_ids[len(input_ids):] for input_ids, output_ids in zip(input_ids, generated_ids)
@@ -93,6 +196,11 @@ def generate(paper_text):
93
 
94
 
95
 
 
 
 
 
 
96
  with gr.Blocks() as demo:
97
  title = gr.Markdown(title)
98
  steps = gr.Markdown(steps)
 
14
  import traceback
15
 
16
 
17
+ # marker
18
  configure_logging()
19
  MAX_PAGES = 20
20
  MIN_LENGTH=200
 
25
  model_refs = load_all_models()
26
  metadata = {}
27
 
28
+ # prepare LLM
29
  model_name = "maxidl/arena-test"
30
  model = AutoModelForCausalLM.from_pretrained(
31
  model_name,
 
34
  )
35
  tokenizer = AutoTokenizer.from_pretrained(model_name)
36
 
37
+ # Define prompts
38
# System prompt for the review-generating LLM. It embeds the reviewer
# guidelines and contains exactly one `str.format` placeholder,
# `{review_fields}`, which is replaced with the markdown section
# descriptions the generated review must follow.
# NOTE(review): wording was corrected ("You write reviews", and the dangling
# "listed in b." reference now points at the strong-points item). If the model
# was fine-tuned on the earlier wording, confirm outputs are unaffected.
SYSTEM_PROMPT_TEMPLATE = """You are an expert reviewer for AI conferences. You follow best practices and review papers according to the reviewer guidelines.

Reviewer guidelines:
1. Read the paper: It’s important to carefully read through the entire paper, and to look up any related work and citations that will help you comprehensively evaluate it. Be sure to give yourself sufficient time for this step.
2. While reading, consider the following:
- Objective of the work: What is the goal of the paper? Is it to better address a known application or problem, draw attention to a new application or problem, or to introduce and/or explain a new theoretical finding? A combination of these? Different objectives will require different considerations as to potential value and impact.
- Strong points: is the submission clear, technically correct, experimentally rigorous, reproducible, does it present novel findings (e.g. theoretically, algorithmically, etc.)?
- Weak points: is it weak in any of the aspects listed under strong points?
- Be mindful of potential biases and try to be open-minded about the value and interest a paper can hold for the community, even if it may not be very interesting for you.
3. Answer four key questions for yourself, to make a recommendation to Accept or Reject:
- What is the specific question and/or problem tackled by the paper?
- Is the approach well motivated, including being well-placed in the literature?
- Does the paper support the claims? This includes determining if results, whether theoretical or empirical, are correct and if they are scientifically rigorous.
- What is the significance of the work? Does it contribute new knowledge and sufficient value to the community? Note, this does not necessarily require state-of-the-art results. Submissions bring value to the community when they convincingly demonstrate new, relevant, impactful knowledge (incl., empirical, theoretical, for practitioners, etc).
4. Write your review including the following information:
- Summarize what the paper claims to contribute. Be positive and constructive.
- List strong and weak points of the paper. Be as comprehensive as possible.
- Clearly state your initial recommendation (accept or reject) with one or two key reasons for this choice.
- Provide supporting arguments for your recommendation.
- Ask questions you would like answered by the authors to help you clarify your understanding of the paper and provide the additional evidence you need to be confident in your assessment.
- Provide additional feedback with the aim to improve the paper. Make it clear that these points are here to help, and not necessarily part of your decision assessment.

You write reviews in markdown format. Your reviews contain the following sections:

# Review

{review_fields}

Your response must only contain the review in markdown format with sections as defined above.
"""
68
+
69
# User-turn prompt. `{paper_text}` is the single `str.format` placeholder and
# is replaced with the text of the paper to review.
USER_PROMPT_TEMPLATE = "Review the following paper:\n\n{paper_text}\n"
73
+
74
+ # For now, use fixed review fields
75
# Markdown description of the sections the generated review must contain:
# summary, numerical ratings (soundness, presentation, contribution),
# strengths, weaknesses, questions, ethics flag and details, and an overall
# rating. generate() passes this constant as `review_fields` to
# create_messages(), which substitutes it into SYSTEM_PROMPT_TEMPLATE.
# The text is sent to the model verbatim, so edits here change model input.
REVIEW_FIELDS = """## Summary
Briefly summarize the paper and its contributions. This is not the place to critique the paper; the authors should generally agree with a well-written summary.

## Soundness
Please assign the paper a numerical rating on the following scale to indicate the soundness of the technical claims, experimental and research methodology and on whether the central claims of the paper are adequately supported with evidence. Choose from the following:
4: excellent
3: good
2: fair
1: poor

## Presentation
Please assign the paper a numerical rating on the following scale to indicate the quality of the presentation. This should take into account the writing style and clarity, as well as contextualization relative to prior work. Choose from the following:
4: excellent
3: good
2: fair
1: poor

## Contribution
Please assign the paper a numerical rating on the following scale to indicate the quality of the overall contribution this paper makes to the research area being studied. Are the questions being asked important? Does the paper bring a significant originality of ideas and/or execution? Are the results valuable to share with the broader ICLR community? Choose from the following:
4: excellent
3: good
2: fair
1: poor

## Strengths
A substantive assessment of the strengths of the paper, touching on each of the following dimensions: originality, quality, clarity, and significance. We encourage reviewers to be broad in their definitions of originality and significance. For example, originality may arise from a new definition or problem formulation, creative combinations of existing ideas, application to a new domain, or removing limitations from prior results.

## Weaknesses
A substantive assessment of the weaknesses of the paper. Focus on constructive and actionable insights on how the work could improve towards its stated goals. Be specific, avoid generic remarks. For example, if you believe the contribution lacks novelty, provide references and an explanation as evidence; if you believe experiments are insufficient, explain why and exactly what is missing, etc.

## Questions
Please list up and carefully describe any questions and suggestions for the authors. Think of the things where a response from the author can change your opinion, clarify a confusion or address a limitation. This is important for a productive rebuttal and discussion phase with the authors.

## Flag For Ethics Review
If there are ethical issues with this paper, please flag the paper for an ethics review and select area of expertise that would be most useful for the ethics reviewer to have. Please select all that apply. Choose from the following:
No ethics review needed.
Yes, Discrimination / bias / fairness concerns
Yes, Privacy, security and safety
Yes, Legal compliance (e.g., GDPR, copyright, terms of use)
Yes, Potentially harmful insights, methodologies and applications
Yes, Responsible research practice (e.g., human subjects, data release)
Yes, Research integrity issues (e.g., plagiarism, dual submission)
Yes, Unprofessional behaviors (e.g., unprofessional exchange between authors and reviewers)
Yes, Other reasons (please specify below)

## Details Of Ethics Concerns
Please provide details of your concerns.

## Rating
Please provide an "overall score" for this submission. Choose from the following:
1: strong reject
3: reject, not good enough
5: marginally below the acceptance threshold
6: marginally above the acceptance threshold
8: accept, good paper
10: strong accept, should be highlighted at the conference


"""
134
+
135
+ # functions
136
def create_messages(review_fields, paper_text):
    """Build the chat messages for one review request.

    Args:
        review_fields: Text substituted for ``{review_fields}`` in
            ``SYSTEM_PROMPT_TEMPLATE``.
        paper_text: Text substituted for ``{paper_text}`` in
            ``USER_PROMPT_TEMPLATE``.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: the
        system message first, then the user message.
    """
    system_content = SYSTEM_PROMPT_TEMPLATE.format(review_fields=review_fields)
    user_content = USER_PROMPT_TEMPLATE.format(paper_text=paper_text)
    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content},
    ]
142
 
143
  @spaces.GPU(duration=60)
144
  def convert_file(filepath):
 
171
 
172
  @spaces.GPU(duration=60)
173
  def generate(paper_text):
174
+ # messages = [
175
+ # {"role": "system", "content": "You are a pirate."},
176
+ # {"role": "user", "content": paper_text}
177
+ # ]
178
+ messages = create_messages(REVIEW_FIELDS, paper_text)
179
  input_ids = tokenizer.apply_chat_template(
180
  messages,
181
  add_generation_prompt=True,
182
  return_tensors='pt'
183
  ).to(model.device)
184
+ print(f"input_ids shape: {input_ids.shape}")
185
  generated_ids = model.generate(
186
  input_ids=input_ids,
187
+ max_new_tokens=512
188
  )
189
  generated_ids = [
190
  output_ids[len(input_ids):] for input_ids, output_ids in zip(input_ids, generated_ids)
 
196
 
197
 
198
 
199
+ # ui
200
+ title = "# Placeholder Title"
201
+ steps = """Placeholder Description"""
202
+ # steps = """1. Converts uploaded pdf file to markdown. You can edit the intermediate markdown output.\n2. Generates a review for the paper"""
203
+
204
  with gr.Blocks() as demo:
205
  title = gr.Markdown(title)
206
  steps = gr.Markdown(steps)