openreviewer committed on
Commit
38a86d9
•
1 Parent(s): a5df6bb

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +23 -20
  2. aws_utils.py +91 -30
app.py CHANGED
@@ -30,7 +30,7 @@ use_real_api = False
30
 
31
  # Function to generate a paper_id using SHA-512 hash
32
  def generate_paper_id(paper_content):
33
- return hashlib.sha512(paper_content).hexdigest()
34
 
35
  # Function to get user IP address
36
  def get_user_ip():
@@ -165,6 +165,9 @@ def setup_interface():
165
  }
166
  """
167
  with gr.Blocks(css=css) as demo:
 
 
 
168
  with gr.Tabs():
169
  with gr.TabItem("Reviewer Arena"):
170
  gr.Markdown("## Reviewer Arena")
@@ -188,13 +191,13 @@ def setup_interface():
188
 
189
  model_identity_message = gr.HTML("", visible=False)
190
 
191
- def handle_vote_interface(vote, model_identity_message_a, model_identity_message_b, paper_content):
192
- return handle_vote(vote, model_identity_message_a, model_identity_message_b, paper_content)
193
 
194
  submit_button.click(fn=review_papers, inputs=[file_input],
195
- outputs=[review1, review2, vote, vote_button, model_identity_message, model_identity_message])
196
 
197
- vote_button.click(fn=handle_vote_interface, inputs=[vote, model_identity_message, model_identity_message],
198
  outputs=[vote_message, vote, vote_button, another_paper_button])
199
 
200
  another_paper_button.click(fn=lambda: None, inputs=None, outputs=None, js="() => { location.reload(); }")
@@ -204,7 +207,7 @@ def setup_interface():
204
 
205
  # Fetch the leaderboard data from the database
206
  leaderboard_data = get_leaderboard()
207
- print(leaderboard_data)
208
 
209
  # Create the leaderboard HTML dynamically
210
  leaderboard_html = """
@@ -224,19 +227,19 @@ def setup_interface():
224
  <tbody>
225
  """
226
 
227
- # for rank, model in enumerate(leaderboard_data, start=1):
228
- # leaderboard_html += f"""
229
- # <tr style="border: 1px solid #444; padding: 12px;">
230
- # <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{rank}</td>
231
- # <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['ModelID']}</td>
232
- # <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['EloScore']}</td>
233
- # <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+3/-3</td> <!-- Adjust as needed -->
234
- # <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['Votes']}</td>
235
- # <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Organization</td> <!-- Add actual data if available -->
236
- # <td style="border: 1px solid #444; padding: 12px; color: #ddd;">License</td> <!-- Add actual data if available -->
237
- # <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Knowledge Cutoff</td> <!-- Add actual data if available -->
238
- # </tr>
239
- # """
240
 
241
  leaderboard_html += """
242
  </tbody>
@@ -253,4 +256,4 @@ def setup_interface():
253
  if __name__ == "__main__":
254
  logging.basicConfig(level=logging.INFO)
255
  demo = setup_interface()
256
- demo.launch()
 
30
 
31
  # Function to generate a paper_id using SHA-512 hash
32
  def generate_paper_id(paper_content):
33
+ return hashlib.sha512(paper_content.encode('utf-8')).hexdigest()
34
 
35
  # Function to get user IP address
36
  def get_user_ip():
 
165
  }
166
  """
167
  with gr.Blocks(css=css) as demo:
168
+ paper_content_state = gr.State()
169
+ model_a_state = gr.State()
170
+ model_b_state = gr.State()
171
  with gr.Tabs():
172
  with gr.TabItem("Reviewer Arena"):
173
  gr.Markdown("## Reviewer Arena")
 
191
 
192
  model_identity_message = gr.HTML("", visible=False)
193
 
194
+ def handle_vote_interface(vote, model_a, model_b, paper_content):
195
+ return handle_vote(vote, model_a, model_b, paper_content)
196
 
197
  submit_button.click(fn=review_papers, inputs=[file_input],
198
+ outputs=[review1, review2, vote, vote_button, model_a_state, model_b_state, paper_content_state])
199
 
200
+ vote_button.click(fn=handle_vote_interface, inputs=[vote, model_a_state, model_b_state, paper_content_state],
201
  outputs=[vote_message, vote, vote_button, another_paper_button])
202
 
203
  another_paper_button.click(fn=lambda: None, inputs=None, outputs=None, js="() => { location.reload(); }")
 
207
 
208
  # Fetch the leaderboard data from the database
209
  leaderboard_data = get_leaderboard()
210
+ # print(leaderboard_data)
211
 
212
  # Create the leaderboard HTML dynamically
213
  leaderboard_html = """
 
227
  <tbody>
228
  """
229
 
230
+ for rank, model in enumerate(leaderboard_data, start=1):
231
+ leaderboard_html += f"""
232
+ <tr style="border: 1px solid #444; padding: 12px;">
233
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{rank}</td>
234
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['ModelID']}</td>
235
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['EloScore']}</td>
236
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['CI_Lower']} - {model['CI_Upper']}</td>
237
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">{model['Votes']}</td>
238
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Organization</td>
239
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">License</td>
240
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Knowledge Cutoff</td>
241
+ </tr>
242
+ """
243
 
244
  leaderboard_html += """
245
  </tbody>
 
256
  if __name__ == "__main__":
257
  logging.basicConfig(level=logging.INFO)
258
  demo = setup_interface()
259
+ demo.launch()
aws_utils.py CHANGED
@@ -2,6 +2,7 @@ import boto3
2
  import uuid
3
  import datetime
4
  import os
 
5
  from dotenv import load_dotenv
6
 
7
  try:
@@ -27,7 +28,7 @@ leaderboards_table = dynamodb.Table('reviewer_arena_leaderboard')
27
  # Function to write a request to the Requests table
28
  def write_request(user_id, paper_id, model_a, model_b, vote):
29
  request_id = str(uuid.uuid4())
30
- timestamp = datetime.datetime.now().isoformat()
31
 
32
  response = requests_table.put_item(
33
  Item={
@@ -44,55 +45,115 @@ def write_request(user_id, paper_id, model_a, model_b, vote):
44
 
45
  # Function to update leaderboard after a vote
46
  def update_leaderboard(model_a, model_b, vote):
 
 
 
 
 
 
 
 
 
47
  # Retrieve current stats for ModelA and ModelB
48
  model_a_stats = leaderboards_table.get_item(Key={'ModelID': model_a}).get('Item', {})
49
  model_b_stats = leaderboards_table.get_item(Key={'ModelID': model_b}).get('Item', {})
50
 
51
  # Initialize stats if they don't exist
52
  if not model_a_stats:
53
- model_a_stats = {'ModelID': model_a, 'Wins': 0, 'Losses': 0, 'Ties': 0, 'EloScore': 1200, 'Votes': 0}
 
54
  if not model_b_stats:
55
- model_b_stats = {'ModelID': model_b, 'Wins': 0, 'Losses': 0, 'Ties': 0, 'EloScore': 1200, 'Votes': 0}
 
56
 
57
  # Update stats based on the vote
58
- if vote == "A is better":
59
- model_a_stats['Wins'] += 1
60
- model_b_stats['Losses'] += 1
61
- elif vote == "B is better":
62
- model_a_stats['Losses'] += 1
63
- model_b_stats['Wins'] += 1
64
- elif vote == "Tie":
65
- model_a_stats['Ties'] += 1
66
- model_b_stats['Ties'] += 1
67
- model_a_stats['Votes'] += 1
68
- model_b_stats['Votes'] += 1
 
 
 
69
 
70
- # Calculate new Elo scores (simple Elo calculation for illustration)
71
- model_a_stats['EloScore'], model_b_stats['EloScore'] = calculate_elo(model_a_stats['EloScore'], model_b_stats['EloScore'], vote)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
- # Write updated stats back to the Leaderboards table
74
- leaderboards_table.put_item(Item=model_a_stats)
75
- leaderboards_table.put_item(Item=model_b_stats)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  # Function to calculate new Elo scores
78
  def calculate_elo(elo_a, elo_b, vote, k=32):
79
- expected_a = 1 / (1 + 10 ** ((elo_b - elo_a) / 400))
80
- expected_b = 1 / (1 + 10 ** ((elo_a - elo_b) / 400))
 
 
 
 
81
 
82
  if vote == "A is better":
83
- actual_a = 1
84
- actual_b = 0
85
  elif vote == "B is better":
86
- actual_a = 0
87
- actual_b = 1
88
  else: # Tie
89
- actual_a = 0.5
90
- actual_b = 0.5
91
 
92
- new_elo_a = elo_a + k * (actual_a - expected_a)
93
- new_elo_b = elo_b + k * (actual_b - expected_b)
94
 
95
- return round(new_elo_a), round(new_elo_b)
 
 
 
 
 
 
 
 
 
96
 
97
  # Function to query leaderboard
98
  def get_leaderboard():
 
2
  import uuid
3
  import datetime
4
  import os
5
+ from decimal import Decimal, getcontext
6
  from dotenv import load_dotenv
7
 
8
  try:
 
28
  # Function to write a request to the Requests table
29
  def write_request(user_id, paper_id, model_a, model_b, vote):
30
  request_id = str(uuid.uuid4())
31
+ timestamp = str(Decimal(datetime.datetime.now().timestamp()))
32
 
33
  response = requests_table.put_item(
34
  Item={
 
45
 
46
  # Function to update leaderboard after a vote
47
  def update_leaderboard(model_a, model_b, vote):
48
+ # Map vote options to simpler keys
49
+ vote_mapping = {
50
+ "👍 A is better": "A is better",
51
+ "👍 B is better": "B is better",
52
+ "👔 Tie": "Tie",
53
+ "👎 Both are bad": "Tie" # Assuming "Both are bad" is treated as a tie
54
+ }
55
+ vote = vote_mapping.get(vote, "Tie") # Default to "Tie" if vote is not found
56
+
57
  # Retrieve current stats for ModelA and ModelB
58
  model_a_stats = leaderboards_table.get_item(Key={'ModelID': model_a}).get('Item', {})
59
  model_b_stats = leaderboards_table.get_item(Key={'ModelID': model_b}).get('Item', {})
60
 
61
  # Initialize stats if they don't exist
62
  if not model_a_stats:
63
+ model_a_stats = {'ModelID': model_a, 'Wins': 0, 'Losses': 0, 'Ties': 0, 'EloScore': Decimal(1200), 'Votes': 0}
64
+ leaderboards_table.put_item(Item=model_a_stats)
65
  if not model_b_stats:
66
+ model_b_stats = {'ModelID': model_b, 'Wins': 0, 'Losses': 0, 'Ties': 0, 'EloScore': Decimal(1200), 'Votes': 0}
67
+ leaderboards_table.put_item(Item=model_b_stats)
68
 
69
  # Update stats based on the vote
70
+ update_expressions = {
71
+ "A is better": {
72
+ "model_a": "SET Wins = Wins + :inc, Votes = Votes + :inc",
73
+ "model_b": "SET Losses = Losses + :inc, Votes = Votes + :inc"
74
+ },
75
+ "B is better": {
76
+ "model_a": "SET Losses = Losses + :inc, Votes = Votes + :inc",
77
+ "model_b": "SET Wins = Wins + :inc, Votes = Votes + :inc"
78
+ },
79
+ "Tie": {
80
+ "model_a": "SET Ties = Ties + :inc, Votes = Votes + :inc",
81
+ "model_b": "SET Ties = Ties + :inc, Votes = Votes + :inc"
82
+ }
83
+ }
84
 
85
+ expression_a = update_expressions[vote]["model_a"]
86
+ expression_b = update_expressions[vote]["model_b"]
87
+
88
+ # Update ModelA stats
89
+ leaderboards_table.update_item(
90
+ Key={'ModelID': model_a},
91
+ UpdateExpression=expression_a,
92
+ ExpressionAttributeValues={':inc': 1}
93
+ )
94
+
95
+ # Update ModelB stats
96
+ leaderboards_table.update_item(
97
+ Key={'ModelID': model_b},
98
+ UpdateExpression=expression_b,
99
+ ExpressionAttributeValues={':inc': 1}
100
+ )
101
 
102
+ # Calculate new Elo scores (simple Elo calculation for illustration)
103
+ new_elo_a, new_elo_b = calculate_elo(model_a_stats['EloScore'], model_b_stats['EloScore'], vote)
104
+
105
+ # Calculate 95% CI for new Elo scores
106
+ ci_a_lower, ci_a_upper = calculate_95_ci(new_elo_a, model_a_stats['Votes'] + 1)
107
+ ci_b_lower, ci_b_upper = calculate_95_ci(new_elo_b, model_b_stats['Votes'] + 1)
108
+
109
+ # Update Elo scores and 95% CI
110
+ leaderboards_table.update_item(
111
+ Key={'ModelID': model_a},
112
+ UpdateExpression="SET EloScore = :new_elo, CI_Lower = :ci_lower, CI_Upper = :ci_upper",
113
+ ExpressionAttributeValues={':new_elo': Decimal(new_elo_a), ':ci_lower': Decimal(ci_a_lower), ':ci_upper': Decimal(ci_a_upper)}
114
+ )
115
+
116
+ leaderboards_table.update_item(
117
+ Key={'ModelID': model_b},
118
+ UpdateExpression="SET EloScore = :new_elo, CI_Lower = :ci_lower, CI_Upper = :ci_upper",
119
+ ExpressionAttributeValues={':new_elo': Decimal(new_elo_b), ':ci_lower': Decimal(ci_b_lower), ':ci_upper': Decimal(ci_b_upper)}
120
+ )
121
+
122
+ # Set the precision for Decimal
123
+ getcontext().prec = 28
124
 
125
  # Function to calculate new Elo scores
126
  def calculate_elo(elo_a, elo_b, vote, k=32):
127
+ # Ensure elo_a and elo_b are Decimals
128
+ elo_a = Decimal(elo_a)
129
+ elo_b = Decimal(elo_b)
130
+
131
+ expected_a = 1 / (1 + Decimal(10) ** ((elo_b - elo_a) / Decimal(400)))
132
+ expected_b = 1 / (1 + Decimal(10) ** ((elo_a - elo_b) / Decimal(400)))
133
 
134
  if vote == "A is better":
135
+ actual_a = Decimal(1)
136
+ actual_b = Decimal(0)
137
  elif vote == "B is better":
138
+ actual_a = Decimal(0)
139
+ actual_b = Decimal(1)
140
  else: # Tie
141
+ actual_a = Decimal(0.5)
142
+ actual_b = Decimal(0.5)
143
 
144
+ new_elo_a = elo_a + Decimal(k) * (actual_a - expected_a)
145
+ new_elo_b = elo_b + Decimal(k) * (actual_b - expected_b)
146
 
147
+ return round(new_elo_a, 2), round(new_elo_b, 2)
148
+
149
+ # Function to calculate 95% CI for Elo scores
150
+ def calculate_95_ci(elo, votes, z=1.96):
151
+ if votes == 0:
152
+ return Decimal(0), Decimal(0)
153
+ elo = Decimal(elo) # Ensure elo is a Decimal
154
+ std_error = Decimal(400) / (Decimal(votes).sqrt())
155
+ margin = Decimal(z) * std_error
156
+ return round(elo - margin, 2), round(elo + margin, 2)
157
 
158
  # Function to query leaderboard
159
  def get_leaderboard():