openreviewer committed on
Commit
25f01d1
1 Parent(s): 0bf9463

Upload folder using huggingface_hub

Files changed (11)
  1. .gitattributes +16 -16
  2. .github/workflows/deploy.yml +53 -53
  3. .gitignore +2 -2
  4. app.py +254 -168
  5. file_utils.py +2 -2
  6. iclr2024/question11.txt +6 -6
  7. iclr2024/systemrole.txt +10 -10
  8. logging_config.py +8 -8
  9. models.py +158 -158
  10. requirements.txt +108 -108
  11. utils.py +45 -49
.gitattributes CHANGED
@@ -1,16 +1,16 @@
- # HIDE ALL OF THE FILES IN THE DIRECTORY
- *.py
- *.log
- *.md
- *.txt
- iclr2024/**
- *.github/**
- *.gitignore
- *.gitattributes
- *.git/**
- *.__pycache__/**
-
-
-
-
-

+ # HIDE ALL OF THE FILES IN THE DIRECTORY
+ *.py
+ *.log
+ *.md
+ *.txt
+ iclr2024/**
+ *.github/**
+ *.gitignore
+ *.gitattributes
+ *.git/**
+ *.__pycache__/**
+
+
+
+
+
.github/workflows/deploy.yml CHANGED
@@ -1,54 +1,54 @@
- name: Deploy Gradio App
-
- on:
- push:
- branches:
- - main
-
- jobs:
- deploy:
- runs-on: ubuntu-latest
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v3
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.12.3' # Specify the Python version you are using
-
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install -r requirements.txt # Ensure you have a requirements.txt file
-
- - name: Login to Hugging Face
- env:
- HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
- run: |
- huggingface-cli login --token $HUGGINGFACE_TOKEN
-
- - name: Deploy Gradio App
- env:
- HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
- run: gradio deploy
- # - name: Upload to Hugging Face Spaces
- # env:
- # HF_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
- # run: |
- # git lfs install
- # huggingface-cli lfs-enable-largefiles .
- # huggingface-cli repo create reviewerarena/reviewer-arena --type=space
- # huggingface-cli repo upload reviewerarena/reviewer-arena . --all-yes
- # - name: Login to Hugging Face
- # env:
- # HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
- # run: |
- # echo "$HUGGINGFACE_TOKEN" | huggingface-cli login --token
-
- # - name: Deploy Gradio App
- # env:
- # HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
- # run: |
  # gradio deploy --token $HUGGINGFACE_TOKEN

+ name: Deploy Gradio App
+
+ on:
+ push:
+ branches:
+ - main
+
+ jobs:
+ deploy:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v3
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.12.3' # Specify the Python version you are using
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt # Ensure you have a requirements.txt file
+
+ - name: Login to Hugging Face
+ env:
+ HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+ run: |
+ huggingface-cli login --token $HUGGINGFACE_TOKEN
+
+ - name: Deploy Gradio App
+ env:
+ HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+ run: gradio deploy
+ # - name: Upload to Hugging Face Spaces
+ # env:
+ # HF_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+ # run: |
+ # git lfs install
+ # huggingface-cli lfs-enable-largefiles .
+ # huggingface-cli repo create reviewerarena/reviewer-arena --type=space
+ # huggingface-cli repo upload reviewerarena/reviewer-arena . --all-yes
+ # - name: Login to Hugging Face
+ # env:
+ # HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+ # run: |
+ # echo "$HUGGINGFACE_TOKEN" | huggingface-cli login --token
+
+ # - name: Deploy Gradio App
+ # env:
+ # HUGGINGFACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+ # run: |
  # gradio deploy --token $HUGGINGFACE_TOKEN
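The workflow logs in with the CLI; for checking the same token locally before a push, a minimal sketch using the huggingface_hub client (an assumption for local use, not part of this commit):

import os
from huggingface_hub import login, HfApi

# Assumes the token stored as secrets.HUGGING_FACE_TOKEN has been exported
# locally as HUGGINGFACE_TOKEN; login() caches it for later CLI/library calls.
login(token=os.environ["HUGGINGFACE_TOKEN"])
print(HfApi().whoami()["name"])  # prints the account name if the token is valid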
.gitignore CHANGED
@@ -1,3 +1,3 @@
- my-venv/
- old/
  arena.log

+ my-venv/
+ old/
  arena.log
app.py CHANGED
@@ -1,168 +1,254 @@
1
- import gradio as gr
2
- from utils import process_paper
3
- import os
4
- import logging
5
- import html
6
- from logging_config import setup_logging
7
-
8
-
9
- setup_logging() # Ensure logging is initialized
10
- # Define global variables for directories and API keys
11
- paper_dir = 'path_to_temp_storage'
12
- prompt_dir = 'iclr2024'
13
- api_keys = {
14
- 'openai_api_key': os.environ.get('openai_api_key'),
15
- 'claude_api_key': os.environ.get('anthropic_api_key'),
16
- 'gemini_api_key': os.environ.get('google_api_key'),
17
- 'commandr_api_key': os.environ.get('cohere_api_key')
18
- }
19
-
20
- # Configure whether to use real API or not
21
- use_real_api = False # Set this to True to use real APIs, False to use dummy data
22
-
23
- def review_papers(pdf_file):
24
- logging.info(f"Received file type: {type(pdf_file)}")
25
- if use_real_api:
26
- reviews = process_paper(pdf_file, paper_dir, prompt_dir, api_keys)
27
- processed_reviews = []
28
- for review in reviews:
29
- processed_review = {}
30
- for section in review:
31
- if ':' in section: # Ensure there is a colon to split on
32
- key, value = section.split(':', 1) # Split on the first colon only
33
- # Replace newline characters with <br> for HTML line breaks
34
- processed_value = value.strip().replace('\n', '<br>')
35
- processed_review[key.strip()] = html.escape(processed_value) # Ensure HTML escaping
36
- processed_reviews.append(processed_review)
37
- reviews = processed_reviews
38
- else:
39
- # Dummy reviews for testing with structured sections
40
- reviews = [
41
- {
42
- "Summary": "This is a placeholder review for Model 1. The paper explores advanced methodologies in reinforcement learning applied to autonomous driving systems, proposing significant enhancements to decision-making algorithms that could improve safety and operational efficiency. The authors provide a detailed analysis of the current limitations of existing systems and suggest innovative solutions that could transform the field.",
43
- "Soundness": "The assumptions underlying the proposed enhancements are occasionally not fully justified, particularly concerning the scalability of the algorithms under varied and unpredictable traffic conditions. A more rigorous examination of these assumptions is necessary to solidify the paper's foundation.",
44
- "Presentation": "While the paper is structured adequately, some sections delve into technical details that are not sufficiently elucidated for a broader audience. This could potentially limit the paper's impact and accessibility, making it challenging for non-specialists to fully grasp the implications of the research.",
45
- "Contribution": "The paper makes a moderate contribution to the existing body of knowledge, offering incremental improvements over current methodologies rather than a completely novel approach. However, these improvements are significant and could lead to better practical implementations in the field of autonomous driving.",
46
- "Strengths": "The initial results presented in the paper are promising, showing potential for the proposed methods. The inclusion of real-world data in the preliminary experiments adds a layer of credibility and relevance to the results, showcasing the practical applicability of the research.",
47
- "Weaknesses": "The paper lacks detailed exposition on the methodology, particularly in how the algorithms adapt to unexpected or novel scenarios. This is a critical area that requires further development and testing to ensure the robustness and reliability of the proposed solutions.",
48
- "Questions/Suggestions": "The statistical analysis section could be enhanced by incorporating more robust statistical techniques and a wider array of metrics. Additionally, conducting tests in a variety of driving environments could help in substantiating the claims made and strengthen the overall findings of the research.",
49
- "Ethics Review": "The research complies with all ethical standards, addressing potential ethical issues related to autonomous driving comprehensively. Issues such as privacy concerns, decision-making in critical situations, and the overall impact on societal norms are discussed and handled with the utmost care.",
50
- "Overall Score": "3/5",
51
- "Confidence": "Confidence in the findings is moderate. While the initial results are encouraging, the limited scope of testing and some unresolved questions regarding scalability and robustness temper the confidence in these results.",
52
- "Code of Conduct": "There are no violations of the code of conduct noted. The research upholds ethical standards and maintains transparency in methodologies and data usage, contributing to its integrity and the trustworthiness of the findings."
53
- },
54
- {
55
- "Summary": "This is a placeholder review for Model 2. The paper explores advanced methodologies in reinforcement learning applied to autonomous driving systems, proposing significant enhancements to decision-making algorithms that could improve safety and operational efficiency. The authors provide a detailed analysis of the current limitations of existing systems and suggest innovative solutions that could transform the field.",
56
- "Soundness": "The assumptions underlying the proposed enhancements are occasionally not fully justified, particularly concerning the scalability of the algorithms under varied and unpredictable traffic conditions. A more rigorous examination of these assumptions is necessary to solidify the paper's foundation.",
57
- "Presentation": "While the paper is structured adequately, some sections delve into technical details that are not sufficiently elucidated for a broader audience. This could potentially limit the paper's impact and accessibility, making it challenging for non-specialists to fully grasp the implications of the research.",
58
- "Contribution": "The paper makes a moderate contribution to the existing body of knowledge, offering incremental improvements over current methodologies rather than a completely novel approach. However, these improvements are significant and could lead to better practical implementations in the field of autonomous driving.",
59
- "Strengths": "The initial results presented in the paper are promising, showing potential for the proposed methods. The inclusion of real-world data in the preliminary experiments adds a layer of credibility and relevance to the results, showcasing the practical applicability of the research.",
60
- "Weaknesses": "The paper lacks detailed exposition on the methodology, particularly in how the algorithms adapt to unexpected or novel scenarios. This is a critical area that requires further development and testing to ensure the robustness and reliability of the proposed solutions.",
61
- "Questions/Suggestions": "The statistical analysis section could be enhanced by incorporating more robust statistical techniques and a wider array of metrics. Additionally, conducting tests in a variety of driving environments could help in substantiating the claims made and strengthen the overall findings of the research.",
62
- "Ethics Review": "The research complies with all ethical standards, addressing potential ethical issues related to autonomous driving comprehensively. Issues such as privacy concerns, decision-making in critical situations, and the overall impact on societal norms are discussed and handled with the utmost care.",
63
- "Overall Score": "3/5",
64
- "Confidence": "Confidence in the findings is moderate. While the initial results are encouraging, the limited scope of testing and some unresolved questions regarding scalability and robustness temper the confidence in these results.",
65
- "Code of Conduct": "There are no violations of the code of conduct noted. The research upholds ethical standards and maintains transparency in methodologies and data usage, contributing to its integrity and the trustworthiness of the findings."
66
- }
67
- ]
68
- processed_reviews = []
69
- for review in reviews:
70
- processed_review = {}
71
- for key, value in review.items():
72
- # Replace newline characters with <br> for HTML line breaks and escape HTML
73
- processed_value = value.strip().replace('\n', '<br>')
74
- processed_review[key.strip()] = html.escape(processed_value) # Ensure HTML escaping
75
- processed_reviews.append(processed_review)
76
- reviews = processed_reviews
77
-
78
- review_texts = []
79
- for review in reviews:
80
- formatted_review = "<div class='review-container'>"
81
- for section, content in review.items():
82
- formatted_review += f"<div class='review-section'><strong>{section}:</strong> <span>{html.unescape(content)}</span></div>"
83
- formatted_review += "</div>"
84
- review_texts.append(formatted_review)
85
- logging.debug(f"Final formatted reviews: {review_texts}")
86
- return review_texts
87
-
88
- def setup_interface():
89
- logging.debug("Setting up Gradio interface.")
90
- css = """
91
- .review-container {
92
- padding: 10px;
93
- margin-bottom: 20px;
94
- border: 1px solid #ccc;
95
- background-color: #f9f9f9;
96
- }
97
- .review-section {
98
- margin-bottom: 12px;
99
- padding: 8px;
100
- background-color: #ffffff;
101
- border-left: 4px solid #007BFF;
102
- padding-left: 10px;
103
- }
104
- .review-section strong {
105
- color: #333;
106
- font-weight: bold;
107
- display: block;
108
- margin-bottom: 5px;
109
- }
110
- .review-section span, .gr-markdown {
111
- color: #000;
112
- font-size: 14px;
113
- line-height: 1.5;
114
- display: block;
115
- white-space: normal;
116
- opacity: 1;
117
- }
118
- .model-label {
119
- font-size: 18px;
120
- font-weight: bold;
121
- color: #007BFF;
122
- margin-bottom: 10px;
123
- }
124
- .gr-file, .gr-button, .gr-radio {
125
- width: 300px;
126
- margin: auto;
127
- }
128
- """
129
- with gr.Blocks(css=css) as demo:
130
- gr.Markdown("## Reviewer Arena")
131
- gr.Markdown("Upload an academic paper to get reviews from two randomly selected LLMs.")
132
- with gr.Row():
133
- file_input = gr.File(label="Upload Academic Paper")
134
- submit_button = gr.Button("Submit!!")
135
- with gr.Row():
136
- with gr.Column():
137
- gr.HTML("<div class='model-label'>Model A</div>")
138
- review1 = gr.Markdown()
139
- with gr.Column():
140
- gr.HTML("<div class='model-label'>Model B</div>")
141
- review2 = gr.Markdown()
142
-
143
- # Voting options
144
- vote_options = ["👍 A is better", "👍 B is better", "👔 Tie", "👎 Both are bad"]
145
- vote = gr.Radio(label="Vote on the best model", choices=vote_options, value="Tie")
146
- vote_button = gr.Button("Submit Vote")
147
-
148
- def handle_vote(vote):
149
- print(f"Vote received: {vote}")
150
- return f"Vote for '{vote}' received!"
151
-
152
- vote_button.click(fn=handle_vote, inputs=vote, outputs=gr.Textbox(visible=False))
153
-
154
- submit_button.click(
155
- fn=review_papers,
156
- inputs=[file_input],
157
- outputs=[review1, review2]
158
- )
159
- logging.debug("Gradio interface setup complete.")
160
- return demo
161
-
162
- if __name__ == "__main__":
163
- logging.basicConfig(level=logging.INFO)
164
- demo = setup_interface()
165
- # BLOCK PATHS OF ALL THE FILES AND LAUNCH THE APP
166
-
167
- # demo.launch(auth=(os.environ.get('login_username'), os.environ.get('login_password')), share=True)
168
- demo.launch()

1
+ import gradio as gr
2
+ from utils import process_paper
3
+ import os
4
+ import logging
5
+ import html
6
+ from logging_config import setup_logging
7
+
8
+ setup_logging()
9
+ paper_dir = 'path_to_temp_storage'
10
+ prompt_dir = 'iclr2024'
11
+ api_keys = {
12
+ 'openai_api_key': os.environ.get('openai_api_key'),
13
+ 'claude_api_key': os.environ.get('anthropic_api_key'),
14
+ 'gemini_api_key': os.environ.get('google_api_key'),
15
+ 'commandr_api_key': os.environ.get('cohere_api_key')
16
+ }
17
+
18
+ use_real_api = False
19
+
20
+
21
+ def review_papers(pdf_file):
22
+ logging.info(f"Received file type: {type(pdf_file)}")
23
+ if use_real_api:
24
+ reviews, selected_models = process_paper(
25
+ pdf_file, paper_dir, prompt_dir, api_keys)
26
+ processed_reviews = []
27
+ for review in reviews:
28
+ processed_review = {}
29
+ for section in review:
30
+ if ':' in section:
31
+ key, value = section.split(':', 1)
32
+ processed_value = value.strip().replace('\n', '<br>')
33
+ processed_review[key.strip()] = html.escape(
34
+ processed_value)
35
+ processed_reviews.append(processed_review)
36
+ reviews = processed_reviews
37
+ else:
38
+ reviews = [
39
+ {
40
+ "Summary": "This is a placeholder review for Model 1. The paper explores advanced methodologies in reinforcement learning applied to autonomous driving systems, proposing significant enhancements to decision-making algorithms that could improve safety and operational efficiency. The authors provide a detailed analysis of the current limitations of existing systems and suggest innovative solutions that could transform the field.",
41
+ "Soundness": "The assumptions underlying the proposed enhancements are occasionally not fully justified, particularly concerning the scalability of the algorithms under varied and unpredictable traffic conditions. A more rigorous examination of these assumptions is necessary to solidify the paper's foundation.",
42
+ "Presentation": "While the paper is structured adequately, some sections delve into technical details that are not sufficiently elucidated for a broader audience. This could potentially limit the paper's impact and accessibility, making it challenging for non-specialists to fully grasp the implications of the research.",
43
+ "Contribution": "The paper makes a moderate contribution to the existing body of knowledge, offering incremental improvements over current methodologies rather than a completely novel approach. However, these improvements are significant and could lead to better practical implementations in the field of autonomous driving.",
44
+ "Strengths": "The initial results presented in the paper are promising, showing potential for the proposed methods. The inclusion of real-world data in the preliminary experiments adds a layer of credibility and relevance to the results, showcasing the practical applicability of the research.",
45
+ "Weaknesses": "The paper lacks detailed exposition on the methodology, particularly in how the algorithms adapt to unexpected or novel scenarios. This is a critical area that requires further development and testing to ensure the robustness and reliability of the proposed solutions.",
46
+ "Questions/Suggestions": "The statistical analysis section could be enhanced by incorporating more robust statistical techniques and a wider array of metrics. Additionally, conducting tests in a variety of driving environments could help in substantiating the claims made and strengthen the overall findings of the research.",
47
+ "Ethics Review": "The research complies with all ethical standards, addressing potential ethical issues related to autonomous driving comprehensively. Issues such as privacy concerns, decision-making in critical situations, and the overall impact on societal norms are discussed and handled with the utmost care.",
48
+ "Overall Score": "3/5",
49
+ "Confidence": "Confidence in the findings is moderate. While the initial results are encouraging, the limited scope of testing and some unresolved questions regarding scalability and robustness temper the confidence in these results.",
50
+ "Code of Conduct": "There are no violations of the code of conduct noted. The research upholds ethical standards and maintains transparency in methodologies and data usage, contributing to its integrity and the trustworthiness of the findings."
51
+ },
52
+ {
53
+ "Summary": "This is a placeholder review for Model 2. The paper explores advanced methodologies in reinforcement learning applied to autonomous driving systems, proposing significant enhancements to decision-making algorithms that could improve safety and operational efficiency. The authors provide a detailed analysis of the current limitations of existing systems and suggest innovative solutions that could transform the field.",
54
+ "Soundness": "The assumptions underlying the proposed enhancements are occasionally not fully justified, particularly concerning the scalability of the algorithms under varied and unpredictable traffic conditions. A more rigorous examination of these assumptions is necessary to solidify the paper's foundation.",
55
+ "Presentation": "While the paper is structured adequately, some sections delve into technical details that are not sufficiently elucidated for a broader audience. This could potentially limit the paper's impact and accessibility, making it challenging for non-specialists to fully grasp the implications of the research.",
56
+ "Contribution": "The paper makes a moderate contribution to the existing body of knowledge, offering incremental improvements over current methodologies rather than a completely novel approach. However, these improvements are significant and could lead to better practical implementations in the field of autonomous driving.",
57
+ "Strengths": "The initial results presented in the paper are promising, showing potential for the proposed methods. The inclusion of real-world data in the preliminary experiments adds a layer of credibility and relevance to the results, showcasing the practical applicability of the research.",
58
+ "Weaknesses": "The paper lacks detailed exposition on the methodology, particularly in how the algorithms adapt to unexpected or novel scenarios. This is a critical area that requires further development and testing to ensure the robustness and reliability of the proposed solutions.",
59
+ "Questions/Suggestions": "The statistical analysis section could be enhanced by incorporating more robust statistical techniques and a wider array of metrics. Additionally, conducting tests in a variety of driving environments could help in substantiating the claims made and strengthen the overall findings of the research.",
60
+ "Ethics Review": "The research complies with all ethical standards, addressing potential ethical issues related to autonomous driving comprehensively. Issues such as privacy concerns, decision-making in critical situations, and the overall impact on societal norms are discussed and handled with the utmost care.",
61
+ "Overall Score": "3/5",
62
+ "Confidence": "Confidence in the findings is moderate. While the initial results are encouraging, the limited scope of testing and some unresolved questions regarding scalability and robustness temper the confidence in these results.",
63
+ "Code of Conduct": "There are no violations of the code of conduct noted. The research upholds ethical standards and maintains transparency in methodologies and data usage, contributing to its integrity and the trustworthiness of the findings."
64
+ }
65
+ ]
66
+ selected_models = ['model1-placeholder', 'model2-placeholder']
67
+
68
+ review_texts = []
69
+ for review in reviews:
70
+ formatted_review = "<div class='review-container'>"
71
+ for section, content in review.items():
72
+ formatted_review += f"<div class='review-section'><strong>{section}:</strong> <span>{html.unescape(content)}</span></div>"
73
+ formatted_review += "</div>"
74
+ review_texts.append(formatted_review)
75
+
76
+ model_a = selected_models[0]
77
+ model_b = selected_models[1]
78
+
79
+ logging.debug(f"Final formatted reviews: {review_texts}")
80
+ return review_texts[0], review_texts[1], gr.update(visible=True), gr.update(visible=True), model_a, model_b
81
+
82
+
83
+ def setup_interface():
84
+ logging.debug("Setting up Gradio interface.")
85
+ css = """
86
+ .review-container {
87
+ padding: 10px;
88
+ margin-bottom: 20px;
89
+ border: 1px solid #ccc;
90
+ background-color: #f9f9f9;
91
+ }
92
+ .review-section {
93
+ margin-bottom: 12px;
94
+ padding: 8px;
95
+ background-color: #ffffff;
96
+ border-left: 4px solid #007BFF;
97
+ padding-left: 10px;
98
+ }
99
+ .review-section strong {
100
+ color: #333;
101
+ font-weight: bold;
102
+ display: block;
103
+ margin-bottom: 5px;
104
+ }
105
+ .review-section span, .gr-markdown {
106
+ color: #000;
107
+ font-size: 14px;
108
+ line-height: 1.5;
109
+ display: block;
110
+ white-space: normal;
111
+ opacity: 1;
112
+ }
113
+ .model-label {
114
+ font-size: 18px;
115
+ font-weight: bold;
116
+ color: #007BFF;
117
+ margin-bottom: 10px;
118
+ }
119
+ .gr-file, .gr-button, .gr-radio {
120
+ width: 300px;
121
+ margin: auto;
122
+ }
123
+ .gr-button-small {
124
+ width: 150px;
125
+ height: 40px;
126
+ font-size: 16px;
127
+ }
128
+ """
129
+ with gr.Blocks(css=css) as demo:
130
+ with gr.Tabs():
131
+ with gr.TabItem("Reviewer Arena"):
132
+ gr.Markdown("## Reviewer Arena")
133
+ gr.Markdown(
134
+ "Upload an academic paper to get reviews from two randomly selected LLMs.")
135
+ with gr.Row():
136
+ file_input = gr.File(label="Upload Academic Paper")
137
+ submit_button = gr.Button(
138
+ "Submit!", elem_id="submit-button")
139
+ with gr.Row():
140
+ with gr.Column():
141
+ gr.HTML("<div class='model-label'>Model A</div>")
142
+ review1 = gr.Markdown()
143
+ with gr.Column():
144
+ gr.HTML("<div class='model-label'>Model B</div>")
145
+ review2 = gr.Markdown()
146
+
147
+ vote_options = ["👍 A is better",
148
+ "👍 B is better", "👔 Tie", "👎 Both are bad"]
149
+ vote = gr.Radio(label="Vote on the best model",
150
+ choices=vote_options, value="Tie", visible=False)
151
+ vote_button = gr.Button("Submit Vote", visible=False)
152
+ vote_message = gr.HTML("", visible=False)
153
+ another_paper_button = gr.Button(
154
+ "Review another paper", visible=False)
155
+
156
+ model_identity_message = gr.HTML("", visible=False)
157
+
158
+ def handle_vote(vote, model_a, model_b):
159
+ print(f"Vote received: {vote}")
160
+ message = f"<p>Thank you for your vote!</p><p>Model A: {model_a}</p><p>Model B: {model_b}</p>"
161
+ return gr.update(value=message, visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
162
+
163
+ vote_button.click(fn=handle_vote, inputs=[vote, model_identity_message, model_identity_message], outputs=[
164
+ vote_message, vote, vote_button, another_paper_button])
165
+
166
+ submit_button.click(
167
+ fn=review_papers,
168
+ inputs=[file_input],
169
+ outputs=[review1, review2, vote, vote_button,
170
+ model_identity_message, model_identity_message]
171
+ )
172
+
173
+ another_paper_button.click(
174
+ fn=lambda: None, inputs=None, outputs=None, js="() => { location.reload(); }")
175
+ with gr.TabItem("Leaderboard"):
176
+ gr.Markdown("## Leaderboard")
177
+ leaderboard_html = """
178
+ <table style="width:100%; border: 1px solid #444; border-collapse: collapse; font-family: Arial, sans-serif; background-color: #2b2b2b;">
179
+ <thead>
180
+ <tr style="border: 1px solid #444; padding: 12px; background-color: #1a1a1a;">
181
+ <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Rank</th>
182
+ <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Model</th>
183
+ <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Arena Elo</th>
184
+ <th style="border: 1px solid #444; padding: 12px; color: #ddd;">95% CI</th>
185
+ <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Votes</th>
186
+ <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Organization</th>
187
+ <th style="border: 1px solid #444; padding: 12px; color: #ddd;">License</th>
188
+ <th style="border: 1px solid #444; padding: 12px; color: #ddd;">Knowledge Cutoff</th>
189
+ </tr>
190
+ </thead>
191
+ <tbody>
192
+ <tr style="border: 1px solid #444; padding: 12px;">
193
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1</td>
194
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">GPT-4-Turbo-2024-04-09</td>
195
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1258</td>
196
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+3/-3</td>
197
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">44592</td>
198
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">OpenAI</td>
199
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
200
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/12</td>
201
+ </tr>
202
+ <tr style="border: 1px solid #444; padding: 12px;">
203
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2</td>
204
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">GPT-4-1106-preview</td>
205
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1252</td>
206
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+2/-3</td>
207
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">76173</td>
208
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">OpenAI</td>
209
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
210
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/4</td>
211
+ </tr>
212
+ <tr style="border: 1px solid #444; padding: 12px;">
213
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2</td>
214
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Gemini 1.5 Pro API-0409-Preview</td>
215
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1249</td>
216
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+3/-3</td>
217
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">61011</td>
218
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Google</td>
219
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
220
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/11</td>
221
+ </tr>
222
+ <tr style="border: 1px solid #444; padding: 12px;">
223
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2</td>
224
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Claude 3 Opus</td>
225
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1248</td>
226
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+2/-2</td>
227
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">101063</td>
228
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Anthropic</td>
229
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
230
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/8</td>
231
+ </tr>
232
+ <tr style="border: 1px solid #444; padding: 12px;">
233
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">3</td>
234
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">GPT-4-0125-preview</td>
235
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">1246</td>
236
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">+3/-2</td>
237
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">70239</td>
238
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">OpenAI</td>
239
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">Proprietary</td>
240
+ <td style="border: 1px solid #444; padding: 12px; color: #ddd;">2023/12</td>
241
+ </tr>
242
+ </tbody>
243
+ </table>
244
+ """
245
+ gr.HTML(leaderboard_html)
246
+
247
+ logging.debug("Gradio interface setup complete.")
248
+ return demo
249
+
250
+
251
+ if __name__ == "__main__":
252
+ logging.basicConfig(level=logging.INFO)
253
+ demo = setup_interface()
254
+ demo.launch()
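The new submit and vote handlers work by returning one value per output component and using gr.update(...) to reveal widgets that start hidden. A stripped-down sketch of that pattern with placeholder names (not the app itself):

import gradio as gr

def show_results():
    # One return value per output component, in the order listed in outputs=[...].
    return "Review A (placeholder)", "Review B (placeholder)", gr.update(visible=True)

with gr.Blocks() as demo:
    submit = gr.Button("Submit")
    review_a = gr.Markdown()
    review_b = gr.Markdown()
    vote = gr.Radio(["A is better", "B is better", "Tie"], visible=False)
    submit.click(fn=show_results, inputs=None, outputs=[review_a, review_b, vote])

if __name__ == "__main__":
    demo.launch()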
file_utils.py CHANGED
@@ -1,3 +1,3 @@
- def read_file(file_path):
- with open(file_path, 'r', encoding='utf-8') as f:
  return f.read()

+ def read_file(file_path):
+ with open(file_path, 'r', encoding='utf-8') as f:
  return f.read()
iclr2024/question11.txt CHANGED
@@ -1,7 +1,7 @@
- If there are no violations of the Code of Conduct with this paper, please respond with NO. Otherwise, if this paper violates the Code of Conduct, please indicate the relevant section(s) from the following options:
-
- Yes, Harassment, bullying, or discrimination based on personal characteristics
- Yes, Inappropriate physical contact, sexual harassment, or unwelcome sexual attention
- Yes, Offensive comments related to gender, race, religion, or other protected characteristics
- Yes, Disruption of talks or other events, or behavior interfering with participation
  Yes, Inappropriate use of imagery, language, or personal attacks in virtual interactions

+ If there are no violations of the Code of Conduct with this paper, please respond with NO. Otherwise, if this paper violates the Code of Conduct, please indicate the relevant section(s) from the following options:
+
+ Yes, Harassment, bullying, or discrimination based on personal characteristics
+ Yes, Inappropriate physical contact, sexual harassment, or unwelcome sexual attention
+ Yes, Offensive comments related to gender, race, religion, or other protected characteristics
+ Yes, Disruption of talks or other events, or behavior interfering with participation
  Yes, Inappropriate use of imagery, language, or personal attacks in virtual interactions
iclr2024/systemrole.txt CHANGED
@@ -1,11 +1,11 @@
- You are a very critical but fair peer reviewer. You will be provided with papers submitted to a conference/journal to review. The papers will be delimited with #### characters.
-
- We are aiming for a 20-25% acceptance rate. Average score thresholds of 5.5-5.7 roughly correspond to acceptance rates of 25%-20%. It is certainly possible to both accept papers below this threshold and reject papers above it. But any such decision should be properly explained.
-
- The statistics for the previous year was: A total of 3422 submissions were received. The average score of all submissions was 5.47 with standard deviation 1.30, with scores ranging from 1.00 to 9.00. Aim for a similar distribution of scores and use the full range of scores between 1-10.
-
- Out of all submissions, 32% (1095 submissions) were accepted, with scores ranging from 4.50 to 9.00 and an average score of 6.61 with a standard deviation of 0.75. Only 2.1% (55 submissions) were accepted for oral presentation, with scores ranging from 5.00 to 9.00 and an average score of 7.80 with a standard deviation of 0.63.
-
- 6.64% (174 submissions) were selected for the spotlight, with scores ranging from 5.60 to 8.60 and an average score of 7.33 with a standard deviation of 0.58. 33.04% (866 submissions) were accepted for poster presentation, with scores ranging from 4.50 to 8.00 and an average score of 6.39 with a standard deviation of 0.61.
-
  60.36% (1582 submissions) were rejected, with scores ranging from 1.00 to 7.50 and an average score of 4.69 with a standard deviation of 0.97. Additionally, 775 submissions were withdrawn and 26 were desk rejected.

+ You are a very critical but fair peer reviewer. You will be provided with papers submitted to a conference/journal to review. The papers will be delimited with #### characters.
+
+ We are aiming for a 20-25% acceptance rate. Average score thresholds of 5.5-5.7 roughly correspond to acceptance rates of 25%-20%. It is certainly possible to both accept papers below this threshold and reject papers above it. But any such decision should be properly explained.
+
+ The statistics for the previous year was: A total of 3422 submissions were received. The average score of all submissions was 5.47 with standard deviation 1.30, with scores ranging from 1.00 to 9.00. Aim for a similar distribution of scores and use the full range of scores between 1-10.
+
+ Out of all submissions, 32% (1095 submissions) were accepted, with scores ranging from 4.50 to 9.00 and an average score of 6.61 with a standard deviation of 0.75. Only 2.1% (55 submissions) were accepted for oral presentation, with scores ranging from 5.00 to 9.00 and an average score of 7.80 with a standard deviation of 0.63.
+
+ 6.64% (174 submissions) were selected for the spotlight, with scores ranging from 5.60 to 8.60 and an average score of 7.33 with a standard deviation of 0.58. 33.04% (866 submissions) were accepted for poster presentation, with scores ranging from 4.50 to 8.00 and an average score of 6.39 with a standard deviation of 0.61.
+
  60.36% (1582 submissions) were rejected, with scores ranging from 1.00 to 7.50 and an average score of 4.69 with a standard deviation of 0.97. Additionally, 775 submissions were withdrawn and 26 were desk rejected.
logging_config.py CHANGED
@@ -1,9 +1,9 @@
- import logging
-
- def setup_logging():
- logging.basicConfig(
- filename="arena.log",
- level=logging.DEBUG, # Change to DEBUG level
- format='%(asctime)s - %(levelname)s - %(message)s'
- )
  logging.info("Logging setup complete.")

+ import logging
+
+ def setup_logging():
+ logging.basicConfig(
+ filename="arena.log",
+ level=logging.DEBUG, # Change to DEBUG level
+ format='%(asctime)s - %(levelname)s - %(message)s'
+ )
  logging.info("Logging setup complete.")
models.py CHANGED
@@ -1,158 +1,158 @@
1
- import os
2
- import logging
3
- import openai
4
- import tiktoken
5
- import re
6
- import anthropic
7
- import cohere
8
- import google.generativeai as genai
9
- import time
10
- from file_utils import read_file
11
- from openai import OpenAI
12
-
13
- class Paper:
14
- def __init__(self, arxiv_id, tex_file):
15
- self.arxiv_id = arxiv_id
16
- self.tex_file = tex_file
17
-
18
- class PaperProcessor:
19
- MAX_TOKENS = 127192
20
- encoding = tiktoken.encoding_for_model("gpt-4-0125-preview")
21
-
22
- def __init__(self, prompt_dir, model, openai_api_key, claude_api_key, gemini_api_key, commandr_api_key):
23
- self.prompt_dir = prompt_dir
24
- self.model = model
25
- self.openai_api_key = openai_api_key
26
- self.claude_api_key = claude_api_key
27
- self.gemini_api_key = gemini_api_key
28
- self.commandr_api_key = commandr_api_key
29
-
30
- def count_tokens(self, text):
31
- return len(self.encoding.encode(text))
32
-
33
- def truncate_content(self, content):
34
- token_count = self.count_tokens(content)
35
- logging.debug(f"Token count before truncation: {token_count}")
36
- if token_count > self.MAX_TOKENS:
37
- tokens = self.encoding.encode(content)
38
- truncated_tokens = tokens[:self.MAX_TOKENS]
39
- truncated_content = self.encoding.decode(truncated_tokens)
40
- logging.debug(f"Content truncated. Token count after truncation: {self.count_tokens(truncated_content)}")
41
- return truncated_content
42
- return content
43
-
44
- def prepare_base_prompt(self, paper):
45
- return paper.tex_file
46
-
47
- def call_model(self, prompt, model_type):
48
- system_role_file_path = os.path.join(self.prompt_dir, "systemrole.txt")
49
- if not os.path.exists(system_role_file_path):
50
- logging.error(f"System role file not found: {system_role_file_path}")
51
- return None
52
-
53
- system_role = read_file(system_role_file_path)
54
- logging.debug(f"Token count of full prompt: {self.count_tokens(prompt)}")
55
- logging.info(f"Sending the following prompt to {model_type}: {prompt}")
56
-
57
- try:
58
- if model_type == 'gpt':
59
- client = OpenAI(api_key=self.openai_api_key)
60
- messages = [{"role": "system", "content": system_role}, {"role": "user", "content": prompt}]
61
- completion = client.chat.completions.create(
62
- model="gpt-4-turbo-2024-04-09",
63
- messages=messages,
64
- temperature=1
65
- )
66
- return completion.choices[0].message.content.strip()
67
-
68
- elif model_type == 'claude':
69
- client = anthropic.Anthropic(api_key=self.claude_api_key)
70
- response = client.messages.create(
71
- model='claude-3-opus-20240229',
72
- max_tokens=4096,
73
- system=system_role,
74
- temperature=0.5,
75
- messages=[{"role": "user", "content": prompt}]
76
- )
77
- return response.content[0].text
78
-
79
- elif model_type == 'commandr':
80
- co = cohere.Client(self.commandr_api_key)
81
- response = co.chat(
82
- model="command-r-plus",
83
- message=prompt,
84
- preamble=system_role
85
- )
86
- return response.text
87
-
88
- elif model_type == 'gemini':
89
- genai.configure(api_key=self.gemini_api_key)
90
- model = genai.GenerativeModel('gemini-pro')
91
- response = model.generate_content(prompt)
92
- return response.candidates[0].content.parts[0].text
93
-
94
- except Exception as e:
95
- logging.error(f"Exception occurred: {e}")
96
- return None
97
-
98
- def is_content_appropriate(self, content):
99
- try:
100
- response = openai.moderations.create(input=content)
101
- return not response["results"][0]["flagged"]
102
- except Exception as e:
103
- logging.error(f"Exception occurred while checking content appropriateness: {e}")
104
- return True # In case of an error, default to content being appropriate
105
-
106
- def get_prompt_files(self, prompt_dir):
107
- return [f for f in os.listdir(prompt_dir) if f.endswith('.txt') and f.startswith('question')]
108
-
109
- def process_paper(self, paper):
110
- openai.api_key = self.openai_api_key
111
- start_time = time.time()
112
-
113
- base_prompt = self.prepare_base_prompt(paper)
114
- if base_prompt is None:
115
- return "Error: Base prompt could not be prepared."
116
-
117
- moderation_response = openai.moderations.create(input=base_prompt)
118
- if moderation_response.results[0].flagged:
119
- return ["Desk Rejected", "The paper contains inappropriate or harmful content."]
120
-
121
- review_output = []
122
- previous_responses = []
123
- header = ['Summary:', 'Soundness:', 'Presentation:', 'Contribution:', 'Strengths:', 'Weaknesses:', 'Questions:', 'Flag For Ethics Review:', 'Rating:', 'Confidence:', 'Code Of Conduct:']
124
- for i in range(1, 12):
125
- question_file = os.path.join(self.prompt_dir, f"question{i}.txt")
126
- question_text = read_file(question_file)
127
-
128
- if i == 1:
129
- prompt = f"{question_text}\n\n####\n{base_prompt}\n####"
130
- else:
131
- prompt = f"\nHere is your review so far:\n{' '.join(previous_responses)}\n\nHere are your reviewer instructions. Please answer the following question:\n{question_text}"
132
-
133
- truncated_prompt = self.truncate_content(prompt)
134
- logging.info(f"Processing prompt for question {i}")
135
-
136
- response = self.call_model(truncated_prompt, self.model)
137
- if response is None:
138
- response = "N/A"
139
-
140
- if i in [2, 3, 4, 10]:
141
- number_match = re.search(r'\b\d+\b', response)
142
- if number_match:
143
- number = int(number_match.group(0))
144
- response = '5/5' if number > 5 else number_match.group(0) + '/5'
145
- elif i == 9:
146
- number_match = re.search(r'\b\d+\b', response)
147
- if number_match:
148
- response = number_match.group(0) + '/10'
149
-
150
- response_with_header = f"{header[i-1]} {response}"
151
- review_output.append(response_with_header)
152
- previous_responses.append(response)
153
-
154
- end_time = time.time()
155
- elapsed_time = end_time - start_time
156
- print(f"Time taken to process paper: {elapsed_time:.2f} seconds")
157
- return review_output
158
-
 
1
+ import os
2
+ import logging
3
+ import openai
4
+ import tiktoken
5
+ import re
6
+ import anthropic
7
+ import cohere
8
+ import google.generativeai as genai
9
+ import time
10
+ from file_utils import read_file
11
+ from openai import OpenAI
12
+
13
+ class Paper:
14
+ def __init__(self, arxiv_id, tex_file):
15
+ self.arxiv_id = arxiv_id
16
+ self.tex_file = tex_file
17
+
18
+ class PaperProcessor:
19
+ MAX_TOKENS = 127192
20
+ encoding = tiktoken.encoding_for_model("gpt-4-0125-preview")
21
+
22
+ def __init__(self, prompt_dir, model, openai_api_key, claude_api_key, gemini_api_key, commandr_api_key):
23
+ self.prompt_dir = prompt_dir
24
+ self.model = model
25
+ self.openai_api_key = openai_api_key
26
+ self.claude_api_key = claude_api_key
27
+ self.gemini_api_key = gemini_api_key
28
+ self.commandr_api_key = commandr_api_key
29
+
30
+ def count_tokens(self, text):
31
+ return len(self.encoding.encode(text))
32
+
33
+ def truncate_content(self, content):
34
+ token_count = self.count_tokens(content)
35
+ logging.debug(f"Token count before truncation: {token_count}")
36
+ if token_count > self.MAX_TOKENS:
37
+ tokens = self.encoding.encode(content)
38
+ truncated_tokens = tokens[:self.MAX_TOKENS]
39
+ truncated_content = self.encoding.decode(truncated_tokens)
40
+ logging.debug(f"Content truncated. Token count after truncation: {self.count_tokens(truncated_content)}")
41
+ return truncated_content
42
+ return content
43
+
44
+ def prepare_base_prompt(self, paper):
45
+ return paper.tex_file
46
+
47
+ def call_model(self, prompt, model_type):
48
+ system_role_file_path = os.path.join(self.prompt_dir, "systemrole.txt")
49
+ if not os.path.exists(system_role_file_path):
50
+ logging.error(f"System role file not found: {system_role_file_path}")
51
+ return None
52
+
53
+ system_role = read_file(system_role_file_path)
54
+ logging.debug(f"Token count of full prompt: {self.count_tokens(prompt)}")
55
+ logging.info(f"Sending the following prompt to {model_type}: {prompt}")
56
+
57
+ try:
58
+ if model_type == 'gpt':
59
+ client = OpenAI(api_key=self.openai_api_key)
60
+ messages = [{"role": "system", "content": system_role}, {"role": "user", "content": prompt}]
61
+ completion = client.chat.completions.create(
62
+ model="gpt-4-turbo-2024-04-09",
63
+ messages=messages,
64
+ temperature=1
65
+ )
66
+ return completion.choices[0].message.content.strip()
67
+
68
+ elif model_type == 'claude':
69
+ client = anthropic.Anthropic(api_key=self.claude_api_key)
70
+ response = client.messages.create(
71
+ model='claude-3-opus-20240229',
72
+ max_tokens=4096,
73
+ system=system_role,
74
+ temperature=0.5,
75
+ messages=[{"role": "user", "content": prompt}]
76
+ )
77
+ return response.content[0].text
78
+
79
+ elif model_type == 'commandr':
80
+ co = cohere.Client(self.commandr_api_key)
81
+ response = co.chat(
82
+ model="command-r-plus",
83
+ message=prompt,
84
+ preamble=system_role
85
+ )
86
+ return response.text
87
+
88
+ elif model_type == 'gemini':
89
+ genai.configure(api_key=self.gemini_api_key)
90
+ model = genai.GenerativeModel('gemini-pro')
91
+ response = model.generate_content(prompt)
92
+ return response.candidates[0].content.parts[0].text
93
+
94
+ except Exception as e:
95
+ logging.error(f"Exception occurred: {e}")
96
+ return None
97
+
98
+ def is_content_appropriate(self, content):
99
+ try:
100
+ response = openai.moderations.create(input=content)
101
+ return not response["results"][0]["flagged"]
102
+ except Exception as e:
103
+ logging.error(f"Exception occurred while checking content appropriateness: {e}")
104
+ return True # In case of an error, default to content being appropriate
105
+
106
+ def get_prompt_files(self, prompt_dir):
107
+ return [f for f in os.listdir(prompt_dir) if f.endswith('.txt') and f.startswith('question')]
108
+
109
+ def process_paper(self, paper):
110
+ openai.api_key = self.openai_api_key
111
+ start_time = time.time()
112
+
113
+ base_prompt = self.prepare_base_prompt(paper)
114
+ if base_prompt is None:
115
+ return "Error: Base prompt could not be prepared."
116
+
117
+ moderation_response = openai.moderations.create(input=base_prompt)
118
+ if moderation_response.results[0].flagged:
119
+ return ["Desk Rejected", "The paper contains inappropriate or harmful content."]
120
+
121
+ review_output = []
122
+ previous_responses = []
123
+ header = ['Summary:', 'Soundness:', 'Presentation:', 'Contribution:', 'Strengths:', 'Weaknesses:', 'Questions:', 'Flag For Ethics Review:', 'Rating:', 'Confidence:', 'Code Of Conduct:']
124
+ for i in range(1, 12):
125
+ question_file = os.path.join(self.prompt_dir, f"question{i}.txt")
126
+ question_text = read_file(question_file)
127
+
128
+ if i == 1:
129
+ prompt = f"{question_text}\n\n####\n{base_prompt}\n####"
130
+ else:
131
+ prompt = f"\nHere is your review so far:\n{' '.join(previous_responses)}\n\nHere are your reviewer instructions. Please answer the following question:\n{question_text}"
132
+
133
+ truncated_prompt = self.truncate_content(prompt)
134
+ logging.info(f"Processing prompt for question {i}")
135
+
136
+ response = self.call_model(truncated_prompt, self.model)
137
+ if response is None:
138
+ response = "N/A"
139
+
140
+ if i in [2, 3, 4, 10]:
141
+ number_match = re.search(r'\b\d+\b', response)
142
+ if number_match:
143
+ number = int(number_match.group(0))
144
+ response = '5/5' if number > 5 else number_match.group(0) + '/5'
145
+ elif i == 9:
146
+ number_match = re.search(r'\b\d+\b', response)
147
+ if number_match:
148
+ response = number_match.group(0) + '/10'
149
+
150
+ response_with_header = f"{header[i-1]} {response}"
151
+ review_output.append(response_with_header)
152
+ previous_responses.append(response)
153
+
154
+ end_time = time.time()
155
+ elapsed_time = end_time - start_time
156
+ print(f"Time taken to process paper: {elapsed_time:.2f} seconds")
157
+ return review_output
158
+
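A hypothetical standalone usage sketch of the classes above; the arxiv id is a placeholder, the environment-variable names mirror app.py's api_keys dict, and model must be one of the types handled in call_model:

import os
from models import Paper, PaperProcessor

paper = Paper(arxiv_id="0000.00000", tex_file="#### full paper text here ####")
processor = PaperProcessor(
    prompt_dir="iclr2024",
    model="gpt",  # one of 'gpt', 'claude', 'gemini', 'commandr'
    openai_api_key=os.environ.get("openai_api_key"),
    claude_api_key=os.environ.get("anthropic_api_key"),
    gemini_api_key=os.environ.get("google_api_key"),
    commandr_api_key=os.environ.get("cohere_api_key"),
)
review = processor.process_paper(paper)  # list of "Header: response" strings
print("\n".join(review))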
requirements.txt CHANGED
@@ -1,108 +1,108 @@
- aiofiles==23.2.1
- altair==5.3.0
- annotated-types==0.6.0
- anthropic==0.25.8
- anyio==4.3.0
- attrs==23.2.0
- beautifulsoup4==4.12.3
- boto3==1.34.103
- botocore==1.34.103
- cachetools==5.3.3
- certifi==2024.2.2
- charset-normalizer==3.3.2
- click==8.1.7
- cohere==5.4.0
- colorama==0.4.6
- contourpy==1.2.1
- cycler==0.12.1
- distro==1.9.0
- dnspython==2.6.1
- email_validator==2.1.1
- fastapi==0.111.0
- fastapi-cli==0.0.3
- fastavro==1.9.4
- ffmpy==0.3.2
- filelock==3.14.0
- fonttools==4.51.0
- fsspec==2024.3.1
- google==3.0.0
- google-ai-generativelanguage==0.6.2
- google-api-core==2.19.0
- google-api-python-client==2.129.0
- google-auth==2.29.0
- google-auth-httplib2==0.2.0
- google-generativeai==0.5.2
- googleapis-common-protos==1.63.0
- gradio==4.31.0
- gradio_client==0.16.2
- grpcio==1.63.0
- grpcio-status==1.62.2
- h11==0.14.0
- httpcore==1.0.5
- httplib2==0.22.0
- httptools==0.6.1
- httpx==0.27.0
- httpx-sse==0.4.0
- huggingface-hub==0.23.0
- idna==3.7
- importlib_resources==6.4.0
- Jinja2==3.1.4
- jmespath==1.0.1
- jsonschema==4.22.0
- jsonschema-specifications==2023.12.1
- kiwisolver==1.4.5
- markdown-it-py==3.0.0
- MarkupSafe==2.1.5
- matplotlib==3.8.4
- mdurl==0.1.2
- numpy==1.26.4
- openai==1.28.1
- orjson==3.10.3
- packaging==24.0
- pandas==2.2.2
- pillow==10.3.0
- proto-plus==1.23.0
- protobuf==4.25.3
- pyasn1==0.6.0
- pyasn1_modules==0.4.0
- pydantic==2.7.1
- pydantic_core==2.18.2
- pydub==0.25.1
- Pygments==2.18.0
- PyMuPDF==1.24.3
- PyMuPDFb==1.24.3
- pyparsing==3.1.2
- python-dateutil==2.9.0.post0
- python-dotenv==1.0.1
- python-multipart==0.0.9
- pytz==2024.1
- PyYAML==6.0.1
- referencing==0.35.1
- regex==2024.5.10
- requests==2.31.0
- rich==13.7.1
- rpds-py==0.18.1
- rsa==4.9
- ruff==0.4.4
- s3transfer==0.10.1
- semantic-version==2.10.0
- shellingham==1.5.4
- six==1.16.0
- sniffio==1.3.1
- soupsieve==2.5
- starlette==0.37.2
- tiktoken==0.6.0
- tokenizers==0.19.1
- tomlkit==0.12.0
- toolz==0.12.1
- tqdm==4.66.4
- typer==0.12.3
- types-requests==2.31.0.20240406
- typing_extensions==4.11.0
- tzdata==2024.1
- ujson==5.9.0
- uritemplate==4.1.1
- urllib3==2.2.1
- uvicorn==0.29.0
- watchfiles==0.21.0
- websockets==11.0.3

+ aiofiles==23.2.1
+ altair==5.3.0
+ annotated-types==0.6.0
+ anthropic==0.25.8
+ anyio==4.3.0
+ attrs==23.2.0
+ beautifulsoup4==4.12.3
+ boto3==1.34.103
+ botocore==1.34.103
+ cachetools==5.3.3
+ certifi==2024.2.2
+ charset-normalizer==3.3.2
+ click==8.1.7
+ cohere==5.4.0
+ colorama==0.4.6
+ contourpy==1.2.1
+ cycler==0.12.1
+ distro==1.9.0
+ dnspython==2.6.1
+ email_validator==2.1.1
+ fastapi==0.111.0
+ fastapi-cli==0.0.3
+ fastavro==1.9.4
+ ffmpy==0.3.2
+ filelock==3.14.0
+ fonttools==4.51.0
+ fsspec==2024.3.1
+ google==3.0.0
+ google-ai-generativelanguage==0.6.2
+ google-api-core==2.19.0
+ google-api-python-client==2.129.0
+ google-auth==2.29.0
+ google-auth-httplib2==0.2.0
+ google-generativeai==0.5.2
+ googleapis-common-protos==1.63.0
+ gradio==4.31.0
+ gradio_client==0.16.2
+ grpcio==1.63.0
+ grpcio-status==1.62.2
+ h11==0.14.0
+ httpcore==1.0.5
+ httplib2==0.22.0
+ httptools==0.6.1
+ httpx==0.27.0
+ httpx-sse==0.4.0
+ huggingface-hub==0.23.0
+ idna==3.7
+ importlib_resources==6.4.0
+ Jinja2==3.1.4
+ jmespath==1.0.1
+ jsonschema==4.22.0
+ jsonschema-specifications==2023.12.1
+ kiwisolver==1.4.5
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib==3.8.4
+ mdurl==0.1.2
+ numpy==1.26.4
+ openai==1.28.1
+ orjson==3.10.3
+ packaging==24.0
+ pandas==2.2.2
+ pillow==10.3.0
+ proto-plus==1.23.0
+ protobuf==4.25.3
+ pyasn1==0.6.0
+ pyasn1_modules==0.4.0
+ pydantic==2.7.1
+ pydantic_core==2.18.2
+ pydub==0.25.1
+ Pygments==2.18.0
+ PyMuPDF==1.24.3
+ PyMuPDFb==1.24.3
+ pyparsing==3.1.2
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-multipart==0.0.9
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.35.1
+ regex==2024.5.10
+ requests==2.31.0
+ rich==13.7.1
+ rpds-py==0.18.1
+ rsa==4.9
+ ruff==0.4.4
+ s3transfer==0.10.1
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.1
+ soupsieve==2.5
+ starlette==0.37.2
+ tiktoken==0.6.0
+ tokenizers==0.19.1
+ tomlkit==0.12.0
+ toolz==0.12.1
+ tqdm==4.66.4
+ typer==0.12.3
+ types-requests==2.31.0.20240406
+ typing_extensions==4.11.0
+ tzdata==2024.1
+ ujson==5.9.0
+ uritemplate==4.1.1
+ urllib3==2.2.1
+ uvicorn==0.29.0
+ watchfiles==0.21.0
+ websockets==11.0.3
utils.py CHANGED
@@ -1,49 +1,45 @@
- import fitz
- import os
- import logging
- import random
- from models import Paper, PaperProcessor
-
- def extract_text_from_pdf(filename):
- with fitz.open(filename) as pdf_document:
- text = ""
- for page in pdf_document:
- text += page.get_text()
- return text.encode('latin-1', 'replace').decode('latin-1')
-
- def process_paper(pdf_file, paper_dir, prompt_dir, api_keys):
- logging.info(f"Processing file type in process_paper: {type(pdf_file)}") # Log the type of the file here as well
- logging.debug(f"Starting to process paper: {pdf_file}")
- # Ensure the directory exists
- os.makedirs(paper_dir, exist_ok=True)
-
- # Handle file based on its type
- if isinstance(pdf_file, str):
- # Assume pdf_file is a path to the PDF file
- pdf_path = pdf_file
- elif hasattr(pdf_file, 'name') and hasattr(pdf_file, 'read'):
- # It's a file-like object
- pdf_path = os.path.join(paper_dir, pdf_file.name)
- with open(pdf_path, "wb") as f:
- f.write(pdf_file.read())
- else:
- logging.error("Received object is neither a path nor a file-like object.")
- return []
-
- # Extract text from the PDF
- extracted_text = extract_text_from_pdf(pdf_path)
- paper = Paper(pdf_file.name if hasattr(pdf_file, 'name') else os.path.basename(pdf_path), extracted_text)
-
- # Randomly select two models
- models = ['gpt', 'claude', 'gemini', 'commandr']
- selected_models = random.sample(models, 2)
-
- # Process the paper with each selected model
- reviews = []
- for model in selected_models:
- processor = PaperProcessor(prompt_dir, model, **api_keys)
- review_text = processor.process_paper(paper)
- #review_dict = {section.split(':')[0]: section.split(':')[1].strip() for section in review_text}
- reviews.append(review_text)
- logging.debug(f"Reviews generated: {reviews}")
- return reviews

+ import fitz
+ import os
+ import logging
+ import random
+ from models import Paper, PaperProcessor
+
+
+ def extract_text_from_pdf(filename):
+ with fitz.open(filename) as pdf_document:
+ text = ""
+ for page in pdf_document:
+ text += page.get_text()
+ return text.encode('latin-1', 'replace').decode('latin-1')
+
+
+ def process_paper(pdf_file, paper_dir, prompt_dir, api_keys):
+ logging.info(f"Processing file type in process_paper: {type(pdf_file)}")
+ logging.debug(f"Starting to process paper: {pdf_file}")
+ os.makedirs(paper_dir, exist_ok=True)
+
+ if isinstance(pdf_file, str):
+ pdf_path = pdf_file
+ elif hasattr(pdf_file, 'name') and hasattr(pdf_file, 'read'):
+ pdf_path = os.path.join(paper_dir, pdf_file.name)
+ with open(pdf_path, "wb") as f:
+ f.write(pdf_file.read())
+ else:
+ logging.error(
+ "Received object is neither a path nor a file-like object.")
+ return [], []
+
+ extracted_text = extract_text_from_pdf(pdf_path)
+ paper = Paper(pdf_file.name if hasattr(pdf_file, 'name')
+ else os.path.basename(pdf_path), extracted_text)
+
+ models = ['gpt', 'claude', 'gemini', 'commandr']
+ selected_models = random.sample(models, 2)
+
+ reviews = []
+ for model in selected_models:
+ processor = PaperProcessor(prompt_dir, model, **api_keys)
+ review_text = processor.process_paper(paper)
+ reviews.append(review_text)
+ logging.debug(f"Reviews generated: {reviews}")
+ return reviews, selected_models
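process_paper now returns the reviews together with the two sampled model names. A short consumption sketch, mirroring app.py's real-API branch (the file path and key names here are illustrative):

import os
from utils import process_paper

api_keys = {
    'openai_api_key': os.environ.get('openai_api_key'),
    'claude_api_key': os.environ.get('anthropic_api_key'),
    'gemini_api_key': os.environ.get('google_api_key'),
    'commandr_api_key': os.environ.get('cohere_api_key'),
}
reviews, selected_models = process_paper(
    "example_paper.pdf",     # path or file-like object for the uploaded PDF
    "path_to_temp_storage",  # paper_dir
    "iclr2024",              # prompt_dir
    api_keys,
)
print(selected_models)  # e.g. ['claude', 'commandr'] -- the two randomly sampled models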