cyberosa committed on
Commit
ede0fdc
·
1 Parent(s): 35db90a

disabling run_benchmark tab temporarily

Browse files
Files changed (2) hide show
  1. app.py +80 -78
  2. start.py +15 -15
app.py CHANGED
@@ -11,7 +11,9 @@ from tabs.faq import (
11
  about_the_tools,
12
  )
13
  from tabs.howto_benchmark import how_to_run
14
- from tabs.run_benchmark import run_benchmark_main
 
 
15
 
16
  demo = gr.Blocks()
17
 
@@ -109,83 +111,83 @@ with demo:
109
  gr.Markdown(how_to_run)
110
 
111
  # fourth tab - run the benchmark
112
- with gr.TabItem("🔥 Run the Benchmark"):
113
- with gr.Row():
114
- tool_name = gr.Dropdown(
115
- [
116
- "prediction-offline",
117
- "prediction-online",
118
- # "prediction-online-summarized-info",
119
- # "prediction-offline-sme",
120
- # "prediction-online-sme",
121
- "prediction-request-rag",
122
- "prediction-request-reasoning",
123
- # "prediction-url-cot-claude",
124
- # "prediction-request-rag-cohere",
125
- # "prediction-with-research-conservative",
126
- # "prediction-with-research-bold",
127
- ],
128
- label="Tool Name",
129
- info="Choose the tool to run",
130
- )
131
- model_name = gr.Dropdown(
132
- [
133
- "gpt-3.5-turbo-0125",
134
- "gpt-4-0125-preview",
135
- "claude-3-haiku-20240307",
136
- "claude-3-sonnet-20240229",
137
- "claude-3-opus-20240229",
138
- "databricks/dbrx-instruct:nitro",
139
- "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
140
- # "cohere/command-r-plus",
141
- ],
142
- label="Model Name",
143
- info="Choose the model to use",
144
- )
145
- with gr.Row():
146
- openai_api_key = gr.Textbox(
147
- label="OpenAI API Key",
148
- placeholder="Enter your OpenAI API key here",
149
- type="password",
150
- )
151
- anthropic_api_key = gr.Textbox(
152
- label="Anthropic API Key",
153
- placeholder="Enter your Anthropic API key here",
154
- type="password",
155
- )
156
- openrouter_api_key = gr.Textbox(
157
- label="OpenRouter API Key",
158
- placeholder="Enter your OpenRouter API key here",
159
- type="password",
160
- )
161
- with gr.Row():
162
- num_questions = gr.Slider(
163
- minimum=1,
164
- maximum=340,
165
- value=10,
166
- label="Number of questions to run the benchmark on",
167
- )
168
- with gr.Row():
169
- run_button = gr.Button("Run Benchmark")
170
- with gr.Row():
171
- with gr.Accordion("Results", open=True):
172
- result = gr.Dataframe()
173
- with gr.Row():
174
- with gr.Accordion("Summary", open=False):
175
- summary = gr.Dataframe()
176
-
177
- run_button.click(
178
- run_benchmark_gradio,
179
- inputs=[
180
- tool_name,
181
- model_name,
182
- num_questions,
183
- openai_api_key,
184
- anthropic_api_key,
185
- openrouter_api_key,
186
- ],
187
- outputs=[result, summary],
188
- )
189
 
190
 
191
  demo.queue(default_concurrency_limit=40).launch()
 
11
  about_the_tools,
12
  )
13
  from tabs.howto_benchmark import how_to_run
14
+
15
+ # disabling temporarily
16
+ # from tabs.run_benchmark import run_benchmark_main
17
 
18
  demo = gr.Blocks()
19
 
 
111
  gr.Markdown(how_to_run)
112
 
113
  # fourth tab - run the benchmark
114
+ # with gr.TabItem("🔥 Run the Benchmark"):
115
+ # with gr.Row():
116
+ # tool_name = gr.Dropdown(
117
+ # [
118
+ # "prediction-offline",
119
+ # "prediction-online",
120
+ # # "prediction-online-summarized-info",
121
+ # # "prediction-offline-sme",
122
+ # # "prediction-online-sme",
123
+ # "prediction-request-rag",
124
+ # "prediction-request-reasoning",
125
+ # # "prediction-url-cot-claude",
126
+ # # "prediction-request-rag-cohere",
127
+ # # "prediction-with-research-conservative",
128
+ # # "prediction-with-research-bold",
129
+ # ],
130
+ # label="Tool Name",
131
+ # info="Choose the tool to run",
132
+ # )
133
+ # model_name = gr.Dropdown(
134
+ # [
135
+ # "gpt-3.5-turbo-0125",
136
+ # "gpt-4-0125-preview",
137
+ # "claude-3-haiku-20240307",
138
+ # "claude-3-sonnet-20240229",
139
+ # "claude-3-opus-20240229",
140
+ # "databricks/dbrx-instruct:nitro",
141
+ # "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
142
+ # # "cohere/command-r-plus",
143
+ # ],
144
+ # label="Model Name",
145
+ # info="Choose the model to use",
146
+ # )
147
+ # with gr.Row():
148
+ # openai_api_key = gr.Textbox(
149
+ # label="OpenAI API Key",
150
+ # placeholder="Enter your OpenAI API key here",
151
+ # type="password",
152
+ # )
153
+ # anthropic_api_key = gr.Textbox(
154
+ # label="Anthropic API Key",
155
+ # placeholder="Enter your Anthropic API key here",
156
+ # type="password",
157
+ # )
158
+ # openrouter_api_key = gr.Textbox(
159
+ # label="OpenRouter API Key",
160
+ # placeholder="Enter your OpenRouter API key here",
161
+ # type="password",
162
+ # )
163
+ # with gr.Row():
164
+ # num_questions = gr.Slider(
165
+ # minimum=1,
166
+ # maximum=340,
167
+ # value=10,
168
+ # label="Number of questions to run the benchmark on",
169
+ # )
170
+ # with gr.Row():
171
+ # run_button = gr.Button("Run Benchmark")
172
+ # with gr.Row():
173
+ # with gr.Accordion("Results", open=True):
174
+ # result = gr.Dataframe()
175
+ # with gr.Row():
176
+ # with gr.Accordion("Summary", open=False):
177
+ # summary = gr.Dataframe()
178
+
179
+ # run_button.click(
180
+ # run_benchmark_gradio,
181
+ # inputs=[
182
+ # tool_name,
183
+ # model_name,
184
+ # num_questions,
185
+ # openai_api_key,
186
+ # anthropic_api_key,
187
+ # openrouter_api_key,
188
+ # ],
189
+ # outputs=[result, summary],
190
+ # )
191
 
192
 
193
  demo.queue(default_concurrency_limit=40).launch()
start.py CHANGED
@@ -45,25 +45,25 @@ def start():
45
  """Start commands."""
46
  print("Starting commands...")
47
  base_dir = os.getcwd()
48
- olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
49
- mech_dir = os.path.join(olas_dir, "benchmark", "mech")
50
 
51
  commands = [
52
  ("git submodule init", base_dir),
53
  ("git submodule update --init --recursive", base_dir),
54
  ("git submodule update --remote --recursive", base_dir),
55
- (
56
- 'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
57
- olas_dir,
58
- ),
59
- ("git remote update", olas_dir),
60
- ("git fetch --all", olas_dir),
61
- ("git checkout main", olas_dir),
62
- ("git pull origin main", olas_dir),
63
- ("git checkout main", mech_dir),
64
- ("git pull origin main", mech_dir),
65
- ("pip install -e .", os.path.join(olas_dir, "benchmark")),
66
- ("pip install -e .", mech_dir),
67
  ("pip install lxml[html_clean]", base_dir),
68
  ("pip install --upgrade huggingface_hub", base_dir),
69
  ]
@@ -72,7 +72,7 @@ def start():
72
  run_command(command, cwd=cwd)
73
 
74
  # add benchmark to the path
75
- sys.path.append(os.path.join(olas_dir, "benchmark"))
76
 
77
  # Download the dataset
78
  download_dataset()
 
45
  """Start commands."""
46
  print("Starting commands...")
47
  base_dir = os.getcwd()
48
+ # olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
49
+ # mech_dir = os.path.join(olas_dir, "benchmark", "mech")
50
 
51
  commands = [
52
  ("git submodule init", base_dir),
53
  ("git submodule update --init --recursive", base_dir),
54
  ("git submodule update --remote --recursive", base_dir),
55
+ # (
56
+ # 'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
57
+ # olas_dir,
58
+ # ),
59
+ # ("git remote update", olas_dir),
60
+ # ("git fetch --all", olas_dir),
61
+ # ("git checkout main", olas_dir),
62
+ # ("git pull origin main", olas_dir),
63
+ # ("git checkout main", mech_dir),
64
+ # ("git pull origin main", mech_dir),
65
+ # ("pip install -e .", os.path.join(olas_dir, "benchmark")),
66
+ # ("pip install -e .", mech_dir),
67
  ("pip install lxml[html_clean]", base_dir),
68
  ("pip install --upgrade huggingface_hub", base_dir),
69
  ]
 
72
  run_command(command, cwd=cwd)
73
 
74
  # add benchmark to the path
75
+ # sys.path.append(os.path.join(olas_dir, "benchmark"))
76
 
77
  # Download the dataset
78
  download_dataset()