Spaces:

valory
/

olas-prediction-leaderboard

Running

App Files Files Community

cyberosa committed on Jul 5

Commit

dbaa2bd

•

1 Parent(s): 9b36cb7

trying to activate run_benchmark tab again

Browse files

Files changed (4) hide show

.gitmodules +3 -0
app.py +125 -125
olas-predict-benchmark +1 -0
start.py +18 -19

.gitmodules ADDED Viewed

	@@ -0,0 +1,3 @@

+[submodule "olas-predict-benchmark"]
+	path = olas-predict-benchmark
+	url = https://github.com/valory-xyz/olas-predict-benchmark.git

app.py CHANGED Viewed

@@ -13,69 +13,69 @@ from tabs.faq import (
 from tabs.howto_benchmark import how_to_run
 # Feature temporarily disabled til HF support helps us with the Space Error
-# from tabs.run_benchmark import run_benchmark_main
 demo = gr.Blocks()
-# def run_benchmark_gradio(
-#     tool_name,
-#     model_name,
-#     num_questions,
-#     openai_api_key,
-#     anthropic_api_key,
-#     openrouter_api_key,
-# ):
-#     """Run the benchmark using inputs."""
-#     if tool_name is None:
-#         return "Please enter the name of your tool."
-#     if (
-#         openai_api_key is None
-#         and anthropic_api_key is None
-#         and openrouter_api_key is None
-#     ):
-#         return "Please enter either OpenAI or Anthropic or OpenRouter API key."
-#     result = run_benchmark_main(
-#         tool_name,
-#         model_name,
-#         num_questions,
-#         openai_api_key,
-#         anthropic_api_key,
-#         openrouter_api_key,
-#     )
-#     if result == "completed":
-#         # get the results file in the results directory
-#         fns = glob("results/*.csv")
-#         print(f"Number of files in results directory: {len(fns)}")
-#         # convert to Path
-#         files = [Path(file) for file in fns]
-#         # get results and summary files
-#         results_files = [file for file in files if "results" in file.name]
-#         # the other file is the summary file
-#         summary_files = [file for file in files if "summary" in file.name]
-#         print(results_files, summary_files)
-#         # get the path with results
-#         results_df = pd.read_csv(results_files[0])
-#         summary_df = pd.read_csv(summary_files[0])
-#         # make sure all df float values are rounded to 4 decimal places
-#         results_df = results_df.round(4)
-#         summary_df = summary_df.round(4)
-#         return gr.Dataframe(value=results_df), gr.Dataframe(value=summary_df)
-#     return gr.Textbox(
-#         label="Benchmark Result", value=result, interactive=False
-#     ), gr.Textbox(label="Summary", value="")
 with demo:
@@ -112,83 +112,83 @@ with demo:
             gr.Markdown(how_to_run)
         # fourth tab - run the benchmark
-        # with gr.TabItem("🔥 Run the Benchmark"):
-        #     with gr.Row():
-        #         tool_name = gr.Dropdown(
-        #             [
-        #                 "prediction-offline",
-        #                 "prediction-online",
-        #                 # "prediction-online-summarized-info",
-        #                 # "prediction-offline-sme",
-        #                 # "prediction-online-sme",
-        #                 "prediction-request-rag",
-        #                 "prediction-request-reasoning",
-        #                 # "prediction-url-cot-claude",
-        #                 # "prediction-request-rag-cohere",
-        #                 # "prediction-with-research-conservative",
-        #                 # "prediction-with-research-bold",
-        #             ],
-        #             label="Tool Name",
-        #             info="Choose the tool to run",
-        #         )
-        #         model_name = gr.Dropdown(
-        #             [
-        #                 "gpt-3.5-turbo-0125",
-        #                 "gpt-4-0125-preview",
-        #                 "claude-3-haiku-20240307",
-        #                 "claude-3-sonnet-20240229",
-        #                 "claude-3-opus-20240229",
-        #                 "databricks/dbrx-instruct:nitro",
-        #                 "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
-        #                 # "cohere/command-r-plus",
-        #             ],
-        #             label="Model Name",
-        #             info="Choose the model to use",
-        #         )
-        #     with gr.Row():
-        #         openai_api_key = gr.Textbox(
-        #             label="OpenAI API Key",
-        #             placeholder="Enter your OpenAI API key here",
-        #             type="password",
-        #         )
-        #         anthropic_api_key = gr.Textbox(
-        #             label="Anthropic API Key",
-        #             placeholder="Enter your Anthropic API key here",
-        #             type="password",
-        #         )
-        #         openrouter_api_key = gr.Textbox(
-        #             label="OpenRouter API Key",
-        #             placeholder="Enter your OpenRouter API key here",
-        #             type="password",
-        #         )
-        #     with gr.Row():
-        #         num_questions = gr.Slider(
-        #             minimum=1,
-        #             maximum=340,
-        #             value=10,
-        #             label="Number of questions to run the benchmark on",
-        #         )
-        #     with gr.Row():
-        #         run_button = gr.Button("Run Benchmark")
-        #     with gr.Row():
-        #         with gr.Accordion("Results", open=True):
-        #             result = gr.Dataframe()
-        #     with gr.Row():
-        #         with gr.Accordion("Summary", open=False):
-        #             summary = gr.Dataframe()
-        #     run_button.click(
-        #         run_benchmark_gradio,
-        #         inputs=[
-        #             tool_name,
-        #             model_name,
-        #             num_questions,
-        #             openai_api_key,
-        #             anthropic_api_key,
-        #             openrouter_api_key,
-        #         ],
-        #         outputs=[result, summary],
-        #     )
 demo.queue(default_concurrency_limit=40).launch()

 from tabs.howto_benchmark import how_to_run
 # Feature temporarily disabled til HF support helps us with the Space Error
+from tabs.run_benchmark import run_benchmark_main
 demo = gr.Blocks()
+def run_benchmark_gradio(
+    tool_name,
+    model_name,
+    num_questions,
+    openai_api_key,
+    anthropic_api_key,
+    openrouter_api_key,
+):
+    """Run the benchmark from the form inputs and return two output values.
+
+    The "Run Benchmark" button is wired to two output components (results
+    and summary), so every return path yields exactly two values.
+
+    Args:
+        tool_name: Name of the prediction tool selected in the form.
+        model_name: Name of the model selected in the form.
+        num_questions: Number of questions to run the benchmark on.
+        openai_api_key: OpenAI API key, may be blank.
+        anthropic_api_key: Anthropic API key, may be blank.
+        openrouter_api_key: OpenRouter API key, may be blank.
+
+    Returns:
+        A pair of ``gr.Dataframe`` (results, summary) when the benchmark
+        reports "completed" and both CSV files are found; otherwise a pair
+        of ``gr.Textbox`` where the first carries the message to display.
+    """
+
+    def _message(text):
+        """Build the two-value response used for every non-tabular outcome."""
+        return gr.Textbox(
+            label="Benchmark Result", value=text, interactive=False
+        ), gr.Textbox(label="Summary", value="")
+
+    # Truthiness rejects both None and the empty string of a blank form field.
+    if not tool_name:
+        return _message("Please enter the name of your tool.")
+    if not (openai_api_key or anthropic_api_key or openrouter_api_key):
+        return _message(
+            "Please enter either OpenAI or Anthropic or OpenRouter API key."
+        )
+
+    result = run_benchmark_main(
+        tool_name,
+        model_name,
+        num_questions,
+        openai_api_key,
+        anthropic_api_key,
+        openrouter_api_key,
+    )
+    if result != "completed":
+        # Anything other than "completed" is shown to the user as-is.
+        return _message(result)
+
+    # get the results file in the results directory
+    fns = glob("results/*.csv")
+    print(f"Number of files in results directory: {len(fns)}")
+    # convert to Path
+    files = [Path(file) for file in fns]
+    # split into results and summary files by file name
+    results_files = [file for file in files if "results" in file.name]
+    summary_files = [file for file in files if "summary" in file.name]
+    print(results_files, summary_files)
+    if not results_files or not summary_files:
+        # Avoid an IndexError when the run left no matching CSV behind.
+        return _message(
+            "Benchmark completed but no results or summary file was found."
+        )
+
+    # NOTE(review): glob order is not guaranteed; if the results directory
+    # can hold files from earlier runs, confirm index 0 is the intended one.
+    results_df = pd.read_csv(results_files[0])
+    summary_df = pd.read_csv(summary_files[0])
+    # make sure all df float values are rounded to 4 decimal places
+    results_df = results_df.round(4)
+    summary_df = summary_df.round(4)
+    return gr.Dataframe(value=results_df), gr.Dataframe(value=summary_df)
 with demo:
             gr.Markdown(how_to_run)
         # fourth tab - run the benchmark
+        with gr.TabItem("🔥 Run the Benchmark"):
+            with gr.Row():
+                tool_name = gr.Dropdown(
+                    [
+                        "prediction-offline",
+                        "prediction-online",
+                        # "prediction-online-summarized-info",
+                        # "prediction-offline-sme",
+                        # "prediction-online-sme",
+                        "prediction-request-rag",
+                        "prediction-request-reasoning",
+                        # "prediction-url-cot-claude",
+                        # "prediction-request-rag-cohere",
+                        # "prediction-with-research-conservative",
+                        # "prediction-with-research-bold",
+                    ],
+                    label="Tool Name",
+                    info="Choose the tool to run",
+                )
+                model_name = gr.Dropdown(
+                    [
+                        "gpt-3.5-turbo-0125",
+                        "gpt-4-0125-preview",
+                        "claude-3-haiku-20240307",
+                        "claude-3-sonnet-20240229",
+                        "claude-3-opus-20240229",
+                        "databricks/dbrx-instruct:nitro",
+                        "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
+                        # "cohere/command-r-plus",
+                    ],
+                    label="Model Name",
+                    info="Choose the model to use",
+                )
+            with gr.Row():
+                openai_api_key = gr.Textbox(
+                    label="OpenAI API Key",
+                    placeholder="Enter your OpenAI API key here",
+                    type="password",
+                )
+                anthropic_api_key = gr.Textbox(
+                    label="Anthropic API Key",
+                    placeholder="Enter your Anthropic API key here",
+                    type="password",
+                )
+                openrouter_api_key = gr.Textbox(
+                    label="OpenRouter API Key",
+                    placeholder="Enter your OpenRouter API key here",
+                    type="password",
+                )
+            with gr.Row():
+                num_questions = gr.Slider(
+                    minimum=1,
+                    maximum=340,
+                    value=10,
+                    label="Number of questions to run the benchmark on",
+                )
+            with gr.Row():
+                run_button = gr.Button("Run Benchmark")
+            with gr.Row():
+                with gr.Accordion("Results", open=True):
+                    result = gr.Dataframe()
+            with gr.Row():
+                with gr.Accordion("Summary", open=False):
+                    summary = gr.Dataframe()
+            run_button.click(
+                run_benchmark_gradio,
+                inputs=[
+                    tool_name,
+                    model_name,
+                    num_questions,
+                    openai_api_key,
+                    anthropic_api_key,
+                    openrouter_api_key,
+                ],
+                outputs=[result, summary],
+            )
 demo.queue(default_concurrency_limit=40).launch()

olas-predict-benchmark ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit eb1b1276bd576e38af86ee334ebf1922da0ac035

start.py CHANGED Viewed

@@ -45,26 +45,25 @@ def start():
     """Start commands."""
     print("Starting commands...")
     base_dir = os.getcwd()
-    # olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
-    # mech_dir = os.path.join(olas_dir, "benchmark", "mech")
     commands = [
-        # ("git submodule init", base_dir),
-        # no updates
-        # ("git submodule update --init --recursive", base_dir),
-        # ("git submodule update --remote --recursive", base_dir),
-        # (
-        #     'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
-        #     olas_dir,
-        # ),
-        # ("git remote update", olas_dir),
-        # ("git fetch --all", olas_dir),
-        # ("git checkout main", olas_dir),
-        # ("git pull origin main", olas_dir),
-        # ("git checkout 56ecf18a982c4548feac5efe787690a3ec37c835", mech_dir),
-        # # ("git pull origin main", mech_dir),
-        # ("pip install -e .", os.path.join(olas_dir, "benchmark")),
-        # ("pip install -e .", mech_dir),
         ("pip install lxml[html_clean]", base_dir),
         ("pip install --upgrade huggingface_hub", base_dir),
     ]
@@ -73,7 +72,7 @@ def start():
         run_command(command, cwd=cwd)
     # add benchmark to the path
-    # sys.path.append(os.path.join(olas_dir, "benchmark"))
     # Download the dataset
     download_dataset()

     """Start commands."""
     print("Starting commands...")
     base_dir = os.getcwd()
+    olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
+    mech_dir = os.path.join(olas_dir, "benchmark", "mech")
     commands = [
+        ("git submodule init", base_dir),
+        ("git submodule update --init --recursive", base_dir),
+        ("git submodule update --remote --recursive", base_dir),
+        (
+            'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
+            olas_dir,
+        ),
+        ("git remote update", olas_dir),
+        ("git fetch --all", olas_dir),
+        ("git checkout main", olas_dir),
+        ("git pull origin main", olas_dir),
+        ("git checkout main", mech_dir),
+        ("git pull origin main", mech_dir),
+        ("pip install -e .", os.path.join(olas_dir, "benchmark")),
+        ("pip install -e .", mech_dir),
         ("pip install lxml[html_clean]", base_dir),
         ("pip install --upgrade huggingface_hub", base_dir),
     ]
         run_command(command, cwd=cwd)
     # add benchmark to the path
+    sys.path.append(os.path.join(olas_dir, "benchmark"))
     # Download the dataset
     download_dataset()