initial commit
Browse files
- .gitignore +1 -0
- app.py +60 -0
- formatted_data.csv +8 -0
- requirements.txt +1 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__
|
app.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
|
5 |
+
# Leaderboard results file; the path is relative to the working directory.
csv_file_path = "formatted_data.csv"

# Load the leaderboard table once at start-up.
df = pd.read_csv(csv_file_path)
|
9 |
+
|
10 |
+
# Markdown body for the page: benchmark overview, instructions for running a
# tool on the benchmark, and notes on how the dataset was filtered and scraped.
# TODO: the "Guidelines" link below still points at the placeholder "xxx".
markdown_text = """
## Benchmark Overview
- The benchmark evaluates the performance of Olas Predict tools on the Autocast dataset.
- The dataset has been refined to enhance the evaluation of the tools.
- The leaderboard shows the performance of the tools based on the refined dataset.
- The script to run the benchmark is available in the repo [here](https://github.com/valory-xyz/olas-predict-benchmark).

## How to run your tools on the benchmark
- Fork the repo [here](https://github.com/valory-xyz/olas-predict-benchmark).
- Git init the submodules and update the submodule to get the latest dataset `mech` tool.
- `git submodule init`
- `git submodule update --remote --recursive`
- Include your tool in the `mech/packages` directory accordingly.
- Guidelines on how to include your tool can be found [here](xxx).
- Run the benchmark script.

## Dataset Overview
This project leverages the Autocast dataset from the research paper titled ["Forecasting Future World Events with Neural Networks"](https://arxiv.org/abs/2206.15474).
The dataset has undergone further refinement to enhance the performance evaluation of Olas mech prediction tools.
Both the original and refined datasets are hosted on HuggingFace.

### Refined Dataset Files
- You can find the refined dataset on HuggingFace [here](https://huggingface.co/datasets/valory/autocast).
- `autocast_questions_filtered.json`: A JSON subset of the initial autocast dataset.
- `autocast_questions_filtered.pkl`: A pickle file mapping URLs to their respective scraped documents within the filtered dataset.
- `retrieved_docs.pkl`: Contains all the scraped texts.

### Filtering Criteria
To refine the dataset, we applied the following criteria to ensure the reliability of the URLs:
- URLs not returning HTTP 200 status codes are excluded.
- Difficult-to-scrape sites, such as Twitter and Bloomberg, are omitted.
- Links with less than 1000 words are removed.
- Only samples with a minimum of 5 and a maximum of 20 working URLs are retained.

### Scraping Approach
The content of the filtered URLs has been scraped using various libraries, depending on the source:
- `pypdf2` for PDF URLs.
- `wikipediaapi` for Wikipedia pages.
- `requests`, `readability-lxml`, and `html2text` for most other sources.
- `requests`, `beautifulsoup`, and `html2text` for BBC links.
"""
|
52 |
+
|
53 |
+
|
54 |
+
# Assemble the page: title, one-line description, the leaderboard table,
# then the long-form Markdown write-up.
with gr.Blocks() as demo:
    gr.Markdown("# Olas Predict Benchmark")
    gr.Markdown("Leaderboard showing the performance of Olas Predict tools on the Autocast dataset and overview of the project.")
    gr.DataFrame(df)
    gr.Markdown(markdown_text)

# Start the Gradio app.
demo.launch()
|
formatted_data.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Tool,Accuracy,Correct,Total,Mean Tokens Used,Mean Cost ($)
|
2 |
+
claude-prediction-offline,0.7201834862385321,157,218,779.4770642201835,0.006891669724770637
|
3 |
+
claude-prediction-online,0.6600660066006601,200,303,1505.3135313531352,0.013348171617161701
|
4 |
+
prediction-online,0.676737160120846,224,331,1219.6918429003022,0.001332990936555879
|
5 |
+
prediction-offline,0.6599326599326599,196,297,579.6565656565657,0.000621023569023569
|
6 |
+
prediction-online-summarized-info,0.6209150326797386,190,306,1008.4542483660131,0.0011213790849673195
|
7 |
+
prediction-offline-sme,0.599406528189911,202,337,1190.2017804154302,0.0013518635014836643
|
8 |
+
prediction-online-sme,0.5905044510385756,199,337,1834.919881305638,0.0020690207715133428
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
gradio
|