import os
import sys
import subprocess
from typing import Optional, Tuple

from huggingface_hub import hf_hub_download


def run_command(command: str, cwd: Optional[str] = None) -> Tuple[bytes, bytes]:
    """Run a shell command and print its outcome.

    The command is executed through the shell with both output streams
    captured. On a non-zero exit status the captured stderr is printed with
    an ``Error:`` prefix; otherwise the captured stdout is printed with an
    ``Output:`` prefix. A failing command does not raise.

    Args:
        command: Shell command line to execute.
        cwd: Directory to run the command in; ``None`` uses the current
            working directory of this process.

    Returns:
        A ``(stdout, stderr)`` tuple holding the raw captured bytes.
    """
    completed = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=cwd,
    )
    # Decode leniently: tool output is not guaranteed to be valid UTF-8, and
    # a decoding error here must not abort the remaining setup steps.
    if completed.returncode != 0:
        error_text = completed.stderr.decode(errors="replace")
        print(f"Error: {error_text}")
    else:
        output_text = completed.stdout.decode(errors="replace")
        print(f"Output: {output_text}")
    return completed.stdout, completed.stderr


def download_dataset() -> None:
    """Download the autocast dataset files from the Hugging Face Hub.

    The files are requested from the ``valory/autocast`` dataset repository
    with ``olas-predict-benchmark/benchmark/data/autocast`` (relative to the
    current working directory) passed as ``local_dir``. The directory is
    created first if it does not exist.
    """
    print("Downloading the dataset...")
    repo_id = "valory/autocast"
    base_dir = os.getcwd()
    output_dir = os.path.join(
        base_dir, "olas-predict-benchmark", "benchmark", "data", "autocast"
    )
    # exist_ok=True already makes this a no-op when the directory exists.
    os.makedirs(output_dir, exist_ok=True)

    filenames = [
        "autocast_questions_filtered.json",
        "autocast_questions_filtered.pkl",
    ]
    for filename in filenames:
        hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            local_dir=output_dir,
            repo_type="dataset",
        )
    print("Dataset downloaded successfully.")


def start() -> None:
    """Run the setup commands, extend ``sys.path`` and download the dataset.

    Each entry in the command list is a ``(command, working_directory)``
    pair executed in order via ``run_command``: git submodule and checkout
    commands first, then the pip installs. Afterwards the ``benchmark``
    directory is appended to ``sys.path`` and the dataset is downloaded.
    """
    print("Starting commands...")
    base_dir = os.getcwd()
    olas_dir = os.path.join(base_dir, "olas-predict-benchmark")
    benchmark_dir = os.path.join(olas_dir, "benchmark")
    mech_dir = os.path.join(benchmark_dir, "mech")

    commands = [
        ("git submodule init", base_dir),
        ("git submodule update --init --recursive", base_dir),
        ("git submodule update --remote --recursive", base_dir),
        (
            'git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"',
            olas_dir,
        ),
        ("git remote update", olas_dir),
        ("git fetch --all", olas_dir),
        ("git checkout main", olas_dir),
        ("git pull origin main", olas_dir),
        ("git checkout main", mech_dir),
        ("git pull origin main", mech_dir),
        ("pip install -e .", benchmark_dir),
        ("pip install -e .", mech_dir),
        # Quoted so the shell cannot treat the brackets as a glob pattern.
        ('pip install "lxml[html_clean]"', base_dir),
        ("pip install -U huggingface-hub", base_dir),
    ]

    for command, cwd in commands:
        run_command(command, cwd=cwd)

    # add benchmark to the path
    sys.path.append(benchmark_dir)

    # Download the dataset
    download_dataset()


# Executed at module level: running (or importing) this file performs the
# whole setup immediately.
start()