# Reusable workflow: runs the test suite inside "past" framework Docker images
# on self-hosted GPU runners, then reports the outcome to Slack.
name: Self-hosted runner (past)

on:
  workflow_call:
    inputs:
      # Framework name; interpolated into the container image name
      # (huggingface/transformers-<framework>-past-<version>-gpu) and CI_EVENT.
      framework:
        required: true
        type: string
      # Framework version; interpolated into the same image name and CI_EVENT.
      version:
        required: true
        type: string
      # Git ref that the jobs check out in /transformers before running.
      sha:
        default: 'main'
        required: false
        type: string

# Workflow-level environment, visible to every job below.
# Values are quoted so they reach the processes as the literal strings written
# here, independent of how a YAML loader types bare `yes` / `true` / numbers.
env:
  # Matches the container-side mount point of the host cache volume
  # (see the `-v ...:/mnt/cache/` container option used by the GPU jobs).
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: "yes"
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  RUN_PT_TF_CROSS_TESTS: "1"

# Job graph:
#   check_runner_status -> check_runners -> setup -> run_tests_{single,multi}_gpu
#   send_results waits on all of them and runs regardless of their outcome.
#
# NOTE(review): the artifact actions are pinned to v3, which GitHub has
# deprecated — confirm whether upload/download-artifact should move to v4.
jobs:
  # Runs utils/check_self_hosted_runner.py against the two target runners from
  # a GitHub-hosted machine; every later job depends on this one.
  check_runner_status:
    name: Check Runner Status
    runs-on: ubuntu-latest
    steps:
      - name: Checkout transformers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      - name: Check Runner Status
        run: python utils/check_self_hosted_runner.py --target_runners single-gpu-past-ci-runner-docker,multi-gpu-past-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}

  # Starts the test container on each runner type and calls nvidia-smi in it.
  check_runners:
    name: Check Runners
    needs: check_runner_status
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    # Resolves to the runner label `<machine_type>-docker-past-ci`.
    runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
    container:
      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: NVIDIA-SMI
        run: |
          nvidia-smi

  # Checks out the requested ref, clears stale caches/reports and publishes the
  # list of test folders as the `matrix` output consumed by the test jobs.
  setup:
    name: Setup
    needs: check_runners
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
    container:
      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ inputs.sha }}

      - name: Cleanup
        working-directory: /transformers
        run: |
          rm -rf tests/__pycache__
          rm -rf tests/models/__pycache__
          rm -rf reports

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      # Builds the folder list: every directory under tests/models (as
      # `models/<name>`), followed by the other directories directly under
      # tests/ (with `models` itself removed). Each group is sorted.
      - name: Identify models to test
        id: set-matrix
        working-directory: /transformers
        run: |
          cd tests
          echo "matrix=$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')" >> "$GITHUB_OUTPUT"

  # One job per test folder on the single-GPU runner.
  run_tests_single_gpu:
    name: Model tests
    needs: setup
    strategy:
      # Let the remaining folders finish even if one folder's tests fail.
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
        machine_type: [single-gpu]
    runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
    container:
      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ inputs.sha }}

      # Exports `matrix_folders`, a copy of `matrix.folders` with the leading
      # `models/` rewritten to `models_`. The slash-free form is used for the
      # artifact name below (artifact names may not contain `/`).
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run all tests on GPU
        working-directory: /transformers
        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}

      # Only runs after a failure; its own failure must not mask the real one.
      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

      # Converts `matrix_folders` back to the slash form and stores the
      # resulting job title next to the reports so it ships in the artifact.
      - name: Save job name
        if: ${{ always() }}
        shell: bash
        run: |
          matrix_folders=${matrix_folders/'models_'/'models/'}
          job_name="Model tests ($matrix_folders, ${{ matrix.machine_type }})"
          echo "$job_name"
          echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v3
        with:
          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}

  # Same steps as run_tests_single_gpu, on the multi-GPU runner; the container
  # is started with `--gpus all` instead of `--gpus 0`.
  run_tests_multi_gpu:
    name: Model tests
    needs: setup
    strategy:
      # Let the remaining folders finish even if one folder's tests fail.
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
        machine_type: [multi-gpu]
    runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
    container:
      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ inputs.sha }}

      # Exports `matrix_folders`, a copy of `matrix.folders` with the leading
      # `models/` rewritten to `models_`. The slash-free form is used for the
      # artifact name below (artifact names may not contain `/`).
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run all tests on GPU
        working-directory: /transformers
        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}

      # Only runs after a failure; its own failure must not mask the real one.
      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

      # Converts `matrix_folders` back to the slash form and stores the
      # resulting job title next to the reports so it ships in the artifact.
      - name: Save job name
        if: ${{ always() }}
        shell: bash
        run: |
          matrix_folders=${matrix_folders/'models_'/'models/'}
          job_name="Model tests ($matrix_folders, ${{ matrix.machine_type }})"
          echo "$job_name"
          echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt

      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v3
        with:
          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}

  # Collects the artifacts and calls utils/notification_service.py.
  # `if: always()` keeps this job running when upstream jobs failed, so
  # failures are still reported.
  send_results:
    name: Send results to webhook
    runs-on: ubuntu-latest
    if: always()
    needs: [check_runner_status, check_runners, setup, run_tests_single_gpu, run_tests_multi_gpu]
    steps:
      # Log the results of the gating jobs for easier debugging.
      - name: Preliminary job status
        shell: bash
        run: |
          echo "Runner availability: ${{ needs.check_runner_status.result }}"
          echo "Runner status: ${{ needs.check_runners.result }}"
          echo "Setup status: ${{ needs.setup.result }}"

      - uses: actions/checkout@v3
      # No `name` is given — presumably this fetches every artifact uploaded
      # by the test jobs (confirm against the download-artifact docs).
      - uses: actions/download-artifact@v3

      # Directory uploaded by the final step of this job.
      - name: Create directory
        run: mkdir test_failure_tables

      - name: Send message to Slack
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
          # Results of the gating jobs, handed to the notification script.
          RUNNER_STATUS: ${{ needs.check_runner_status.result }}
          RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
          SETUP_STATUS: ${{ needs.setup.result }}
        # The script receives the folder list produced by the `setup` job as
        # its single positional argument.
        run: |
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"

      # Uploaded even if the Slack step failed.
      - name: Failure table artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v3
        with:
          name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
          path: test_failure_tables