soutrik committed on
Commit
8149d3a
·
1 Parent(s): 70d861c

pushed main pipeline

Browse files
.github/workflows/hf_deploy.yaml CHANGED
@@ -6,6 +6,8 @@ on:
6
  - main
7
  - feat/litserve_gpu_gradio
8
 
 
 
9
  jobs:
10
  sync-to-hub:
11
  runs-on: ubuntu-latest
 
6
  - main
7
  - feat/litserve_gpu_gradio
8
 
9
+ workflow_dispatch:
10
+
11
  jobs:
12
  sync-to-hub:
13
  runs-on: ubuntu-latest
.github/workflows/main_cd.yaml ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Continuous-deployment workflow. It runs on every push to `main` and can also
# be started manually (workflow_dispatch).
name: >-
  Deploy PyTorch Training with all advanced features like self-hosted EC2 runner,
  Docker Buildx, Amazon ECR, Hugging Face Spaces

on:
  workflow_dispatch:
  push:
    branches: [main]
8
+
9
+ jobs:
10
+ start-runner:
11
+ name: Start self-hosted EC2 runner
12
+ runs-on: ubuntu-latest
13
+ outputs:
14
+ label: ${{ steps.start-ec2-runner.outputs.label }}
15
+ ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
16
+ steps:
17
+ - name: Configure AWS credentials
18
+ uses: aws-actions/configure-aws-credentials@v4
19
+ with:
20
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
21
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
22
+ aws-region: ${{ secrets.AWS_REGION }}
23
+
24
+ - name: Start EC2 runner
25
+ id: start-ec2-runner
26
+ uses: machulav/ec2-github-runner@v2
27
+ with:
28
+ mode: start
29
+ github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
30
+ ec2-image-id: ami-0deffac7fd1fd3c70
31
+ ec2-instance-type: g4dn.xlarge
32
+ subnet-id: subnet-0665e4d868c759e3f
33
+ security-group-id: sg-0596459527ead65c8
34
+
35
+ deploy:
36
+ name: Deploy PyTorch Training Pipeline
37
+ needs: start-runner
38
+ runs-on: ${{ needs.start-runner.outputs.label }}
39
+ steps:
40
+ - name: Checkout repository
41
+ uses: actions/checkout@v4
42
+
43
+ - name: Set up Docker Buildx
44
+ uses: docker/setup-buildx-action@v3
45
+
46
+ - name: Configure AWS credentials
47
+ uses: aws-actions/configure-aws-credentials@v4
48
+ with:
49
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
50
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
51
+ aws-region: ${{ secrets.AWS_REGION }}
52
+
53
+ - name: Cache Docker layers
54
+ uses: actions/cache@v3
55
+ with:
56
+ path: /tmp/.buildx-cache
57
+ key: ${{ runner.os }}-docker-${{ github.sha }}
58
+ restore-keys: |
59
+ ${{ runner.os }}-docker-
60
+
61
+ - name: Log in to Amazon ECR
62
+ id: login-ecr
63
+ uses: aws-actions/amazon-ecr-login@v2
64
+
65
+ - name: Create .env file
66
+ run: |
67
+ echo "AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }}" >> .env
68
+ echo "AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }}" >> .env
69
+ echo "AWS_REGION=${{ secrets.AWS_REGION }}" >> .env
70
+ echo "::add-mask::${{ secrets.AWS_ACCESS_KEY_ID }}"
71
+ echo "::add-mask::${{ secrets.AWS_SECRET_ACCESS_KEY }}"
72
+
73
+ - name: Run Docker Compose for all services
74
+ run: |
75
+ docker-compose build --no-cache
76
+ docker-compose up -d train eval
77
+ docker-compose logs --follow train eval
78
+ docker-compose down --remove-orphans
79
+
80
+ - name: Build, tag, and push Docker image to Amazon ECR
81
+ env:
82
+ REGISTRY: ${{ steps.login-ecr.outputs.registry }}
83
+ REPOSITORY: soutrik71/mnist
84
+ IMAGE_TAG: ${{ github.sha }}
85
+ run: |
86
+ docker build -t $REGISTRY/$REPOSITORY:$IMAGE_TAG .
87
+ docker push $REGISTRY/$REPOSITORY:$IMAGE_TAG
88
+ docker tag $REGISTRY/$REPOSITORY:$IMAGE_TAG $REGISTRY/$REPOSITORY:latest
89
+ docker push $REGISTRY/$REPOSITORY:latest
90
+
91
+ - name: Pull Docker image from ECR and verify
92
+ env:
93
+ REGISTRY: ${{ steps.login-ecr.outputs.registry }}
94
+ REPOSITORY: soutrik71/mnist
95
+ IMAGE_TAG: ${{ github.sha }}
96
+ run: |
97
+ docker pull $REGISTRY/$REPOSITORY:$IMAGE_TAG
98
+ docker images | grep "$REGISTRY/$REPOSITORY"
99
+
100
+ - name: Clean up environment
101
+ run: |
102
+ rm -f .env
103
+ docker system prune -af --volumes
104
+
105
+ stop-runner:
106
+ name: Stop self-hosted EC2 runner
107
+ needs:
108
+ - start-runner
109
+ - deploy
110
+ runs-on: ubuntu-latest
111
+ if: ${{ always() }}
112
+ steps:
113
+ - name: Configure AWS credentials
114
+ uses: aws-actions/configure-aws-credentials@v4
115
+ with:
116
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
117
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
118
+ aws-region: ${{ secrets.AWS_REGION }}
119
+
120
+ - name: Stop EC2 runner
121
+ uses: machulav/ec2-github-runner@v2
122
+ with:
123
+ mode: stop
124
+ github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
125
+ label: ${{ needs.start-runner.outputs.label }}
126
+ ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
127
+
128
+ - name: Validate EC2 termination
129
+ run: |
130
+ aws ec2 describe-instances --instance-ids ${{ needs.start-runner.outputs.ec2-instance-id }} \
131
+ --query "Reservations[].Instances[].State.Name" --output text | grep "terminated" || echo "Runner not terminated."