JCai committed on
Commit
6574024
1 Parent(s): 089e773

add files for cs2

Browse files
Files changed (4) hide show
  1. automated_deployment +0 -48
  2. automated_deployment.sh +22 -0
  3. requirements.txt +1 -1
  4. watcher.py +104 -0
automated_deployment DELETED
@@ -1,48 +0,0 @@
1
- #! /bin/bash
2
-
3
- PORT=21003
4
- MACHINE=paffenroth-23.dyn.wpi.edu
5
- STUDENT_ADMIN_KEY_PATH=kfkey
6
-
7
- #insert key into list of authorized keys
8
- cat kfkey.pub > authorized_keys
9
-
10
- #set permissions
11
- chmod 600 authorized_keys
12
-
13
- #remove default key
14
- rm -f student-admin_key*
15
-
16
- # Copy the authorized_keys file to the server
17
- scp -i student-admin_key -P ${PORT} -o StrictHostKeyChecking=no authorized_keys student-admin@${MACHINE}:~/.ssh/
18
-
19
- # Add the key to the ssh-agent
20
- eval "$(ssh-agent -s)"
21
- ssh-add mykey
22
-
23
- # Check the key file on the server
24
- echo "checking that the authorized_keys file is correct"
25
- ssh -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE} "cat ~/.ssh/authorized_keys"
26
-
27
- # clone the repo
28
- git clone https://github.com/rcpaffenroth/CS553_example
29
-
30
- # Copy the files to the server
31
- scp -P ${PORT} -o StrictHostKeyChecking=no -r CS553_example student-admin@${MACHINE}:~/
32
-
33
- # check that the code is installed and start up the product
34
- # COMMAND="ssh -p ${PORT} -o StrictHostKeyChecking=no student-admin@${MACHINE}"
35
-
36
- # ${COMMAND} "ls CS553_example"
37
- # ${COMMAND} "sudo apt install -qq -y python3-venv"
38
- # ${COMMAND} "cd CS553_example && python3 -m venv venv"
39
- # ${COMMAND} "cd CS553_example && source venv/bin/activate && pip install -r requirements.txt"
40
- # ${COMMAND} "nohup CS553_example/venv/bin/python3 CS553_example/app.py > log.txt 2>&1 &"
41
-
42
- # nohup ./whatever > /dev/null 2>&1
43
-
44
- # debugging ideas
45
- # sudo apt-get install gh
46
- # gh auth login
47
- # requests.exceptions.HTTPError: 429 Client Error: Too Many Requests for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta/v1/chat/completions
48
- # log.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
automated_deployment.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Deploy (or redeploy) the cs553-case-study-1 app on the remote VM over ssh.
# Connection settings are read from a .env file in the current directory,
# which must define STUDENT_ADMIN_KEY_PATH, PORT and MACHINE.

# Load environment variables from .env file
if [ -f .env ]; then
    source .env  # loads the variables into the current shell session
else
    echo ".env file not found"
    exit 1
fi

# Every ssh call below needs these; stop with a clear message if one is missing.
for var in STUDENT_ADMIN_KEY_PATH PORT MACHINE; do
    if [ -z "${!var}" ]; then
        echo "${var} is not set in .env"
        exit 1
    fi
done

REPO_URL="https://github.com/jcai0o0/cs553-case-study-1.git"
REPO_DIR="cs553-case-study-1"

# ssh into the vm
# An array keeps each argument intact even if a value contains spaces.
COMMAND=(ssh -i "${STUDENT_ADMIN_KEY_PATH}" -p "${PORT}" -o StrictHostKeyChecking=no "student-admin@${MACHINE}")

# Clone the repo on the first run; on later runs (e.g. a recovery) the
# directory already exists and a plain clone would fail, so update it instead.
"${COMMAND[@]}" "if [ -d ${REPO_DIR}/.git ]; then git -C ${REPO_DIR} pull --ff-only; else git clone ${REPO_URL}; fi"

# check that the code is installed and start up the product
"${COMMAND[@]}" "sudo apt install -qq -y python3-venv"
"${COMMAND[@]}" "cd ${REPO_DIR} && python3 -m venv cs2_venv"
# Call the venv's pip directly so the step does not depend on the remote
# shell supporting 'source'.
"${COMMAND[@]}" "cd ${REPO_DIR} && cs2_venv/bin/pip install -r requirements.txt"
"${COMMAND[@]}" "nohup ${REPO_DIR}/cs2_venv/bin/python3 ${REPO_DIR}/app.py > log.txt 2>&1 &"
22
+
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  --extra-index-url https://download.pytorch.org/whl/cpu
2
  huggingface_hub==0.23.*
3
- gradio==4.39.*
4
  torch==2.4.*
5
  transformers==4.43.*
6
  accelerate==0.33.*
 
1
  --extra-index-url https://download.pytorch.org/whl/cpu
2
  huggingface_hub==0.23.*
3
+ gradio==4.43.0
4
  torch==2.4.*
5
  transformers==4.43.*
6
  accelerate==0.33.*
watcher.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import time
4
+ import smtplib
5
+ from email.mime.text import MIMEText
6
+ from email.mime.multipart import MIMEMultipart
7
+ from dotenv import load_dotenv
8
+ import subprocess
9
+
10
# Load variables from a .env file (python-dotenv) before the settings below
# read them from the environment.
load_dotenv()

# Settings
VM_IP = "paffenroth-23.dyn.wpi.edu"
GRADIO_UI_URL = "http://paffenroth-23.dyn.wpi.edu:8005/"
CHECK_INTERVAL = 60  # seconds between two health checks
RECOVERY_SCRIPT_PATH = 'automated_deployment.sh'


# Email settings (if EMAIL_ALERT is True); each is None when the variable is unset.
EMAIL_SENDER, EMAIL_RECEIVER, EMAIL_PASSWORD = (
    os.getenv(name)
    for name in ("EMAIL_SENDER", "EMAIL_RECEIVER", "EMAIL_PASSWORD")
)
23
+
24
+
25
def send_email_alert(subject, body, timeout=30):
    """Send a plain-text alert email through smtp.gmail.com (port 587, STARTTLS).

    Delivery is best-effort: every failure is printed and the function
    returns normally, so a mail problem never stops the caller.

    Args:
        subject: Subject line of the alert.
        body: Plain-text body of the alert.
        timeout: Seconds to wait on the SMTP connection before giving up.

    Returns:
        None.
    """
    # The EMAIL_* settings come from os.getenv() and are None when unset;
    # report that directly instead of failing later inside smtplib.
    if not (EMAIL_SENDER and EMAIL_RECEIVER and EMAIL_PASSWORD):
        print("Failed to send email alert: EMAIL_SENDER, EMAIL_RECEIVER and "
              "EMAIL_PASSWORD must all be set")
        return

    msg = MIMEMultipart()
    msg['From'] = EMAIL_SENDER
    msg['To'] = EMAIL_RECEIVER
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    try:
        # Without a timeout an unreachable mail server would block the
        # caller (the monitoring loop) indefinitely.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=timeout) as server:
            server.starttls()
            server.login(EMAIL_SENDER, EMAIL_PASSWORD)
            server.sendmail(EMAIL_SENDER, EMAIL_RECEIVER, msg.as_string())
        print("Email alert sent successfully!")
    except Exception as e:
        # Deliberately broad: alerting must never take the watcher down.
        print(f"Failed to send email alert: {e}")
42
+
43
+
44
def check_vm_status():
    """Return True when a single ping to VM_IP exits with status 0."""
    ping_command = f"ping -c 1 {VM_IP}"  # -c 1: send one ICMP echo request
    exit_status = os.system(ping_command)
    # os.system reports 0 only when the command succeeded.
    return exit_status == 0
47
+
48
+
49
def check_ui_status(timeout=10):
    """Report whether the Gradio UI answers an HTTP GET with status 200.

    Args:
        timeout: Seconds to wait for the server before treating the UI as
            down. Without it a stalled server would hang the check forever.

    Returns:
        True when the response status code is 200; False for any other
        status code or when the request fails.
    """
    try:
        response = requests.get(GRADIO_UI_URL, timeout=timeout)
    except requests.ConnectionError:
        print("Failed to connect to UI.")
        return False
    except requests.RequestException as exc:
        # Read timeouts, redirect loops, etc. also mean the UI is unusable;
        # report them as "down" rather than letting them crash the caller.
        print(f"UI health check failed: {exc}")
        return False

    if response.status_code == 200:
        return True
    print(f"UI returned status code {response.status_code}")
    return False
60
+
61
+
62
def monitor_vm(EMAIL_ALERT=False):
    """Poll the UI forever and run the recovery script when it is down.

    Each cycle checks the UI. When it is down the VM is pinged: if the VM
    answers, RECOVERY_SCRIPT_PATH is run with bash; otherwise only an alert
    is raised. The loop then sleeps CHECK_INTERVAL seconds and never returns.

    Args:
        EMAIL_ALERT: When true, an email is sent for every detected outage.
    """
    while True:
        if check_ui_status():
            print(f"{time.ctime()} ---- Meowthematical chatbot is up and running")
        else:
            print(f"{time.ctime()} ---- PRODUCT is DOWN!!!")
            if not check_vm_status():
                # Both the product and the VM are gone: nothing to recover.
                if EMAIL_ALERT:
                    send_email_alert(
                        subject="VM Down Alert",
                        body=f"Vitural Machine at IP {VM_IP} is down as of {time.ctime()}"
                    )
            else:
                print(f"{time.ctime()} ---- VM {VM_IP} is up and running, starting recovery process...")
                recovery_command = ['bash', RECOVERY_SCRIPT_PATH]
                try:
                    # check=True turns a non-zero exit into CalledProcessError.
                    subprocess.run(recovery_command,
                                   check=True,
                                   capture_output=True,
                                   text=True)
                except subprocess.CalledProcessError as e:
                    print(f"An error occurred: {e}")
                    print(f"Return code: {e.returncode}")
                    print(f"Output: {e.output}")
                    print(f"Error: {e.stderr}")

                if EMAIL_ALERT:
                    send_email_alert(
                        subject="MEOWTHMATICAL Down Alert",
                        body=f"MEOWTHEMATICAL with IP {VM_IP} is down as of {time.ctime()}"
                    )
        time.sleep(CHECK_INTERVAL)
101
+
102
+
103
if __name__ == "__main__":
    # Start the watcher with email alerts switched off.
    monitor_vm(EMAIL_ALERT=False)