File size: 3,717 Bytes
8ca00c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/bin/bash

#################################################
## TEMPLATE VERSION 1.01                       ##
#################################################
## ALL SBATCH COMMANDS WILL START WITH #SBATCH ##
## DO NOT REMOVE THE # SYMBOL                  ## 
#################################################

#SBATCH --nodes=1                   # How many nodes required? Usually 1
#SBATCH --cpus-per-task=10           # Number of CPU to request for the job
#SBATCH --mem=128GB                   # How much memory does your job require?
#SBATCH --gres=gpu:1                # Do you require GPUS? If not delete this line
#SBATCH --time=05-00:00:00          # How long to run the job for? Jobs that exceed this time will be terminated
                                    # Format <DD-HH:MM:SS> eg. 5 days 05-00:00:00
                                    # Format <DD-HH:MM:SS> eg. 24 hours 1-00:00:00 or 24:00:00
#SBATCH --mail-type=BEGIN,END,FAIL  # When should you receive an email?
#SBATCH --output=%u.%j.out          # Where should the log files go?
                                    # You must provide an absolute path eg /common/home/module/username/
                                    # If no paths are provided, the output file will be placed in your current working directory
#SBATCH --requeue                   # Remove if you do not want the workload scheduler to requeue your job after preemption
#SBATCH --constraint=l40		# This tells the workload scheduler to provision you l40 nodes 

################################################################
## EDIT AFTER THIS LINE IF YOU ARE OKAY WITH DEFAULT SETTINGS ##
################################################################

# ================ Account parameters ================

# Description			| Value
# ---------------------------------------------
# Account name                    | tanahhweeresearch
# List of Assigned Partition      | researchlong researchshort tanahhweeresearch
# List of Assigned QOS            | research-1-qos tanahhweeresearch-priority
# ---------------------------------------------


#SBATCH --partition=researchlong                 # The partition you've been assigned
#SBATCH --account=tanahhweeresearch   # The account you've been assigned (normally student)
#SBATCH --qos=research-1-qos       # What is the QOS assigned to you? Check with myinfo command
#SBATCH --mail-user=haotian.hu.2021@scis.smu.edu.sg # Who should receive the email notifications
#SBATCH --job-name=1GPU_LLM_HT # Give the job a name

#################################################
##            END OF SBATCH COMMANDS           ##
#################################################

# Purge the environment, load the modules we require.
# Refer to https://violet.smu.edu.sg/origami/module/ for more information
module purge
module load Anaconda3/2022.05
module load CUDA/12.1.1

# Do not remove this line even if you have executed conda init
eval "$(conda shell.bash hook)"

# Creating the virtual environment is a one-off step; leave this commented out
# if the environment already exists.
# conda create -n llm_ht python=3.11

# Activate the previously created environment by name. Abort if activation
# fails so the job does not keep running in the wrong Python environment.
# conda activate tgi
if ! conda activate llm_ht; then
  echo "Error: failed to activate conda environment 'llm_ht'" >&2
  exit 1
fi

# If you require any packages, install them before the srun job submission.
# conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia

# Submit your job to the cluster.
# Usage:
#   sbatch logical-reasoning/scripts/1gpu_llm_ht.sh tune-mgtv-qwen2_7b.sh [args...]
# $1 is the name of a script inside BASEDIR; any further arguments are
# forwarded unchanged to that script.
BASEDIR="$HOME/logical-reasoning/scripts"
JOB="${1:-}"

# Without a job name srun would be pointed at the BASEDIR directory itself,
# so fail early with a usage hint instead.
if [[ -z "$JOB" ]]; then
  echo "Usage: sbatch 1gpu_llm_ht.sh <job-script> [args...]" >&2
  exit 2
fi

JOB_PATH="$BASEDIR/$JOB"

if [[ ! -f "$JOB_PATH" ]]; then
  echo "Error: job script not found: $JOB_PATH" >&2
  exit 1
fi

# srun executes the file directly, so it needs the execute bit.
if [[ ! -x "$JOB_PATH" ]]; then
  echo "Error: job script is not executable: $JOB_PATH" >&2
  exit 1
fi

echo "Submitting job: $JOB_PATH"
# "${@:2}" expands to every argument after the job name (nothing if none given).
srun --gres=gpu:1 "$JOB_PATH" "${@:2}"