#!/bin/bash
#
# SLURM batch script: requests one GPU node, activates the `llm_ht` conda
# environment and runs a job script from $HOME/logical-reasoning/scripts.
#
# Usage:
#   sbatch logical-reasoning/scripts/1gpu_llm_ht.sh <job-script>
# Example:
#   sbatch logical-reasoning/scripts/1gpu_llm_ht.sh tune-mgtv-qwen2_7b.sh
#
# Arguments:
#   $1 - file name of the job script, relative to
#        $HOME/logical-reasoning/scripts (must be executable)
#
# NOTE: every #SBATCH directive must stay above the first executable command;
# sbatch stops reading directives once it reaches one.

#################################################
## TEMPLATE VERSION 1.01                       ##
#################################################
## ALL SBATCH COMMANDS WILL START WITH #SBATCH ##
## DO NOT REMOVE THE # SYMBOL                  ##
#################################################

#SBATCH --nodes=1                   # How many nodes required? Usually 1
#SBATCH --cpus-per-task=10          # Number of CPUs to request for the job
#SBATCH --mem=128GB                 # How much memory does your job require?
#SBATCH --gres=gpu:1                # Do you require GPUs? If not, delete this line
#SBATCH --time=05-00:00:00          # How long to run the job for? Jobs exceeding this time will be terminated
                                    # Format eg. 5 days   05-00:00:00
                                    # Format eg. 24 hours 1-00:00:00 or 24:00:00
#SBATCH --mail-type=BEGIN,END,FAIL  # When should you receive an email?
#SBATCH --output=%u.%j.out          # Where should the log files go?
                                    # You must provide an absolute path eg /common/home/module/username/
                                    # If no path is provided, the output file will be placed in your current working directory
#SBATCH --requeue                   # Remove if you do not want the workload scheduler to requeue your job after preemption
#SBATCH --constraint=l40            # This tells the workload scheduler to provision l40 nodes for you

################################################################
## EDIT AFTER THIS LINE IF YOU ARE OKAY WITH DEFAULT SETTINGS ##
################################################################

# ================ Account parameters ================
# Description                 | Value
# ---------------------------------------------
# Account name                | tanahhweeresearch
# List of Assigned Partition  | researchlong researchshort tanahhweeresearch
# List of Assigned QOS        | research-1-qos tanahhweeresearch-priority
# ---------------------------------------------

#SBATCH --partition=researchlong                    # The partition you've been assigned
#SBATCH --account=tanahhweeresearch                 # The account you've been assigned (normally student)
#SBATCH --qos=research-1-qos                        # What is the QOS assigned to you? Check with myinfo command
#SBATCH --mail-user=haotian.hu.2021@scis.smu.edu.sg # Who should receive the email notifications
#SBATCH --job-name=1GPU_LLM_HT                      # Give the job a name

#################################################
##            END OF SBATCH COMMANDS           ##
#################################################

# Validate the job-script argument before touching the environment so that a
# bad submission fails fast with a clear message in the job log.
BASEDIR=$HOME/logical-reasoning/scripts
JOB=${1:-}

if [[ -z "$JOB" ]]; then
  echo "Usage: sbatch 1gpu_llm_ht.sh <job-script>" >&2
  echo "  <job-script> is a file name inside $BASEDIR" >&2
  exit 2
fi

if [[ ! -f "$BASEDIR/$JOB" ]]; then
  echo "Error: job script not found: $BASEDIR/$JOB" >&2
  exit 1
fi

if [[ ! -x "$BASEDIR/$JOB" ]]; then
  echo "Error: job script is not executable: $BASEDIR/$JOB" >&2
  exit 1
fi

# Purge the environment, load the modules we require.
# Refer to https://violet.smu.edu.sg/origami/module/ for more information
module purge
module load Anaconda3/2022.05
module load CUDA/12.1.1

# Do not remove this line even if you have executed conda init
eval "$(conda shell.bash hook)"

# Creating the virtual environment; keep this commented out if the
# environment already exists.
# conda create -n llm_ht python=3.11

# This command assumes that you've already created the environment previously.
# The environment is activated by name; abort if that fails so the job never
# runs silently in the wrong environment.
# conda activate tgi
if ! conda activate llm_ht; then
  echo "Error: failed to activate conda environment 'llm_ht'" >&2
  exit 1
fi

# If you require any packages, install them before the srun job submission.
# conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia

# Submit your job to the cluster. srun is the last command, so its exit
# status becomes the exit status of the batch job.
echo "Submitting job: $BASEDIR/$JOB"
srun --gres=gpu:1 "$BASEDIR/$JOB"