From aabb2431072bb484b58beaa2f2febf5c20e8102d Mon Sep 17 00:00:00 2001 From: junchen14 Date: Wed, 25 Oct 2023 09:43:44 +0300 Subject: [PATCH] remove jobs --- .gitignore | 2 +- jobs/sbatch_finetune.sh | 24 ------------------------ jobs/srun_test.sh | 30 ------------------------------ 3 files changed, 1 insertion(+), 55 deletions(-) delete mode 100644 jobs/sbatch_finetune.sh delete mode 100644 jobs/srun_test.sh diff --git a/.gitignore b/.gitignore index d143271..ddbd354 100755 --- a/.gitignore +++ b/.gitignore @@ -181,4 +181,4 @@ slurm* sbatch_generate* eval_data/ dataset/Evaluation.md -jupyter_notebook.slurm \ No newline at end of file +jupyter_notebook.slurm diff --git a/jobs/sbatch_finetune.sh b/jobs/sbatch_finetune.sh deleted file mode 100644 index b739d32..0000000 --- a/jobs/sbatch_finetune.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -#SBATCH --mem=400G # memory pool for all cores` -#SBATCH --time 24:00:00 # time, specify max time allocation` -#SBATCH --mail-type=END,FAIL # notifications for job done & fail` -#SBATCH --mail-user=jun.chen@kaust.edu.sa -#SBATCH --gres=gpu:4 -#SBATCH --cpus-per-gpu=8 -#SBATCH --reservation=A100 -#SBATCH --job-name=finetune_test -#SBATCH --output=/ibex/project/c2090/logs/fientune_test - -cd .. - -job_name=finetune_test -read LOWERPORT UPPERPORT < /proc/sys/net/ipv4/ip_local_port_range -while : -do - PORT="`shuf -i $LOWERPORT-$UPPERPORT -n 1`" - ss -lpn | grep -q ":$PORT " || break -done - - -torchrun --master-port ${PORT} --nproc-per-node 4 train.py --job_name=${job_name} --cfg-path train_configs/minigpt_v2_finetune.yaml - diff --git a/jobs/srun_test.sh b/jobs/srun_test.sh deleted file mode 100644 index ecd9ab3..0000000 --- a/jobs/srun_test.sh +++ /dev/null @@ -1,30 +0,0 @@ - -cd .. - -job_name=minigpt4_v2_test -read LOWERPORT UPPERPORT < /proc/sys/net/ipv4/ip_local_port_range -while : -do - PORT="`shuf -i $LOWERPORT-$UPPERPORT -n 1`" - ss -lpn | grep -q ":$PORT " || break -done - - -#torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_llama2/336_final_v1_gqa.yaml - - -#torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_llama2/448_final_v1_gqa_ablation2.yaml -torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs/minigpt_v2_finetune.yaml - -#torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path finetune_conversation_ablation/conversation_v2_last_336_test.yaml - -#torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_llama2/336_final_v1_13B.yaml - -# torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_final_ablations/448_v2_llama2.yaml -#accelerate launch train.py --job_name ${job_name} --cfg-path train_configs_final_ablations/336_v2_llama2.yaml - - -# torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_final_ablations/336_v2_llama2_clip_encoder.yaml - -#best_data_ratio_336_full_dataset_lr2e4_v1.yaml -