Mirror of https://github.com/Vision-CAIR/MiniGPT-4.git (synced 2025-04-03 09:30:48 +00:00)
remove jobs
This commit is contained in:
parent
507deb3edb
commit
aabb243107
.gitignore (vendored): 2 changes
@@ -181,4 +181,4 @@ slurm*
 sbatch_generate*
 eval_data/
 dataset/Evaluation.md
-jupyter_notebook.slurm
+jupyter_notebook.slurm
@@ -1,24 +0,0 @@
-#!/bin/bash
-#SBATCH --mem=400G              # memory pool for all cores
-#SBATCH --time 24:00:00         # time, specify max time allocation
-#SBATCH --mail-type=END,FAIL    # notifications for job done & fail
-#SBATCH --mail-user=jun.chen@kaust.edu.sa
-#SBATCH --gres=gpu:4
-#SBATCH --cpus-per-gpu=8
-#SBATCH --reservation=A100
-#SBATCH --job-name=finetune_test
-#SBATCH --output=/ibex/project/c2090/logs/finetune_test
-
-cd ..
-
-job_name=finetune_test
-read LOWERPORT UPPERPORT < /proc/sys/net/ipv4/ip_local_port_range
-while :
-do
-    PORT="`shuf -i $LOWERPORT-$UPPERPORT -n 1`"
-    ss -lpn | grep -q ":$PORT " || break
-done
-
-
-torchrun --master-port ${PORT} --nproc-per-node 4 train.py --job_name=${job_name} --cfg-path train_configs/minigpt_v2_finetune.yaml
-
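Both deleted scripts reserve a rendezvous port for torchrun the same way: read the kernel's ephemeral-port range, draw a random candidate with shuf, and retry until ss reports no listener on it. A minimal standalone sketch of that idiom; the helper name pick_free_port is ours, not the repo's:

#!/bin/bash
# Pick a random TCP port from the kernel's ephemeral range and keep it
# only if nothing is already listening there. pick_free_port is a
# hypothetical name; the logic mirrors the deleted scripts above.
pick_free_port() {
    local lower upper port
    read lower upper < /proc/sys/net/ipv4/ip_local_port_range
    while :; do
        port=$(shuf -i "${lower}-${upper}" -n 1)
        ss -lpn | grep -q ":${port} " || break
    done
    echo "${port}"
}

PORT=$(pick_free_port)
echo "using master port ${PORT}"

Note that the check is inherently racy: another process can claim the port between the ss probe and torchrun binding it, which is usually acceptable for a batch job that can simply be resubmitted.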
@@ -1,30 +0,0 @@
-
-cd ..
-
-job_name=minigpt4_v2_test
-read LOWERPORT UPPERPORT < /proc/sys/net/ipv4/ip_local_port_range
-while :
-do
-    PORT="`shuf -i $LOWERPORT-$UPPERPORT -n 1`"
-    ss -lpn | grep -q ":$PORT " || break
-done
-
-
-#torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_llama2/336_final_v1_gqa.yaml
-
-
-#torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_llama2/448_final_v1_gqa_ablation2.yaml
-torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs/minigpt_v2_finetune.yaml
-
-#torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path finetune_conversation_ablation/conversation_v2_last_336_test.yaml
-
-#torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_llama2/336_final_v1_13B.yaml
-
-# torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_final_ablations/448_v2_llama2.yaml
-#accelerate launch train.py --job_name ${job_name} --cfg-path train_configs_final_ablations/336_v2_llama2.yaml
-
-
-# torchrun --master-port ${PORT} --nproc-per-node 2 train.py --job_name ${job_name} --cfg-path train_configs_final_ablations/336_v2_llama2_clip_encoder.yaml
-
-#best_data_ratio_336_full_dataset_lr2e4_v1.yaml
-
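This second script switched experiments by commenting and uncommenting torchrun lines. A sketch of the same launch taking the config path as an argument instead; the CFG variable, its default, and the fixed fallback port are illustrative assumptions, not repo values:

#!/bin/bash
# Illustrative wrapper: choose the config on the command line rather than
# by editing the script. CFG, its default, and the 29500 fallback port are
# assumptions for this sketch, not values from the repo.
job_name=minigpt4_v2_test
PORT="${PORT:-29500}"    # e.g. supplied by the pick_free_port idiom above
CFG="${1:-train_configs/minigpt_v2_finetune.yaml}"
torchrun --master-port "${PORT}" --nproc-per-node 2 train.py \
    --job_name "${job_name}" --cfg-path "${CFG}"

Invoked as, for example, "bash run_finetune.sh train_configs_llama2/336_final_v1_gqa.yaml" (the script name is hypothetical) to reproduce one of the commented-out variants without touching the file.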