# MiniGPT-4/train_configs/minigpt_v2_finetune.yaml
# 2023-10-24 06:31:52 +03:00

# 300 lines
# 5.5 KiB
# YAML

# Model section: selects the architecture/variant, the base LLaMA weights and
# the starting checkpoint, and sets image/text size limits and LoRA values.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# all keys below are assumed to belong to `model` — confirm against the loader.
model:
  arch: minigpt_v2
  model_type: pretrain
  freeze_vit: true
  freeze_qformer: true
  max_txt_len: 1024
  low_resource: false
  # Same value as the vis_processor image_size used by the dataset entries.
  image_size: 448
  end_sym: "</s>"
  # Cluster-specific absolute paths; adjust for the local environment.
  llama_model: "/ibex/project/c2133/llama_v2/llama-2-7b-chat-pytorch_update"
  ckpt: "/ibex/project/c2090/minigpt4_ckpt/448_perforamnce_correct_v10_vg/20230925064/checkpoint_32.pth"
  use_grad_checkpoint: true
  chat_template: true
  lora_r: 64
  lora_alpha: 16
# Training datasets. Every entry has the same shape:
#   batch_size      - batch size configured for this dataset
#   vis_processor   - image processor for the train split (name + image_size)
#   text_processor  - text processor for the train split (name)
#   sample_ratio    - presumably the relative sampling weight when datasets
#                     are mixed — TODO confirm against the data loader
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# `sample_ratio` is assumed to sit at the dataset level next to `batch_size`.
# Trailing "# 77k"-style comments are carried over from the original file.
datasets:
  multitask_conversation:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 40

  llava_conversation:  # 77k
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  unnatural_instruction:
    batch_size: 1
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 15

  refvg:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 40

  llava_detail:  # 23K
    batch_size: 4
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 20

  llava_reason:  # 77k
    batch_size: 4
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80

  flickr_grounded_caption:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80

  flickr_CaptionToPhrase:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80

  flickr_ObjectToPhrase:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80

  coco_caption:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  textcaps_caption:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  refcoco:  # 142k
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 15

  refcocop:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 15

  refcocog:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 15

  invrefcoco:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  invrefcocop:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  invrefcocog:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  coco_vqa:  # 82K
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 15

  ok_vqa:  # 9k
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 8

  aok_vqa:  # 17k
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 12

  gqa:  # 82K
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 40

  ocrvqa:  # 800K
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 30
# Run section: task name, learning-rate schedule, epoch/iteration counts,
# output location, and device/distributed/logging switches.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# all keys below are assumed to belong to `run` — confirm against the runner.
run:
  task: image_text_pretrain
  # optimizer
  lr_sched: "linear_warmup_cosine_lr"
  # Learning rates are written with an explicit mantissa dot ("1.0e-5", not
  # "1e-5") so that YAML 1.1 loaders also resolve them as floats instead of
  # strings; the numeric values are unchanged.
  # NOTE(review): min_lr (8.0e-5) is larger than init_lr (1.0e-5). For a
  # warmup + cosine schedule one would normally expect min_lr <= init_lr —
  # confirm this is intended before changing either value.
  init_lr: 1.0e-5
  min_lr: 8.0e-5
  warmup_lr: 1.0e-6
  weight_decay: 0.05
  max_epoch: 50
  num_workers: 6
  warmup_steps: 1000
  iters_per_epoch: 1000
  seed: 42
  # Cluster-specific absolute path; adjust for the local environment.
  output_dir: "/ibex/project/c2090/minigpt4_ckpt/448_finetune_test_online"
  amp: true
  resume_ckpt_path: null
  evaluate: false
  train_splits: ["train"]
  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true
  wandb_log: true
  job_name: minigptv2_finetune