# MiniGPT-4/train_configs/minigptv2_finetune.yaml
#
# Repository web-view metadata for the original file: 295 lines, 5.3 KiB, YAML.

# Model section: selects the architecture and sets text/image sizes,
# checkpoint locations and LoRA hyper-parameters.
model:
  arch: minigpt_v2
  model_type: pretrain
  max_txt_len: 1024
  # Same resolution as every dataset's vis_processor.train.image_size.
  image_size: 448
  end_sym: "</s>"
  # Placeholder paths: replace with real checkpoint locations before running.
  llama_model: "/path/to/llama_checkpoint"
  ckpt: "/path/to/pretrained_checkpoint"
  use_grad_checkpoint: true
  chat_template: true
  lora_r: 64
  lora_alpha: 16
# Dataset section: one entry per training dataset. Every entry uses the same
# processors ("blip2_image_train" at image_size 448 and "blip_caption") and
# differs only in batch_size and sample_ratio.
# NOTE(review): sample_ratio looks like a relative sampling weight across
# datasets; confirm against the data-loading code.
datasets:
  multitask_conversation:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 50

  llava_conversation:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 30

  unnatural_instruction:
    batch_size: 1
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  refvg:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 40

  llava_detail:
    batch_size: 4
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 20

  llava_reason:
    batch_size: 4
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80

  flickr_grounded_caption:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80

  flickr_CaptionToPhrase:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80

  flickr_ObjectToPhrase:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80

  coco_caption:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  textcaps_caption:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 30

  refcoco:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 25

  refcocop:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 25

  refcocog:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 25

  invrefcoco:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  invrefcocop:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  invrefcocog:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10

  coco_vqa:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 15

  ok_vqa:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 8

  aok_vqa:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 12

  gqa:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 50

  ocrvqa:
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 30
# Run section: task selection, optimizer/schedule settings, runtime and
# distributed options, and logging.
run:
  task: image_text_pretrain

  # optimizer
  lr_sched: "linear_warmup_cosine_lr"
  # Learning rates carry an explicit decimal point so that YAML 1.1 loaders
  # (whose float pattern requires a ".") resolve them as floats, not strings.
  init_lr: 1.0e-5
  min_lr: 1.0e-6
  warmup_lr: 1.0e-6

  weight_decay: 0.05
  max_epoch: 50
  num_workers: 6
  warmup_steps: 1000
  iters_per_epoch: 1000

  seed: 42
  # Placeholder path: replace with a real output directory before running.
  output_dir: "/path/to/save_checkpoint"

  amp: true
  resume_ckpt_path: null

  evaluate: false
  train_splits: ["train"]

  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true

  wandb_log: true
  job_name: minigptv2_finetune