# MiniGPT-4/minigpt4/projects/minigpt/train/minigptv2_finetune.yaml

model:
  arch: minigpt_v2
  model_type: pretrain
  max_txt_len: 1024
  image_size: 448
  end_sym: "</s>"
  llama_model: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/llama_2_7b_chat"
  ckpt: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/minigptv2/checkpoint_stage2.pth"
  use_grad_checkpoint: True
  chat_template: True
  lora_r: 64
  lora_alpha: 16
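  # LoRA adapters on the frozen LLaMA-2 backbone: rank 64 with alpha 16
  # gives an effective scaling of alpha / r = 16 / 64 = 0.25 (assuming the
  # standard peft-style LoRA scaling convention).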
datasets:
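  # Every dataset entry below follows the same schema: a per-GPU batch_size,
  # the train-time image and text processors, and a sample_ratio giving the
  # dataset's relative weight when batches are drawn from the mixture.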
  # multitask_conversation: # in-house data 12171
  #   batch_size: 2
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 50
  llava_conversation: # 56681
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 30
  unnatural_instruction: # pure text 65852
    batch_size: 1
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10
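    # unnatural_instruction is text-only, so the image processor above is
    # presumably kept only for schema uniformity rather than applied.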
  # refvg: # [refer] return the location
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 40
  llava_detail: # 23240
    batch_size: 4
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 20
  llava_reason: # 76643
    batch_size: 4
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80
  # flickr_grounded_caption: # [grounding] : TODO
  #   batch_size: 2
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 80

  # flickr_CaptionToPhrase: # [detection]
  #   batch_size: 2
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 80

  # flickr_ObjectToPhrase: # [detection]
  #   batch_size: 2
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 80
  coco_caption: # 414113 train
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 10
  textcaps_caption: # 109765 train
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 30
  # refcoco:
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 25

  # refcocop:
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 25

  # refcocog:
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 25

  # invrefcoco:
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 10

  # invrefcocop:
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 10

  # invrefcocog:
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 10
  coco_vqa: # 658104
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 15
  ok_vqa: # train, valid (9009, 5046)
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 8
  aok_vqa: # (17056, 1145, 6702)
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 12
  gqa: # (943000, 12578, 12578)
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 50
  ocrvqa: # 207572
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 30
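  # The active sample_ratio values above sum to 295. Assuming the mixture
  # sampler normalizes ratios into sampling probabilities, llava_reason (80)
  # supplies ~27% of batches (80/295) and ok_vqa (8) only ~3%.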
run:
  task: image_text_pretrain
  # optimizer
  lr_sched: "linear_warmup_cosine_lr"
  init_lr: 1e-5
  min_lr: 8e-5
  warmup_lr: 1e-6
  weight_decay: 0.05
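  # After a linear warmup from warmup_lr to init_lr over warmup_steps, a
  # LAVIS-style cosine schedule moves the LR toward min_lr, roughly:
  #   lr(t) = min_lr + (init_lr - min_lr) * 0.5 * (1 + cos(pi * t / T))
  # Note that min_lr (8e-5) is larger than init_lr (1e-5) here, so the
  # cosine phase raises the LR rather than decaying it.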
  max_epoch: 1
  num_workers: 6
  warmup_steps: 1000
  iters_per_epoch: 1000
  seed: 42
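  # With max_epoch 1 and iters_per_epoch 1000 the run is 1000 steps total,
  # so the 1000 warmup_steps span the entire schedule.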
  output_dir: "/mnt/pfs-guan-ssai/nlu/wanghanzi/experiments/blip2/minigpt/v2/vqa_pretrain_3B_llama2_7b_chat_stage3_train_linear_lora_test_1030"
  amp: True
  resume_ckpt_path: null
  evaluate: False
  train_splits: ["train"]
  valid_splits: ["val"]
  test_splits: ["test"]
  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: True
  wandb_log: True
  job_name: minigptv2_finetune
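
# A config like this is typically launched through MiniGPT-4's train.py,
# e.g. (NUM_GPU is a placeholder):
#   torchrun --nproc-per-node NUM_GPU train.py --cfg-path \
#     minigpt4/projects/minigpt/train/minigptv2_finetune.yaml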