# File: MiniGPT-4/minigpt4/projects/minigpt/train/minigptv2_finetune_vqa.yaml
# Timestamp: 2023-12-01 23:17:44 +08:00
#
# Listing metadata: 171 lines, 3.6 KiB, YAML

# Model section: architecture selection, input sizes, checkpoint locations
# and LoRA settings. All keys below must be nested under `model:`.
model:
  arch: minigpt_v2
  model_type: pretrain
  max_txt_len: 1024
  image_size: 448
  end_sym: "</s>"

  # Machine-specific absolute paths; adjust for the local environment.
  llama_model: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/llama_2_7b_chat"
  ckpt: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/minigptv2/checkpoint_stage2.pth"

  use_grad_checkpoint: true
  chat_template: true

  # LoRA hyperparameters (presumably rank and scaling alpha; confirm against the model code).
  lora_r: 64
  lora_alpha: 16
# Dataset mixture. Only `llava_reason` and `ok_vqa` are enabled; the other
# entries are kept commented out and can be re-enabled by stripping the
# leading "# " from each of their lines.
# The number in the trailing comment after each dataset name is presumably
# the sample count of that dataset (TODO confirm).
# NOTE(review): `sample_ratio` looks like a relative sampling weight between
# datasets; verify against the data loader.
datasets:
  # llava_conversation:  # 56681
  #   batch_size: 2
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 30

  # unnatural_instruction:  # pure text 65852
  #   batch_size: 1
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 10

  # llava_detail:  # 23240
  #   batch_size: 4
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 20

  llava_reason:  # 76643
    batch_size: 4
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 80

  # coco_caption:  # 414113 train
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 10

  # textcaps_caption:  # 109765 train
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 30

  # coco_vqa:  # 658104
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 15

  ok_vqa:  # train, valid (9009, 5046)
    batch_size: 6
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 8

  # aok_vqa:  # train: 17056, val: 1145
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 12

  # gqa:  # (train: 943000, 12578, 12578)
  #   type: balanced_sft_raw
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 50

  # ocrvqa:  # train 207572
  #   batch_size: 6
  #   vis_processor:
  #     train:
  #       name: "blip2_image_train"
  #       image_size: 448
  #   text_processor:
  #     train:
  #       name: "blip_caption"
  #   sample_ratio: 30
# Run section: task, optimizer / LR schedule, training length, output
# location and distributed-runtime settings. All keys below must be nested
# under `run:`.
run:
  task: image_text_pretrain

  # optimizer / learning-rate schedule
  lr_sched: "linear_warmup_cosine_lr"
  # Learning rates are written with an explicit decimal point so that
  # YAML 1.1 loaders (e.g. plain PyYAML) parse them as floats instead of
  # strings; the numeric values are unchanged.
  # NOTE(review): min_lr (8.0e-5) is larger than init_lr (1.0e-5); confirm
  # this is intended for the cosine schedule.
  init_lr: 1.0e-5
  min_lr: 8.0e-5
  warmup_lr: 1.0e-6

  weight_decay: 0.05
  max_epoch: 1
  num_workers: 6
  # NOTE(review): warmup_steps equals max_epoch * iters_per_epoch (1000), so
  # warm-up presumably covers the whole run; verify against the scheduler.
  warmup_steps: 1000
  iters_per_epoch: 1000

  seed: 42
  # Machine-specific absolute path; adjust for the local environment.
  output_dir: "/mnt/pfs-guan-ssai/nlu/wanghanzi/experiments/blip2/minigpt/v2/vqa_pretrain_3B_llama2_7b_chat_stage3_train_linear_lora_test_1030"

  amp: true
  resume_ckpt_path: null

  evaluate: false
  train_splits: ["train"]
  valid_splits: ["val"]
  test_splits: ["test"]

  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true

  wandb_log: false
  job_name: minigptv2_finetune