update dataset readme

junchen14 2023-10-25 07:52:44 +03:00
parent f976c7800f
commit 89878d661e
28 changed files with 114 additions and 185 deletions

.gitignore

@@ -179,3 +179,5 @@ jobs/
*.slurm
slurm*
sbatch_generate*
+eval_data/
+dataset/Evaluation.md

MiniGPTv2_Train .md (new file)

@@ -0,0 +1,22 @@
## Finetune of MiniGPT-4

The training of MiniGPT-4 contains two alignment stages.

**1. First pretraining stage**

In the first pretraining stage, the model is trained on image-text pairs from the Laion and CC datasets
to align the vision and language models. To download and prepare the datasets, please check
our [first stage dataset preparation instruction](dataset/README_1_STAGE.md).
After the first stage, the visual features are mapped so that they can be understood by the language
model.
To launch the first stage training, run the following command. In our experiments, we use 4 A100 GPUs.
You can change the save path in the config file
[train_configs/minigpt4_stage1_pretrain.yaml](train_configs/minigpt4_stage1_pretrain.yaml); a sketch of the relevant key follows the command below.
```bash
torchrun --nproc-per-node NUM_GPU train.py --cfg-path train_configs/minigpt4_stage1_pretrain.yaml
```
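As a minimal sketch of where the save path lives (key names assumed to follow the same `run` section layout as the finetune config shown later in this commit; neighbouring keys may differ in the stage-1 file):
```yaml
# Sketch only: the relevant part of train_configs/minigpt4_stage1_pretrain.yaml.
run:
  output_dir: "/path/to/save_checkpoint"  # checkpoints and logs are written here
  amp: True
  resume_ckpt_path: null
```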
A MiniGPT-4 checkpoint with only stage one training can be downloaded
[here (13B)](https://drive.google.com/file/d/1u9FRRBB3VovP1HxCAlpD9Lw4t4P6-Yq8/view?usp=share_link) or [here (7B)](https://drive.google.com/file/d/1HihQtCEXUyBM1i9DQbaK934wW3TZi-h5/view?usp=share_link).
Compared to the model after stage two, this checkpoint frequently generates incomplete and repeated sentences.


@@ -93,9 +93,10 @@ Then, set the variable *llama_model* in the model config file to the LLM weight
Download the pretrained model checkpoints
-| MiniGPT-v2 (LLaMA-2 Chat 7B) |
-|------------------------------|
-| [Download](https://drive.google.com/file/d/1aVbfW7nkCSYx99_vCRyP1sOlQiWVSnAl/view?usp=sharing) |
+| MiniGPT-v2 (developing model (online demo)) | MiniGPT-v2 (after stage-2) | MiniGPT-v2 (after stage-3) |
+|------------------------------|------------------------------|------------------------------|
+| [Download](https://drive.google.com/file/d/1aVbfW7nkCSYx99_vCRyP1sOlQiWVSnAl/view?usp=sharing) | [Download](https://drive.google.com/file/d/1Vi_E7ZtZXRAQcyz4f8E6LtLh2UXABCmu/view?usp=sharing) | [Download](https://drive.google.com/file/d/1jAbxUiyl04SFJMN4sF1vvUU69Etuz4qa/view?usp=sharing) |
For **MiniGPT-v2**, set the path to the pretrained checkpoint in the evaluation config file
in [eval_configs/minigptv2_eval.yaml](eval_configs/minigptv2_eval.yaml#L10) at Line 8.
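As a minimal sketch of the line to edit (key names assumed from the finetune config later in this commit; the exact neighbouring keys in eval_configs/minigptv2_eval.yaml may differ), the checkpoint path is the `ckpt` entry under `model`:
```yaml
# Sketch only: the checkpoint entry referenced by the README.
model:
  arch: minigpt_v2
  llama_model: "/path/to/llama_checkpoint"  # LLaMA-2 Chat 7B weights
  ckpt: "/path/to/pretrained_checkpoint"    # downloaded MiniGPT-v2 checkpoint
```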
@@ -146,6 +147,7 @@ Thanks [@WangRongsheng](https://github.com/WangRongsheng), you can also run Mini
### Training
For training details of MiniGPT-4, check [here](MiniGPT4_Train.md).
+For finetuning details of MiniGPT-v2, check [here](MiniGPTv2_Train.md).


@@ -26,11 +26,22 @@ LLaVA | <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/
### COCO captions
Download the COCO 2014 images and captions
+coco 2014 images path
+```
+${MINIGPTv2_DATASET}
+├── coco
+│   ├── images
+│   ...
+...
+```
+coco caption annotation path
```
${MINIGPTv2_DATASET}
├── coco_captions
-│   ├── coco_images
│   └── annotations
│       ├── coco_karpathy_train.json
│       ...
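As a rough sketch of how the layout above can be produced (the image URLs are the standard COCO 2014 zips, and the caption URL is the one used in the coco caption config later in this commit; the exact final layout should be adjusted to match the trees above and your config paths):
```bash
# Sketch only: fetch COCO 2014 images and the Karpathy caption split into
# ${MINIGPTv2_DATASET}; paths and layout here are assumptions, not the official script.
export MINIGPTv2_DATASET=/path/to/minigptv2_dataset
mkdir -p "$MINIGPTv2_DATASET/coco" "$MINIGPTv2_DATASET/coco_captions/annotations"

# COCO 2014 images (train + val)
wget -c http://images.cocodataset.org/zips/train2014.zip -P "$MINIGPTv2_DATASET/coco"
wget -c http://images.cocodataset.org/zips/val2014.zip   -P "$MINIGPTv2_DATASET/coco"
unzip -q "$MINIGPTv2_DATASET/coco/train2014.zip" -d "$MINIGPTv2_DATASET/coco/images"
unzip -q "$MINIGPTv2_DATASET/coco/val2014.zip"   -d "$MINIGPTv2_DATASET/coco/images"

# Karpathy caption annotations (URL taken from the coco caption config in this commit)
wget -c https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json \
     -P "$MINIGPTv2_DATASET/coco_captions/annotations"
```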


@@ -10,20 +10,11 @@ datasets:
    build_info:
      # Be careful not to append minus sign (-) before split to avoid itemizing
-      # annotations:
-      #   train:
-      #     url:
-      #       - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/aokvqa_v1p0_train.json
-      #     storage:
-      #       - /path/to/aokvqa/annotations/aokvqa_v1p0_train.json
-      #   images:
-      #     storage: /path/to/coco/images/
      annotations:
        train:
          url:
            - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/aokvqa_v1p0_train.json
          storage:
-            - /ibex/project/c2090/minigptv2_dataset/aokvqa/aokvqa_v1p0_train.json
+            - /path/to/aokvqa_v1p0_train.json
      images:
-        storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/
+        storage: /path/to/coco/images


@@ -9,30 +9,13 @@ datasets:
    # data_dir: ${env.data_dir}/datasets
    data_type: images # [images|videos|features]
-    # build_info:
-    #   # Be careful not to append minus sign (-) before split to avoid itemizing
-    #   annotations:
-    #     train:
-    #       url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json
-    #       md5: aa31ac474cf6250ebb81d18348a07ed8
-    #       storage: /path/to/coco_caption/annotations/coco_karpathy_train.json
-    #   images:
-    #     storage: /path/to/coco/images/
    build_info:
      # Be careful not to append minus sign (-) before split to avoid itemizing
      annotations:
        train:
          url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json
          md5: aa31ac474cf6250ebb81d18348a07ed8
-          storage: /ibex/project/c2090/minigptv2_dataset/coco_captions/coco_karpathy_train.json
+          storage: /path/to/coco_caption/coco_karpathy_train.json
-        # val:
-        #   url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_val.json
-        #   md5: b273847456ef5580e33713b1f7de52a0
-        #   storage: /ibex/project/c2133/minigpt4_v2_dataset/coco_caption/annotations/coco_karpathy_val.json
-        # test:
-        #   url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_test.json
-        #   md5: 3ff34b0ef2db02d01c37399f6a2a6cd1
-        #   storage: /ibex/project/c2133/minigpt4_v2_dataset/coco_caption/annotations/coco_karpathy_test.json
      images:
-        storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg
+        storage: /path/to/coco/images


@@ -10,24 +10,15 @@ datasets:
    build_info:
-      # annotations:
-      #   train:
-      #     url:
-      #       - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json
-      #       - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json
-      #     storage:
-      #       - /path/to/vqav2/annotations/vqa_train.json
-      #       - /path/to/vqav2/coco/annotations/vqa_val.json
-      #   images:
-      #     storage: /path/to/coco/images/
      annotations:
        train:
          url:
            - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json
            - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json
          storage:
-            - /ibex/project/c2090/minigptv2_dataset/vqav2/vqa_train.json
+            - /path/to/vqav2/vqa_train.json
-            - /ibex/project/c2090/minigptv2_dataset/vqav2/vqa_val.json
+            - /path/to/vqav2/vqa_val.json
      images:
-        storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg
+        storage: /path/to/coco/images


@@ -2,7 +2,7 @@ datasets:
  invrefcoco:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
+      ann_path: /path/to/refcoco_annotations
      dataset: invrefcoco
      splitBy: unc


@@ -2,7 +2,7 @@ datasets:
  invrefcocog:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
+      ann_path: /path/to/refcoco_annotations
      dataset: invrefcocog
      splitBy: umd


@@ -2,7 +2,7 @@ datasets:
  invrefcocop:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
+      ann_path: /path/to/refcoco_annotations
      dataset: invrefcoco+
      splitBy: unc


@@ -2,7 +2,7 @@ datasets:
  refcoco:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
+      ann_path: /path/to/refcoco_annotations
      dataset: refcoco
      splitBy: unc


@@ -2,7 +2,7 @@ datasets:
  refcocog:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
+      ann_path: /path/to/refcoco_annotations
      dataset: refcocog
      splitBy: umd


@@ -2,7 +2,7 @@ datasets:
  refcocop:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
+      ann_path: /path/to/refcoco_annotations
      dataset: refcoco+
      splitBy: unc


@@ -2,5 +2,5 @@ datasets:
  flickr_CaptionToPhrase:
    data_type: images
    build_info:
-      image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images
+      image_path: /path/to/filtered_flikcr/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/captiontobbox.json
+      ann_path: /path/to/filtered_flickr/captiontobbox.json


@@ -2,5 +2,5 @@ datasets:
  flickr_grounded_caption:
    data_type: images
    build_info:
-      image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images
+      image_path: /path/to/filtered_flikcr/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/groundedcaption.json
+      ann_path: /path/to/filtered_flikcr/groundedcaption.json


@@ -2,5 +2,5 @@ datasets:
  flickr_ObjectToPhrase:
    data_type: images
    build_info:
-      image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images
+      image_path: /path/to/filtered_flikcr/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/phrasetobbox.json
+      ann_path: /path/to/filtered_flikcr/phrasetobbox.json


@@ -8,19 +8,6 @@ datasets:
    # data_dir: ${env.data_dir}/datasets
    data_type: images # [images|videos|features]
-    # build_info:
-    #   # Be careful not to append minus sign (-) before split to avoid itemizing
-    #   annotations:
-    #     train:
-    #       url:
-    #         - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/train_balanced_questions.json
-    #       storage:
-    #         - /path/to/gqa/annotations/train_balanced_questions.json
-    #   images:
-    #     storage: /path/to/gqa/images/
    build_info:
      # Be careful not to append minus sign (-) before split to avoid itemizing
      annotations:
@@ -28,6 +15,7 @@ datasets:
          url:
            - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/gqa/train_balanced_questions.json
          storage:
-            - /ibex/project/c2090/minigptv2_dataset/gqa/train_balanced_questions.json
+            - /path/to/gqa/train_balanced_questions.json
      images:
-        storage: /ibex/project/c2090/minigptv2_dataset/gqa/images
+        storage: /path/to/gqa/images


@@ -1,12 +1,7 @@
datasets:
-  # llava_conversation:
-  #   data_type: images
-  #   build_info:
-  #     image_path: /path/to/coco/images
-  #     ann_path: /path/to/llava/conversation_58k.json
  llava_conversation:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/llava/conversation_58k.json
+      ann_path: /path/to/llava/conversation_58k.json


@@ -1,12 +1,6 @@
datasets:
-  # llava_detail:
-  #   data_type: images
-  #   build_info:
-  #     image_path: /path/to/coco/images
-  #     ann_path: /path/to/llava/detail_23k.json
  llava_detail:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/llava/detail_23k.json
+      ann_path: /path/to/llava/detail_23k.json


@@ -1,12 +1,7 @@
datasets:
-  # llava_reason:
-  #   data_type: images
-  #   build_info:
-  #     image_path: /path/to/coco/images
-  #     ann_path: /path/to/llava/complex_reasoning_77k.json
  llava_reason:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/llava/complex_reasoning_77k.json
+      ann_path: /path/to/llava/complex_reasoning_77k.json


@@ -1,14 +1,7 @@
datasets:
-  # multitask_conversation:
-  #   data_type: images
-  #   build_info:
-  #     image_path: /path/to/coco/images
-  #     ann_path: /path/to/multitask_conversation/multi_task_conversation.json
  multitask_conversation:
    data_type: images
    build_info:
-      image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+      image_path: /path/to/coco/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/multitask_conversation/multi_task_conversation.json
+      ann_path: /path/to/multitask_conversation/multi_task_conversation.json


@@ -1,10 +1,5 @@
datasets:
-  # unnatural_instruction:
-  #   data_type: text
-  #   build_info:
-  #     ann_path: /path/to/unnatural-instructions/data/unnatural_instruction_filer.json
  unnatural_instruction:
    data_type: text
    build_info:
-      ann_path: /ibex/project/c2090/minigptv2_dataset/unnatural_instructions/unnatural_instruction_filer.json
+      ann_path: /path/to/unnatural_instructions/filtered_unnatural_instruction.json


@@ -1,12 +1,6 @@
datasets:
-  # ocrvqa:
-  #   data_type: images
-  #   build_info:
-  #     image_path: /ibex/project/c2133/minigpt4_v2_dataset/ocrvqa/images
-  #     ann_path: /ibex/project/c2133/minigpt4_v2_dataset/ocrvqa/dataset.json
  ocrvqa:
    data_type: images
    build_info:
-      image_path: /ibex/project/c2090/minigptv2_dataset/ocrvqa/images
+      image_path: /path/to/ocrvqa/images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/ocrvqa/dataset.json
+      ann_path: /path/to/ocrvqa/dataset.json


@@ -8,19 +8,6 @@ datasets:
    # data_dir: ${env.data_dir}/datasets
    data_type: images # [images|videos|features]
-    # build_info:
-    #   # Be careful not to append minus sign (-) before split to avoid itemizing
-    #   annotations:
-    #     train:
-    #       url:
-    #         # TODO make this order insensitive
-    #         - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/okvqa/okvqa_train.json
-    #       storage:
-    #         - /path/to/okvqa/annotations/okvqa_train.json
-    #   images:
-    #     storage: /path/to/okvqa/images
    build_info:
      # Be careful not to append minus sign (-) before split to avoid itemizing
      annotations:
@@ -28,9 +15,7 @@ datasets:
          url:
            # TODO make this order insensitive
            - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/okvqa/okvqa_train.json
-            # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/okvqa/OpenEnded_mscoco_train2014_questions.json
-            # - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/okvqa/mscoco_train2014_annotations.json
          storage:
-            - /ibex/project/c2090/minigptv2_dataset/okvqa/okvqa_train.json
+            - /path/to/okvqa/okvqa_train.json
      images:
-        storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg
+        storage: /path/to/coco/images


@@ -1,16 +1,9 @@
datasets:
-  # textcaps_caption:
-  #   data_type: images
-  #   build_info:
-  #     image_path: /path/to/TextCaps/train_images
-  #     ann_path: /path/to/TextCaps/TextCaps_0.1_train.json
  textcaps_caption:
    data_type: images
    build_info:
-      image_path: /ibex/project/c2090/minigptv2_dataset/textcaps/train_images
+      image_path: /path/to/textcaps/train_images
-      ann_path: /ibex/project/c2090/minigptv2_dataset/textcaps/TextCaps_0.1_train.json
+      ann_path: /path/to/textcaps/TextCaps_0.1_train.json


@@ -1,10 +1,5 @@
datasets:
-  # refvg:
-  #   data_type: images
-  #   build_info:
-  #     data_dir: /path/to/visual_genome
  refvg:
    data_type: images
    build_info:
-      data_dir: /ibex/project/c2090/minigptv2_dataset/visual_genome
+      data_dir: /path/to/visual_genome


@@ -101,6 +101,9 @@ class CaptionToObjectDataset(Dataset):
        instruction = "<Img><ImageHere></Img> {} ".format(instruction)
+        print("CaptionToObject instruction", instruction)
+        print("CaptionToObject answer", answer)
        return {
            "image": image,
            "instruction_input": instruction,
@@ -145,6 +148,9 @@ class PhraseToObjectDataset(Dataset):
        instruction = "<Img><ImageHere></Img> {} ".format(instruction)
+        print("PhraseToObject instruction", instruction)
+        print("PhraseToObject answer", answer)
        return {
            "image": image,
            "instruction_input": instruction,


@@ -1,22 +1,17 @@
model:
  arch: minigpt_v2
  model_type: pretrain
-  freeze_vit: True
-  freeze_qformer: True
  max_txt_len: 1024
-  low_resource: False
  image_size: 448
  end_sym: "</s>"
-  llama_model: "/ibex/project/c2133/llama_v2/llama-2-7b-chat-pytorch_update"
+  llama_model: "/path/to/llama_checkpoint"
-  ckpt: "/ibex/project/c2090/minigpt4_ckpt/448_perforamnce_correct_v10_vg/20230925064/checkpoint_32.pth"
+  ckpt: "/path/to/pretrained_checkpoint"
  use_grad_checkpoint: True
  chat_template: True
  lora_r: 64
  lora_alpha: 16
datasets:
  multitask_conversation:
    batch_size: 2
    vis_processor:
@@ -26,9 +21,9 @@ datasets:
    text_processor:
      train:
        name: "blip_caption"
-    sample_ratio: 40
+    sample_ratio: 50
-  llava_conversation: # 77k
+  llava_conversation:
    batch_size: 2
    vis_processor:
      train:
@@ -37,8 +32,7 @@ datasets:
    text_processor:
      train:
        name: "blip_caption"
-    sample_ratio: 10
+    sample_ratio: 30
  unnatural_instruction:
    batch_size: 1
@@ -49,7 +43,7 @@ datasets:
    text_processor:
      train:
        name: "blip_caption"
-    sample_ratio: 15
+    sample_ratio: 5
  refvg:
@@ -63,7 +57,7 @@ datasets:
        name: "blip_caption"
    sample_ratio: 40
-  llava_detail: #23K
+  llava_detail:
    batch_size: 4
    vis_processor:
      train:
@@ -74,7 +68,7 @@ datasets:
        name: "blip_caption"
    sample_ratio: 20
-  llava_reason: # 77k
+  llava_reason:
    batch_size: 4
    vis_processor:
      train:
@@ -142,7 +136,7 @@ datasets:
        name: "blip_caption"
    sample_ratio: 10
-  refcoco: # 142k
+  refcoco:
    batch_size: 6
    vis_processor:
      train:
@@ -151,7 +145,7 @@ datasets:
    text_processor:
      train:
        name: "blip_caption"
-    sample_ratio: 15
+    sample_ratio: 20
  refcocop:
@@ -163,7 +157,7 @@ datasets:
    text_processor:
      train:
        name: "blip_caption"
-    sample_ratio: 15
+    sample_ratio: 20
  refcocog:
    batch_size: 6
@@ -174,7 +168,7 @@ datasets:
    text_processor:
      train:
        name: "blip_caption"
-    sample_ratio: 15
+    sample_ratio: 20
@@ -212,7 +206,7 @@ datasets:
    sample_ratio: 10
-  coco_vqa: # 82K
+  coco_vqa:
    batch_size: 6
    vis_processor:
      train:
@@ -223,7 +217,7 @@ datasets:
        name: "blip_caption"
    sample_ratio: 15
-  ok_vqa: # 9k
+  ok_vqa:
    batch_size: 6
    vis_processor:
      train:
@@ -234,7 +228,7 @@ datasets:
        name: "blip_caption"
    sample_ratio: 8
-  aok_vqa: # 17k
+  aok_vqa:
    batch_size: 6
    vis_processor:
      train:
@@ -245,7 +239,7 @@ datasets:
        name: "blip_caption"
    sample_ratio: 12
-  gqa: # 82K
+  gqa:
    batch_size: 6
    vis_processor:
      train:
@@ -254,9 +248,9 @@ datasets:
    text_processor:
      train:
        name: "blip_caption"
-    sample_ratio: 40
+    sample_ratio: 50
-  ocrvqa: # 800K
+  ocrvqa:
    batch_size: 6
    vis_processor:
      train:
@@ -283,7 +277,7 @@ run:
  iters_per_epoch: 1000
  seed: 42
-  output_dir: "/ibex/project/c2090/minigpt4_ckpt/448_finetune_test_online"
+  output_dir: "/path/to/save_checkpoint"
  amp: True
  resume_ckpt_path: null