From 89878d661ea66417b2e2bdfa7ebba52a5f8ce453 Mon Sep 17 00:00:00 2001 From: junchen14 Date: Wed, 25 Oct 2023 07:52:44 +0300 Subject: [PATCH] update dataset readme --- .gitignore | 4 +- MiniGPTv2_Train .md | 22 ++++++++++ README.md | 8 ++-- dataset/README_MINIGPTv2_FINETUNE.md | 13 +++++- .../configs/datasets/aokvqa/defaults.yaml | 13 +----- minigpt4/configs/datasets/coco/caption.yaml | 23 ++-------- .../configs/datasets/coco/defaults_vqa.yaml | 19 +++----- .../datasets/coco_bbox/invrefcoco.yaml | 4 +- .../datasets/coco_bbox/invrefcocog.yaml | 4 +- .../datasets/coco_bbox/invrefcocop.yaml | 4 +- .../configs/datasets/coco_bbox/refcoco.yaml | 4 +- .../configs/datasets/coco_bbox/refcocog.yaml | 4 +- .../configs/datasets/coco_bbox/refcocop.yaml | 4 +- .../datasets/flickr/caption_to_phrase.yaml | 4 +- minigpt4/configs/datasets/flickr/default.yaml | 4 +- .../datasets/flickr/object_to_phrase.yaml | 4 +- .../configs/datasets/gqa/balanced_val.yaml | 18 ++------ .../configs/datasets/llava/conversation.yaml | 9 +--- minigpt4/configs/datasets/llava/detail.yaml | 10 +---- minigpt4/configs/datasets/llava/reason.yaml | 9 +--- .../multitask_conversation/default.yaml | 11 +---- .../datasets/nlp/unnatural_instruction.yaml | 7 +-- minigpt4/configs/datasets/ocrvqa/ocrvqa.yaml | 10 +---- minigpt4/configs/datasets/okvqa/defaults.yaml | 19 +------- .../configs/datasets/textcaps/caption.yaml | 11 +---- minigpt4/configs/datasets/vg/ref.yaml | 7 +-- minigpt4/datasets/datasets/flickr.py | 6 +++ train_configs/minigpt_v2_finetune.yaml | 44 ++++++++----------- 28 files changed, 114 insertions(+), 185 deletions(-) create mode 100644 MiniGPTv2_Train .md diff --git a/.gitignore b/.gitignore index 7120f43..1dc019b 100755 --- a/.gitignore +++ b/.gitignore @@ -178,4 +178,6 @@ jobs/ *.slurm slurm* -sbatch_generate* \ No newline at end of file +sbatch_generate* +eval_data/ +dataset/Evaluation.md \ No newline at end of file diff --git a/MiniGPTv2_Train .md b/MiniGPTv2_Train .md new file mode 100644 index 
0000000..bd62ef2 --- /dev/null +++ b/MiniGPTv2_Train .md @@ -0,0 +1,22 @@ +## Finetune of MiniGPT-4 + +The training of MiniGPT-4 contains two alignment stages. + +**1. First pretraining stage** + +In the first pretraining stage, the model is trained using image-text pairs from Laion and CC datasets +to align the vision and language model. To download and prepare the datasets, please check +our [first stage dataset preparation instruction](dataset/README_1_STAGE.md). +After the first stage, the visual features are mapped and can be understood by the language +model. +To launch the first stage training, run the following command. In our experiments, we use 4 A100. +You can change the save path in the config file +[train_configs/minigpt4_stage1_pretrain.yaml](train_configs/minigpt4_stage1_pretrain.yaml) + +```bash +torchrun --nproc-per-node NUM_GPU train.py --cfg-path train_configs/minigpt4_stage1_pretrain.yaml +``` + +A MiniGPT-4 checkpoint with only stage one training can be downloaded +[here (13B)](https://drive.google.com/file/d/1u9FRRBB3VovP1HxCAlpD9Lw4t4P6-Yq8/view?usp=share_link) or [here (7B)](https://drive.google.com/file/d/1HihQtCEXUyBM1i9DQbaK934wW3TZi-h5/view?usp=share_link). +Compared to the model after stage two, this checkpoint generates incomplete and repeated sentences frequently. 
diff --git a/README.md b/README.md index 24c371a..d75463b 100644 --- a/README.md +++ b/README.md @@ -93,9 +93,10 @@ Then, set the variable *llama_model* in the model config file to the LLM weight Download the pretrained model checkpoints -| MiniGPT-v2 (LLaMA-2 Chat 7B) | -|------------------------------| -| [Download](https://drive.google.com/file/d/1aVbfW7nkCSYx99_vCRyP1sOlQiWVSnAl/view?usp=sharing) | +| MiniGPT-v2 (developing model (online demo)) | MiniGPT-v2 (after stage-2) | MiniGPT-v2 (after stage-3) +|------------------------------|------------------------------|------------------------------| +| [Download](https://drive.google.com/file/d/1aVbfW7nkCSYx99_vCRyP1sOlQiWVSnAl/view?usp=sharing) |[Download](https://drive.google.com/file/d/1Vi_E7ZtZXRAQcyz4f8E6LtLh2UXABCmu/view?usp=sharing) |[Download](https://drive.google.com/file/d/1jAbxUiyl04SFJMN4sF1vvUU69Etuz4qa/view?usp=sharing) | + For **MiniGPT-v2**, set the path to the pretrained checkpoint in the evaluation config file in [eval_configs/minigptv2_eval.yaml](eval_configs/minigptv2_eval.yaml#L10) at Line 8. @@ -146,6 +147,7 @@ Thanks [@WangRongsheng](https://github.com/WangRongsheng), you can also run Mini ### Training For training details of MiniGPT-4, check [here](MiniGPT4_Train.md). 
+For finetuning details of MiniGPT-v2, check [here](MiniGPTv2_Train.md) diff --git a/dataset/README_MINIGPTv2_FINETUNE.md b/dataset/README_MINIGPTv2_FINETUNE.md index be99f23..6647ce2 100644 --- a/dataset/README_MINIGPTv2_FINETUNE.md +++ b/dataset/README_MINIGPTv2_FINETUNE.md @@ -26,11 +26,22 @@ LLaVA | {} ".format(instruction) + print("CaptionToObject instruction", instruction) + print("CaptionToObject answer", answer) + return { "image": image, "instruction_input": instruction, @@ -145,6 +148,9 @@ class PhraseToObjectDataset(Dataset): instruction = " {} ".format(instruction) + print("PhraseToObject instruction", instruction) + print("PhraseToObject answer", answer) + return { "image": image, "instruction_input": instruction, diff --git a/train_configs/minigpt_v2_finetune.yaml b/train_configs/minigpt_v2_finetune.yaml index 89d595a..d0f0ed7 100644 --- a/train_configs/minigpt_v2_finetune.yaml +++ b/train_configs/minigpt_v2_finetune.yaml @@ -1,22 +1,17 @@ model: arch: minigpt_v2 model_type: pretrain - freeze_vit: True - freeze_qformer: True max_txt_len: 1024 - low_resource: False image_size: 448 end_sym: "" - llama_model: "/ibex/project/c2133/llama_v2/llama-2-7b-chat-pytorch_update" - ckpt: "/ibex/project/c2090/minigpt4_ckpt/448_perforamnce_correct_v10_vg/20230925064/checkpoint_32.pth" + llama_model: "/path/to/llama_checkpoint" + ckpt: "/path/to/pretrained_checkpoint" use_grad_checkpoint: True chat_template: True lora_r: 64 lora_alpha: 16 - datasets: - multitask_conversation: batch_size: 2 vis_processor: @@ -26,9 +21,9 @@ datasets: text_processor: train: name: "blip_caption" - sample_ratio: 40 + sample_ratio: 50 - llava_conversation: # 77k + llava_conversation: batch_size: 2 vis_processor: train: @@ -37,8 +32,7 @@ datasets: text_processor: train: name: "blip_caption" - sample_ratio: 10 - + sample_ratio: 30 unnatural_instruction: batch_size: 1 @@ -49,7 +43,7 @@ datasets: text_processor: train: name: "blip_caption" - sample_ratio: 15 + sample_ratio: 5 refvg: @@ -63,7 
+57,7 @@ datasets: name: "blip_caption" sample_ratio: 40 - llava_detail: #23K + llava_detail: batch_size: 4 vis_processor: train: @@ -74,7 +68,7 @@ datasets: name: "blip_caption" sample_ratio: 20 - llava_reason: # 77k + llava_reason: batch_size: 4 vis_processor: train: @@ -142,7 +136,7 @@ datasets: name: "blip_caption" sample_ratio: 10 - refcoco: # 142k + refcoco: batch_size: 6 vis_processor: train: @@ -151,7 +145,7 @@ datasets: text_processor: train: name: "blip_caption" - sample_ratio: 15 + sample_ratio: 20 refcocop: @@ -163,7 +157,7 @@ datasets: text_processor: train: name: "blip_caption" - sample_ratio: 15 + sample_ratio: 20 refcocog: batch_size: 6 @@ -174,7 +168,7 @@ datasets: text_processor: train: name: "blip_caption" - sample_ratio: 15 + sample_ratio: 20 @@ -212,7 +206,7 @@ datasets: sample_ratio: 10 - coco_vqa: # 82K + coco_vqa: batch_size: 6 vis_processor: train: @@ -223,7 +217,7 @@ datasets: name: "blip_caption" sample_ratio: 15 - ok_vqa: # 9k + ok_vqa: batch_size: 6 vis_processor: train: @@ -234,7 +228,7 @@ datasets: name: "blip_caption" sample_ratio: 8 - aok_vqa: # 17k + aok_vqa: batch_size: 6 vis_processor: train: @@ -245,7 +239,7 @@ datasets: name: "blip_caption" sample_ratio: 12 - gqa: # 82K + gqa: batch_size: 6 vis_processor: train: @@ -254,9 +248,9 @@ datasets: text_processor: train: name: "blip_caption" - sample_ratio: 40 + sample_ratio: 50 - ocrvqa: # 800K + ocrvqa: batch_size: 6 vis_processor: train: @@ -283,7 +277,7 @@ run: iters_per_epoch: 1000 seed: 42 - output_dir: "/ibex/project/c2090/minigpt4_ckpt/448_finetune_test_online" + output_dir: "/path/to/save_checkpoint" amp: True resume_ckpt_path: null