From 29df461edb4dd2ac9e101babad48daf417ca2fe7 Mon Sep 17 00:00:00 2001
From: Deyao Zhu
Date: Tue, 24 Oct 2023 11:29:35 +0300
Subject: [PATCH 1/4] update data structure in the finetune readme

---
 dataset/README_MINIGPTv2_FINETUNE.md | 136 ++++++++++++++-------
 1 file changed, 73 insertions(+), 63 deletions(-)

diff --git a/dataset/README_MINIGPTv2_FINETUNE.md b/dataset/README_MINIGPTv2_FINETUNE.md
index 5da190b..9e54208 100644
--- a/dataset/README_MINIGPTv2_FINETUNE.md
+++ b/dataset/README_MINIGPTv2_FINETUNE.md
@@ -27,12 +27,13 @@ Download the COCO 2014 images and captions
 
 ```
-├── ${MINIGPTv2_DATASET}
-│   ├── coco_captions
-│   ├── coco_images
-|   ├── annotations
-|   ├── coco_karpathy_train.json
-
+${MINIGPTv2_DATASET}
+├── coco_captions
+│   ├── coco_images
+│   └── annotations
+│       ├── coco_karpathy_train.json
+│       ...
+...
 ```
 
 Set **image_path** to the COCO 2014 image folder.
 
@@ -58,15 +59,17 @@ Similarly, set **ann_path** to the vqa_train.json and vqa_val.json path
 Download visual genome images and annotation files
 
 ```
-├── ${MINIGPTv2_DATASET}
-│   ├── visual_genome
-│   ├── VG_100K
-│   ├── VG_100K_2
-|   ├── region_descriptions.json
+${MINIGPTv2_DATASET}
+├── visual_genome
+│   ├── VG_100K
+│   ├── VG_100K_2
+│   ├── region_descriptions.json
+│   ...
+...
 ```
 
 Set **image_path** to visual_genome folder.
-Similarly, set **ann_path** to to visual_genome folder.
+Similarly, set **ann_path** to the visual_genome folder.
 
 - [minigpt4/configs/datasets/vg/ref.yaml](../minigpt4/configs/datasets/vg/ref.yaml)
 
@@ -75,10 +78,11 @@ Similarly, set **ann_path** to to visual_genome folder.
 Download the TextCaps images and annotation files
 
 ```
-├── ${MINIGPTv2_DATASET}
-│   ├── TextCaps
-│   ├── train_images
-│   ├── TextCaps_0.1_train.json
+${MINIGPTv2_DATASET}
+├── TextCaps
+│   ├── train_images
+│   └── TextCaps_0.1_train.json
+...
 ```
 
 Set **image_path** to TextCaps train_images folder.
 
@@ -90,25 +94,26 @@ Similarly, set **ann_path** to the TextCaps_0.1_train.json path
 Download the RefCOCO, RefCOCO+, RefCOCOg annotation files
 
 ```
-Location_you_like
-├── ${MINIGPTv2_DATASET}
-│   ├── refcoco_annotations
-│   ├── refcoco
-|   ├── instances.json
-|   ├── refs(google).p
-|   ├── refs(unc).p
-│   ├── refcoco+
-|   ├── instances.json
-|   ├── refs(unc).p
-│   ├── refcocog
-|   ├── instances.json
-|   ├── refs(google).p
-|   ├── refs(und).p
+
+${MINIGPTv2_DATASET}
+├── refcoco_annotations
+│   ├── refcoco
+│   │   ├── instances.json
+│   │   ├── refs(google).p
+│   │   └── refs(unc).p
+│   ├── refcoco+
+│   │   ├── instances.json
+│   │   └── refs(unc).p
+│   └── refcocog
+│       ├── instances.json
+│       ├── refs(google).p
+│       └── refs(und).p
+...
 ```
 
 Set **image_path** to the COCO 2014 image folder.
 
-Similarly, set **ann_path** in all the following configs to the above folder (Location_you_like) that contains refcoco, refcoco+, and refcocog.
+Similarly, set **ann_path** in all the following configs to the above folder *refcoco_annotations* that contains refcoco, refcoco+, and refcocog.
 
 - [minigpt4/configs/datasets/coco_bbox/refcoco.yaml](../minigpt4/configs/datasets/coco_bbox/refcoco.yaml)
 - [minigpt4/configs/datasets/coco_bbox/refcocog.yaml](../minigpt4/configs/datasets/coco_bbox/refcocog.yaml)
@@ -122,11 +127,12 @@ Similarly, set **ann_path** in all the following configs to the above folder (Lo
 
 ```
 Location_you_like
-├── ${MINIGPTv2_DATASET}
-│   ├── llava
-│   ├── conversation_58k.json
-│   ├── detail_23k.json
-│   ├── complex_reasoning_77k.json
+${MINIGPTv2_DATASET}
+├── llava
+│   ├── conversation_58k.json
+│   ├── detail_23k.json
+│   └── complex_reasoning_77k.json
+...
 ```
 
 Set **image_path** to the COCO 2014 image folder.
@@ -143,10 +149,11 @@ detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yam
 
 ```
-Location_you_like
-├── ${MINIGPTv2_DATASET}
-│   ├── OKVQA
-│   ├── okvqa_train.json
+${MINIGPTv2_DATASET}
+├── OKVQA
+│   ├── okvqa_train.json
+│   ...
+...
 ```
 
 Set **image_path** to the COCO 2014 image folder.
 
@@ -170,10 +177,11 @@
 curl -fsSL https://prior-datasets.s3.us-east-2.amazonaws.com/aokvqa/aokvqa_v1p0.tar.gz | tar xvz -C ${AOKVQA_DIR}
 ```
 
 ```
-Location_you_like
-├── ${MINIGPTv2_DATASET}
-│   ├── AOKVQA
-│   ├── aokvqa_v1p0_train.json
+${MINIGPTv2_DATASET}
+├── AOKVQA
+│   ├── aokvqa_v1p0_train.json
+│   ...
+...
 ```
 
@@ -187,11 +195,12 @@ Similarly, set **ann_path** to the location of the AOKVQA dataset
 Download the OCR-VQA annotation files
 
 ```
-Location_you_like
-├── ${MINIGPTv2_DATASET}
-│   ├── OCR-VQA
-│   ├── images
-│   ├── dataset.json
+${MINIGPTv2_DATASET}
+├── OCR-VQA
+│   ├── images
+│   ├── dataset.json
+│   ...
+...
 ```
 
 Set **image_path** as the OCR-VQA image folder.
@@ -204,13 +213,13 @@ Similarly, set **ann_path** to the OCR-VQA dataset.json
 Download filtered Flickr-30k images and annotation files
 
 ```
-Location_you_like
-├── ${MINIGPTv2_DATASET}
-│   ├── filtered_flickr
-│   ├── images
-│   ├── captiontobbox.json
-│   ├── groundedcaption.json
-│   ├── phrasetobbox.json
+${MINIGPTv2_DATASET}
+├── filtered_flickr
+│   ├── images
+│   ├── captiontobbox.json
+│   ├── groundedcaption.json
+│   └── phrasetobbox.json
+...
 ```
 
 Set **image_path** as the flickr-30k images folder.
@@ -227,9 +236,10 @@ Download the multi-task conversation dataset
 
 ```
 Location_you_like
-├── ${MINIGPTv2_DATASET}
-│   ├── multitask_conversation
-│   ├── multitask_conversation.json
+${MINIGPTv2_DATASET}
+├── multitask_conversation
+│   └── multitask_conversation.json
+...
 ```
 
 Set **image_path** as the COCO 2014 images folder.
@@ -241,10 +251,10 @@ Similarly, set **ann_path** to the multitask_conversation.json file path
 Download the filtered unnatural instruction annotation files (we remove the very long sentences from the original unnatural instruction dataset)
 
 ```
-Location_you_like
-├── ${MINIGPTv2_DATASET}
-│   ├── unnatural-instructions
-│   ├── filtered_unnatural_instruction.json
+${MINIGPTv2_DATASET}
+├── unnatural-instructions
+│   └── filtered_unnatural_instruction.json
+...
 ```
 
 There is no image path.

From 3d9aad7b4bdd962fb96451af48d1a25df8f13392 Mon Sep 17 00:00:00 2001
From: Deyao Zhu
Date: Tue, 24 Oct 2023 11:36:00 +0300
Subject: [PATCH 2/4] update visual genome link

---
 dataset/README_MINIGPTv2_FINETUNE.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/dataset/README_MINIGPTv2_FINETUNE.md b/dataset/README_MINIGPTv2_FINETUNE.md
index 9e54208..9843a2f 100644
--- a/dataset/README_MINIGPTv2_FINETUNE.md
+++ b/dataset/README_MINIGPTv2_FINETUNE.md
@@ -7,7 +7,7 @@ Image source | Download path
 --- | :---:
 COCO 2014 images | images    captions
 COCO VQA | vqa train    vqa val
-Visual Genome | images part1 images part2
+Visual Genome | images part1 images part2 annotations
 TextCaps | images annotations
 RefCOCO | annotations
 RefCOCO+ | annotations
@@ -63,8 +63,7 @@ ${MINIGPTv2_DATASET}
 ├── visual_genome
 │   ├── VG_100K
 │   ├── VG_100K_2
-│   ├── region_descriptions.json
-│   ...
+│   └── region_descriptions.json
 ...
 ```
 
@@ -126,7 +125,6 @@ Similarly, set **ann_path** in all the following configs to the above folder *re
 ### LLaVA
 
 ```
-Location_you_like
 ${MINIGPTv2_DATASET}
 ├── llava
 │   ├── conversation_58k.json

From f3287eeb4dd12b5b6a4ce0d51a2348f956cb96cd Mon Sep 17 00:00:00 2001
From: XiaoqianShen <64844805+xiaoqian-shen@users.noreply.github.com>
Date: Tue, 24 Oct 2023 17:42:09 +0300
Subject: [PATCH 3/4] Update README_MINIGPTv2_FINETUNE.md

---
 dataset/README_MINIGPTv2_FINETUNE.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dataset/README_MINIGPTv2_FINETUNE.md b/dataset/README_MINIGPTv2_FINETUNE.md
index 9843a2f..96af29b 100644
--- a/dataset/README_MINIGPTv2_FINETUNE.md
+++ b/dataset/README_MINIGPTv2_FINETUNE.md
@@ -208,7 +208,7 @@ Similarly, set **ann_path** to the OCR-VQA dataset.json
 
 ### filtered Flickr-30k
 
-Download filtered Flickr-30k images and annotation files
+Download filtered Flickr-30k images (fill this [form](https://forms.illinois.edu/sec/229675)) and annotation files
 
 ```
 ${MINIGPTv2_DATASET}
@@ -258,4 +258,4 @@ ${MINIGPTv2_DATASET}
 There is no image path.
 
 Similarly, set **ann_path** to the filtered_unnatural_instruction.json file path
-- [minigpt4/configs/datasets/nlp/unnatural_instruction.yaml](../minigpt4/configs/datasets/nlp/unnatural_instruction.yaml)
\ No newline at end of file
+- [minigpt4/configs/datasets/nlp/unnatural_instruction.yaml](../minigpt4/configs/datasets/nlp/unnatural_instruction.yaml)

From 1a522eec60e0ed4a59b2bc02f0505ccd26829dc1 Mon Sep 17 00:00:00 2001
From: XiaoqianShen <64844805+xiaoqian-shen@users.noreply.github.com>
Date: Tue, 24 Oct 2023 17:43:55 +0300
Subject: [PATCH 4/4] Update README_MINIGPTv2_FINETUNE.md

---
 dataset/README_MINIGPTv2_FINETUNE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataset/README_MINIGPTv2_FINETUNE.md b/dataset/README_MINIGPTv2_FINETUNE.md
index 96af29b..8de4634 100644
--- a/dataset/README_MINIGPTv2_FINETUNE.md
+++ b/dataset/README_MINIGPTv2_FINETUNE.md
@@ -208,7 +208,7 @@ Similarly, set **ann_path** to the OCR-VQA dataset.json
 
 ### filtered Flickr-30k
 
-Download filtered Flickr-30k images (fill this [form](https://forms.illinois.edu/sec/229675)) and annotation files
+Download filtered Flickr-30k images (fill this [form](https://forms.illinois.edu/sec/229675) on the official website or download from [kaggle](https://www.kaggle.com/datasets/hsankesara/flickr-image-dataset/download?datasetVersionNumber=1)) and annotation files
 
 ```
 ${MINIGPTv2_DATASET}
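
Whichever of the dataset configs above get edited, a quick way to catch a mistyped location before launching finetuning is to scan each YAML file for its **image_path** / **ann_path** entries and check that they point at something that exists on disk. The sketch below is a hypothetical helper, not part of the repository: the script name is invented, and the only assumption is that keys literally named `image_path` and `ann_path` appear somewhere in each config, which is all the README above relies on.

```python
# check_dataset_paths.py -- hypothetical helper, not part of the MiniGPT-4 repo.
# Recursively scans dataset config YAMLs for keys named image_path / ann_path
# and reports any configured path that does not exist on disk.
import sys
from pathlib import Path

import yaml  # PyYAML


def find_path_keys(node, wanted=("image_path", "ann_path")):
    """Yield (key, value) pairs for wanted keys at any nesting depth."""
    if isinstance(node, dict):
        for key, value in node.items():
            if key in wanted and isinstance(value, str):
                yield key, value
            else:
                yield from find_path_keys(value, wanted)
    elif isinstance(node, list):
        for item in node:
            yield from find_path_keys(item, wanted)


def main(config_files):
    missing = 0
    for cfg in config_files:
        data = yaml.safe_load(Path(cfg).read_text()) or {}
        for key, value in find_path_keys(data):
            exists = Path(value).expanduser().exists()
            missing += 0 if exists else 1
            print(f"{cfg}: {key} = {value} [{'ok' if exists else 'MISSING'}]")
    return 1 if missing else 0


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: python check_dataset_paths.py <dataset_config.yaml> ...")
    sys.exit(main(sys.argv[1:]))
```

For example, `python check_dataset_paths.py minigpt4/configs/datasets/coco_bbox/refcoco.yaml minigpt4/configs/datasets/vg/ref.yaml` prints every configured path from those two configs and flags any that are still missing.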