From ea106865c6f7b2924fa05fba9480c8f801dcf035 Mon Sep 17 00:00:00 2001 From: junchen14 Date: Tue, 24 Oct 2023 21:26:20 +0300 Subject: [PATCH] update data preparation --- dataset/README_MINIGPTv2_FINETUNE.md | 69 +++++++++++-------- .../configs/datasets/aokvqa/defaults.yaml | 2 +- minigpt4/configs/datasets/coco/caption.yaml | 2 +- .../configs/datasets/coco/defaults_vqa.yaml | 4 +- .../datasets/coco_bbox/invrefcoco.yaml | 2 +- .../datasets/coco_bbox/invrefcocog.yaml | 2 +- .../datasets/coco_bbox/invrefcocop.yaml | 2 +- .../configs/datasets/coco_bbox/refcoco.yaml | 2 +- .../configs/datasets/coco_bbox/refcocog.yaml | 2 +- .../configs/datasets/coco_bbox/refcocop.yaml | 2 +- .../datasets/flickr/caption_to_phrase.yaml | 2 +- minigpt4/configs/datasets/flickr/default.yaml | 2 +- .../datasets/flickr/object_to_phrase.yaml | 2 +- 13 files changed, 55 insertions(+), 40 deletions(-) diff --git a/dataset/README_MINIGPTv2_FINETUNE.md b/dataset/README_MINIGPTv2_FINETUNE.md index 5da190b..e55e5cc 100644 --- a/dataset/README_MINIGPTv2_FINETUNE.md +++ b/dataset/README_MINIGPTv2_FINETUNE.md @@ -8,17 +8,18 @@ Image source | Download path COCO 2014 images | images    captions COCO VQA | vqa train    vqa val Visual Genome | images part1 images part2 -TextCaps | images annotations +TextCaps | images    annotations RefCOCO | annotations RefCOCO+ | annotations RefCOCOg | annotations -LLaVA | Compelex reasoning    Detailed description    Conversation OKVQA | annotations AOK-VQA | annotations OCR-VQA | annotations +GQA | images    annotations Filtered Flickr-30k | annotations Multi-task conversation | annotations Filtered unnatural instruction | annotations +LLaVA | Compelex reasoning    Detailed description    Conversation @@ -76,7 +77,7 @@ Download the TextCaps images and annotation files ``` ├── ${MINIGPTv2_DATASET} -│ ├── TextCaps +│ ├── textcaps │ ├── train_images │ ├── TextCaps_0.1_train.json ``` @@ -118,25 +119,6 @@ Similarly, set **ann_path** in all the following configs to the above 
folder (Lo - [minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml](../minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml) -### LLaVA - -``` -Location_you_like -├── ${MINIGPTv2_DATASET} -│ ├── llava -│ ├── conversation_58k.json -│ ├── detail_23k.json -│ ├── complex_reasoning_77k.json -``` - -Set **image_path** to the COCO 2014 image folder. -Similarly, set **ann_path** to the location of the previous downloaded conversation_58k.json, -detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yaml, and reason.yaml, respectively. - - -- [minigpt4/configs/datasets/llava/conversation.yaml](../minigpt4/configs/datasets/llava/conversation.yaml) -- [minigpt4/configs/datasets/llava/detail.yaml](../minigpt4/configs/datasets/llava/detail.yaml) -- [minigpt4/configs/datasets/llava/reason.yaml](../minigpt4/configs/datasets/llava/reason.yaml) ### OKVQA @@ -145,7 +127,7 @@ detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yam ``` Location_you_like ├── ${MINIGPTv2_DATASET} -│ ├── OKVQA +│ ├── okvqa │ ├── okvqa_train.json ``` @@ -172,7 +154,7 @@ curl -fsSL https://prior-datasets.s3.us-east-2.amazonaws.com/aokvqa/aokvqa_v1p0. 
``` Location_you_like ├── ${MINIGPTv2_DATASET} -│ ├── AOKVQA +│ ├── aokvqa │ ├── aokvqa_v1p0_train.json ``` @@ -185,11 +167,24 @@ Similarly, set **ann_path** to the location of the AOKVQA dataset ### OCR-VQA Download the OCR-VQA annotation files +download the images with loadDataset.py script ``` Location_you_like ├── ${MINIGPTv2_DATASET} -│ ├── OCR-VQA +│ ├── ocrvqa +│ ├── images +│ ├── dataset.json +``` + +### GQA +Download the GQA annotation files +download the images with loadDataset.py script + +``` +Location_you_like +├── ${MINIGPTv2_DATASET} +│ ├── gqa │ ├── images │ ├── dataset.json ``` @@ -243,11 +238,31 @@ Download the filtered unnatural instruction annotation files (we remove the very ``` Location_you_like ├── ${MINIGPTv2_DATASET} -│ ├── unnatural-instructions +│ ├── unnatural_instructions │ ├── filtered_unnatural_instruction.json ``` There is no image path. Similarly, set **ann_path** to the filtered_unnatural_instruction.json file path -- [minigpt4/configs/datasets/nlp/unnatural_instruction.yaml](../minigpt4/configs/datasets/nlp/unnatural_instruction.yaml) \ No newline at end of file +- [minigpt4/configs/datasets/nlp/unnatural_instruction.yaml](../minigpt4/configs/datasets/nlp/unnatural_instruction.yaml) + +### LLaVA + +``` +Location_you_like +├── ${MINIGPTv2_DATASET} +│ ├── llava +│ ├── conversation_58k.json +│ ├── detail_23k.json +│ ├── complex_reasoning_77k.json +``` + +Set **image_path** to the COCO 2014 image folder. +Similarly, set **ann_path** to the location of the previous downloaded conversation_58k.json, +detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yaml, and reason.yaml, respectively. 
+ + +- [minigpt4/configs/datasets/llava/conversation.yaml](../minigpt4/configs/datasets/llava/conversation.yaml) +- [minigpt4/configs/datasets/llava/detail.yaml](../minigpt4/configs/datasets/llava/detail.yaml) +- [minigpt4/configs/datasets/llava/reason.yaml](../minigpt4/configs/datasets/llava/reason.yaml) diff --git a/minigpt4/configs/datasets/aokvqa/defaults.yaml b/minigpt4/configs/datasets/aokvqa/defaults.yaml index 79d2054..c8828c4 100755 --- a/minigpt4/configs/datasets/aokvqa/defaults.yaml +++ b/minigpt4/configs/datasets/aokvqa/defaults.yaml @@ -24,6 +24,6 @@ datasets: url: - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/aokvqa_v1p0_train.json storage: - - /ibex/project/c2133/minigpt4_v2_dataset/aokvqa/annotations/aokvqa_v1p0_train.json + - /ibex/project/c2090/minigptv2_dataset/aokvqa/aokvqa_v1p0_train.json images: storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/ \ No newline at end of file diff --git a/minigpt4/configs/datasets/coco/caption.yaml b/minigpt4/configs/datasets/coco/caption.yaml index 873c286..3cf7d5f 100644 --- a/minigpt4/configs/datasets/coco/caption.yaml +++ b/minigpt4/configs/datasets/coco/caption.yaml @@ -25,7 +25,7 @@ datasets: train: url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json md5: aa31ac474cf6250ebb81d18348a07ed8 - storage: /ibex/project/c2133/minigpt4_v2_dataset/coco_caption/annotations/coco_karpathy_train.json + storage: /ibex/project/c2090/minigptv2_dataset/coco_captions/coco_karpathy_train.json # val: # url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_val.json # md5: b273847456ef5580e33713b1f7de52a0 diff --git a/minigpt4/configs/datasets/coco/defaults_vqa.yaml b/minigpt4/configs/datasets/coco/defaults_vqa.yaml index 87ae494..3b1edae 100755 --- a/minigpt4/configs/datasets/coco/defaults_vqa.yaml +++ b/minigpt4/configs/datasets/coco/defaults_vqa.yaml @@ -27,7 +27,7 @@ datasets: - 
https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json storage: - - /ibex/project/c2133/minigpt4_v2_dataset/vqav2/annotations/vqa_train.json - - /ibex/project/c2133/minigpt4_v2_dataset/vqav2/coco/annotations/vqa_val.json + - /ibex/project/c2090/minigptv2_dataset/vqav2/vqa_train.json + - /ibex/project/c2090/minigptv2_dataset/vqav2/vqa_val.json images: storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg \ No newline at end of file diff --git a/minigpt4/configs/datasets/coco_bbox/invrefcoco.yaml b/minigpt4/configs/datasets/coco_bbox/invrefcoco.yaml index 580694b..a4b7c7e 100755 --- a/minigpt4/configs/datasets/coco_bbox/invrefcoco.yaml +++ b/minigpt4/configs/datasets/coco_bbox/invrefcoco.yaml @@ -3,6 +3,6 @@ datasets: data_type: images build_info: image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train - ann_path: /ibex/project/c2133/object_detection_datasets/ + ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations dataset: invrefcoco splitBy: unc \ No newline at end of file diff --git a/minigpt4/configs/datasets/coco_bbox/invrefcocog.yaml b/minigpt4/configs/datasets/coco_bbox/invrefcocog.yaml index 67af2eb..8489dd2 100755 --- a/minigpt4/configs/datasets/coco_bbox/invrefcocog.yaml +++ b/minigpt4/configs/datasets/coco_bbox/invrefcocog.yaml @@ -3,6 +3,6 @@ datasets: data_type: images build_info: image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train - ann_path: /ibex/project/c2133/object_detection_datasets/ + ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations dataset: invrefcocog splitBy: umd \ No newline at end of file diff --git a/minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml b/minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml index 576004e..ff52379 100755 --- a/minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml +++ 
b/minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml @@ -3,6 +3,6 @@ datasets: data_type: images build_info: image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train - ann_path: /ibex/project/c2133/object_detection_datasets/ + ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations dataset: invrefcoco+ splitBy: unc \ No newline at end of file diff --git a/minigpt4/configs/datasets/coco_bbox/refcoco.yaml b/minigpt4/configs/datasets/coco_bbox/refcoco.yaml index edf16ba..85e4c9a 100755 --- a/minigpt4/configs/datasets/coco_bbox/refcoco.yaml +++ b/minigpt4/configs/datasets/coco_bbox/refcoco.yaml @@ -3,6 +3,6 @@ datasets: data_type: images build_info: image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train - ann_path: /ibex/project/c2133/object_detection_datasets/ + ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations dataset: refcoco splitBy: unc \ No newline at end of file diff --git a/minigpt4/configs/datasets/coco_bbox/refcocog.yaml b/minigpt4/configs/datasets/coco_bbox/refcocog.yaml index 5ed7cc9..7db50ad 100755 --- a/minigpt4/configs/datasets/coco_bbox/refcocog.yaml +++ b/minigpt4/configs/datasets/coco_bbox/refcocog.yaml @@ -3,6 +3,6 @@ datasets: data_type: images build_info: image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train - ann_path: /ibex/project/c2133/object_detection_datasets/ + ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations dataset: refcocog splitBy: umd \ No newline at end of file diff --git a/minigpt4/configs/datasets/coco_bbox/refcocop.yaml b/minigpt4/configs/datasets/coco_bbox/refcocop.yaml index 4e3af6f..42d4021 100755 --- a/minigpt4/configs/datasets/coco_bbox/refcocop.yaml +++ b/minigpt4/configs/datasets/coco_bbox/refcocop.yaml @@ -3,6 +3,6 @@ datasets: data_type: images build_info: image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train - ann_path: /ibex/project/c2133/object_detection_datasets/ + ann_path: 
/ibex/project/c2090/minigptv2_dataset/refcoco_annotations dataset: refcoco+ splitBy: unc \ No newline at end of file diff --git a/minigpt4/configs/datasets/flickr/caption_to_phrase.yaml b/minigpt4/configs/datasets/flickr/caption_to_phrase.yaml index e1bf547..11895c6 100755 --- a/minigpt4/configs/datasets/flickr/caption_to_phrase.yaml +++ b/minigpt4/configs/datasets/flickr/caption_to_phrase.yaml @@ -3,4 +3,4 @@ datasets: data_type: images build_info: image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images - ann_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/train_grounding_phrase5_v2_last.json + ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/captiontobbox.json diff --git a/minigpt4/configs/datasets/flickr/default.yaml b/minigpt4/configs/datasets/flickr/default.yaml index 5569629..c3d785d 100755 --- a/minigpt4/configs/datasets/flickr/default.yaml +++ b/minigpt4/configs/datasets/flickr/default.yaml @@ -3,4 +3,4 @@ datasets: data_type: images build_info: image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images - ann_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/train_grounding_phrase5_last.json + ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/groundedcaption.json diff --git a/minigpt4/configs/datasets/flickr/object_to_phrase.yaml b/minigpt4/configs/datasets/flickr/object_to_phrase.yaml index d583ed3..30809ff 100755 --- a/minigpt4/configs/datasets/flickr/object_to_phrase.yaml +++ b/minigpt4/configs/datasets/flickr/object_to_phrase.yaml @@ -3,4 +3,4 @@ datasets: data_type: images build_info: image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images - ann_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/train_phrase2bbox_resample_last.json + ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/phrasetobbox.json