update data preparation

This commit is contained in:
junchen14 2023-10-24 21:26:20 +03:00
parent 1d0c37d924
commit ea106865c6
13 changed files with 55 additions and 40 deletions

View File

@@ -8,17 +8,18 @@ Image source | Download path
COCO 2014 images | <a href="http://images.cocodataset.org/zips/train2014.zip">images</a> &nbsp;&nbsp; <a href="https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json"> captions</a>
COCO VQA | <a href="https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json">vqa train</a> &nbsp;&nbsp; <a href="https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json"> vqa val</a>
Visual Genome | <a href="https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip">images part1</a> <a href="https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip">images part2</a>
TextCaps | <a href="https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip">images</a> <a href="https://dl.fbaipublicfiles.com/textvqa/data/textcaps/TextCaps_0.1_train.json"> annotations</a>
TextCaps | <a href="https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip">images</a> &nbsp;&nbsp; <a href="https://dl.fbaipublicfiles.com/textvqa/data/textcaps/TextCaps_0.1_train.json"> annotations</a>
RefCOCO | <a href="https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcoco.zip"> annotations </a>
RefCOCO+ | <a href="https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcoco+.zip"> annotations </a>
RefCOCOg | <a href="https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcocog.zip"> annotations </a>
LLaVA | <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/complex_reasoning_77k.json"> Complex reasoning </a> &nbsp;&nbsp; <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/detail_23k.json"> Detailed description </a> &nbsp;&nbsp; <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/conversation_58k.json"> Conversation </a>
OKVQA | <a href="https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/okvqa/okvqa_train.json"> annotations </a>
AOK-VQA | <a href="https://prior-datasets.s3.us-east-2.amazonaws.com/aokvqa/aokvqa_v1p0.tar.gz"> annotations </a>
OCR-VQA | <a href="https://drive.google.com/drive/folders/1_GYPY5UkUy7HIcR0zq3ZCFgeZN7BAfm_?usp=sharing"> annotations </a>
GQA | <a href="">images</a> &nbsp;&nbsp; <a href="/ibex/project/c2133/minigpt4_v2_dataset/gqa/annotations/train_balanced_questions.json"> annotations </a>
Filtered Flickr-30k | <a href="https://drive.google.com/drive/folders/19c_ggBI77AvdtYlPbuI0ZpnPz73T5teX?usp=sharing"> annotations </a>
Multi-task conversation | <a href="https://drive.google.com/file/d/11HHqB2c29hbSk-WLxdta-nG8UCUrcCN1/view?usp=sharing"> annotations </a>
Filtered unnatural instruction | <a href="https://drive.google.com/file/d/1lXNnBcb5WU-sc8Fe2T2N8J0NRw4sBLev/view?usp=sharing"> annotations </a>
LLaVA | <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/complex_reasoning_77k.json"> Complex reasoning </a> &nbsp;&nbsp; <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/detail_23k.json"> Detailed description </a> &nbsp;&nbsp; <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/conversation_58k.json"> Conversation </a>
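Every section below unpacks into the same `${MINIGPTv2_DATASET}` root, so it can help to pre-create the layout up front. A minimal sketch (the root path is whatever you choose; the subfolder names mirror the trees shown in the per-dataset sections of this guide):

```shell
#!/bin/sh
# Sketch: pre-create the ${MINIGPTv2_DATASET} layout used throughout
# this guide. Folder names follow the trees shown below; adjust to taste.
set -e
MINIGPTv2_DATASET="${MINIGPTv2_DATASET:-./minigptv2_dataset}"

for d in textcaps/train_images llava okvqa aokvqa ocrvqa/images gqa \
         filtered_flickr unnatural_instructions refcoco_annotations; do
  # -p creates intermediate directories and is a no-op if they exist
  mkdir -p "${MINIGPTv2_DATASET}/${d}"
done
```

Each dataset's annotation files then go into its matching subfolder, and the per-dataset configs point at those paths.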
@@ -76,7 +77,7 @@ Download the TextCaps images and annotation files
```
├── ${MINIGPTv2_DATASET}
│ ├── TextCaps
│ ├── textcaps
│ ├── train_images
│ ├── TextCaps_0.1_train.json
```
@@ -118,25 +119,6 @@ Similarly, set **ann_path** in all the following configs to the above folder (Lo
- [minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml](../minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml)
### LLaVA
```
Location_you_like
├── ${MINIGPTv2_DATASET}
│ ├── llava
│ ├── conversation_58k.json
│ ├── detail_23k.json
│ ├── complex_reasoning_77k.json
```
Set **image_path** to the COCO 2014 image folder.
Similarly, set **ann_path** to the locations of the previously downloaded conversation_58k.json,
detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yaml, and reason.yaml, respectively.
- [minigpt4/configs/datasets/llava/conversation.yaml](../minigpt4/configs/datasets/llava/conversation.yaml)
- [minigpt4/configs/datasets/llava/detail.yaml](../minigpt4/configs/datasets/llava/detail.yaml)
- [minigpt4/configs/datasets/llava/reason.yaml](../minigpt4/configs/datasets/llava/reason.yaml)
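For illustration, a filled-in conversation.yaml might look like the following sketch (the dataset key and field layout are assumptions patterned on the other dataset configs in this repo, not the file's verbatim contents):

```yaml
datasets:
  llava_conversation:          # key name is an assumption
    data_type: images
    build_info:
      image_path: /path/to/coco/train2014               # COCO 2014 image folder
      ann_path: /path/to/llava/conversation_58k.json    # downloaded annotation file
```

detail.yaml and reason.yaml follow the same pattern with detail_23k.json and complex_reasoning_77k.json.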
### OKVQA
@@ -145,7 +127,7 @@ detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yam
```
Location_you_like
├── ${MINIGPTv2_DATASET}
│ ├── OKVQA
│ ├── okvqa
│ ├── okvqa_train.json
```
@@ -172,7 +154,7 @@ curl -fsSL https://prior-datasets.s3.us-east-2.amazonaws.com/aokvqa/aokvqa_v1p0.
```
Location_you_like
├── ${MINIGPTv2_DATASET}
│ ├── AOKVQA
│ ├── aokvqa
│ ├── aokvqa_v1p0_train.json
```
@@ -185,11 +167,24 @@ Similarly, set **ann_path** to the location of the AOKVQA dataset
### OCR-VQA
Download the OCR-VQA annotation files, then download the images with the loadDataset.py script.
```
Location_you_like
├── ${MINIGPTv2_DATASET}
│ ├── OCR-VQA
│ ├── ocrvqa
│ ├── images
│ ├── dataset.json
```
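As a rough sketch of what the OCR-VQA image downloader does (this is not the release's loadDataset.py itself; it assumes, as in the OCR-VQA release, that dataset.json maps each image id to a record containing an "imageURL" field), it boils down to:

```python
# Hedged sketch of an OCR-VQA image downloader; not the official
# loadDataset.py. Assumes dataset.json maps image id -> record with
# an "imageURL" field, as in the OCR-VQA release.
import json
import os
import urllib.request


def image_jobs(dataset):
    """Yield (local_filename, url) pairs for every entry of dataset.json."""
    for image_id, record in dataset.items():
        url = record["imageURL"]
        # keep the source extension; fall back to .jpg if the URL has none
        ext = os.path.splitext(url)[1] or ".jpg"
        yield image_id + ext, url


def download_images(dataset_json, out_dir):
    os.makedirs(out_dir, exist_ok=True)
    with open(dataset_json) as f:
        dataset = json.load(f)
    for name, url in image_jobs(dataset):
        path = os.path.join(out_dir, name)
        if not os.path.exists(path):  # skip files already fetched (resume-friendly)
            urllib.request.urlretrieve(url, path)
```

The downloaded images land in the `images` folder shown in the tree above, alongside dataset.json.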
### GQA
Download the GQA annotation files and images.
```
Location_you_like
├── ${MINIGPTv2_DATASET}
│ ├── gqa
│ ├── images
│ ├── train_balanced_questions.json
```
@@ -243,11 +238,31 @@ Download the filtered unnatural instruction annotation files (we remove the very
```
Location_you_like
├── ${MINIGPTv2_DATASET}
│ ├── unnatural-instructions
│ ├── unnatural_instructions
│ ├── filtered_unnatural_instruction.json
```
There is no image path for this dataset.
Similarly, set **ann_path** to the path of the filtered_unnatural_instruction.json file.
- [minigpt4/configs/datasets/nlp/unnatural_instruction.yaml](../minigpt4/configs/datasets/nlp/unnatural_instruction.yaml)
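A sketch of the filled-in config (the dataset key is an assumption patterned on the other dataset configs; note there is no image_path here):

```yaml
datasets:
  unnatural_instruction:       # key name is an assumption
    data_type: text            # text-only dataset, no image_path
    build_info:
      ann_path: /path/to/unnatural_instructions/filtered_unnatural_instruction.json
```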
### LLaVA
```
Location_you_like
├── ${MINIGPTv2_DATASET}
│ ├── llava
│ ├── conversation_58k.json
│ ├── detail_23k.json
│ ├── complex_reasoning_77k.json
```
Set **image_path** to the COCO 2014 image folder.
Similarly, set **ann_path** to the locations of the previously downloaded conversation_58k.json,
detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yaml, and reason.yaml, respectively.
- [minigpt4/configs/datasets/llava/conversation.yaml](../minigpt4/configs/datasets/llava/conversation.yaml)
- [minigpt4/configs/datasets/llava/detail.yaml](../minigpt4/configs/datasets/llava/detail.yaml)
- [minigpt4/configs/datasets/llava/reason.yaml](../minigpt4/configs/datasets/llava/reason.yaml)

View File

@@ -24,6 +24,6 @@ datasets:
url:
- https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/aokvqa_v1p0_train.json
storage:
- /ibex/project/c2133/minigpt4_v2_dataset/aokvqa/annotations/aokvqa_v1p0_train.json
- /ibex/project/c2090/minigptv2_dataset/aokvqa/aokvqa_v1p0_train.json
images:
storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/

View File

@@ -25,7 +25,7 @@ datasets:
train:
url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json
md5: aa31ac474cf6250ebb81d18348a07ed8
storage: /ibex/project/c2133/minigpt4_v2_dataset/coco_caption/annotations/coco_karpathy_train.json
storage: /ibex/project/c2090/minigptv2_dataset/coco_captions/coco_karpathy_train.json
# val:
# url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_val.json
# md5: b273847456ef5580e33713b1f7de52a0

View File

@@ -27,7 +27,7 @@ datasets:
- https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json
- https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json
storage:
- /ibex/project/c2133/minigpt4_v2_dataset/vqav2/annotations/vqa_train.json
- /ibex/project/c2133/minigpt4_v2_dataset/vqav2/coco/annotations/vqa_val.json
- /ibex/project/c2090/minigptv2_dataset/vqav2/vqa_train.json
- /ibex/project/c2090/minigptv2_dataset/vqav2/vqa_val.json
images:
storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg

View File

@@ -3,6 +3,6 @@ datasets:
data_type: images
build_info:
image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
ann_path: /ibex/project/c2133/object_detection_datasets/
ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
dataset: invrefcoco
splitBy: unc

View File

@@ -3,6 +3,6 @@ datasets:
data_type: images
build_info:
image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
ann_path: /ibex/project/c2133/object_detection_datasets/
ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
dataset: invrefcocog
splitBy: umd

View File

@@ -3,6 +3,6 @@ datasets:
data_type: images
build_info:
image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
ann_path: /ibex/project/c2133/object_detection_datasets/
ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
dataset: invrefcoco+
splitBy: unc

View File

@@ -3,6 +3,6 @@ datasets:
data_type: images
build_info:
image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
ann_path: /ibex/project/c2133/object_detection_datasets/
ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
dataset: refcoco
splitBy: unc

View File

@@ -3,6 +3,6 @@ datasets:
data_type: images
build_info:
image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
ann_path: /ibex/project/c2133/object_detection_datasets/
ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
dataset: refcocog
splitBy: umd

View File

@@ -3,6 +3,6 @@ datasets:
data_type: images
build_info:
image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
ann_path: /ibex/project/c2133/object_detection_datasets/
ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
dataset: refcoco+
splitBy: unc

View File

@@ -3,4 +3,4 @@ datasets:
data_type: images
build_info:
image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images
ann_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/train_grounding_phrase5_v2_last.json
ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/captiontobbox.json

View File

@@ -3,4 +3,4 @@ datasets:
data_type: images
build_info:
image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images
ann_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/train_grounding_phrase5_last.json
ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/groundedcaption.json

View File

@@ -3,4 +3,4 @@ datasets:
data_type: images
build_info:
image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images
ann_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/train_phrase2bbox_resample_last.json
ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/phrasetobbox.json