update data preparation

junchen14 2023-10-24 21:26:20 +03:00
parent 1d0c37d924
commit ea106865c6
13 changed files with 55 additions and 40 deletions

View File

@@ -8,17 +8,18 @@ Image source | Download path
 COCO 2014 images | <a href="http://images.cocodataset.org/zips/train2014.zip">images</a> &nbsp;&nbsp; <a href="https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json"> captions</a>
 COCO VQA | <a href="https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json">vqa train</a> &nbsp;&nbsp; <a href="https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json"> vqa val</a>
 Visual Genome | <a href="https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip">images part1</a> <a href="https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip">images part2</a>
-TextCaps | <a href="https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip">images</a> <a href="https://dl.fbaipublicfiles.com/textvqa/data/textcaps/TextCaps_0.1_train.json"> annotations</a>
+TextCaps | <a href="https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip">images</a> &nbsp;&nbsp; <a href="https://dl.fbaipublicfiles.com/textvqa/data/textcaps/TextCaps_0.1_train.json"> annotations</a>
 RefCOCO | <a href="https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcoco.zip"> annotations </a>
 RefCOCO+ | <a href="https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcoco+.zip"> annotations </a>
 RefCOCOg | <a href="https://bvisionweb1.cs.unc.edu/licheng/referit/data/refcocog.zip"> annotations </a>
-LLaVA | <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/complex_reasoning_77k.json"> Complex reasoning </a> &nbsp;&nbsp;<a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/detail_23k.json"> Detailed description </a> &nbsp;&nbsp; <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/conversation_58k.json"> Conversation </a>
 OKVQA | <a href="https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/okvqa/okvqa_train.json"> annotations </a>
 AOK-VQA | <a href="https://prior-datasets.s3.us-east-2.amazonaws.com/aokvqa/aokvqa_v1p0.tar.gz"> annotations </a>
 OCR-VQA | <a href="https://drive.google.com/drive/folders/1_GYPY5UkUy7HIcR0zq3ZCFgeZN7BAfm_?usp=sharing"> annotations </a>
+GQA | <a href="">images</a> &nbsp;&nbsp; <a href="/ibex/project/c2133/minigpt4_v2_dataset/gqa/annotations/train_balanced_questions.json"> annotations </a>
 Filtered Flickr-30k | <a href="https://drive.google.com/drive/folders/19c_ggBI77AvdtYlPbuI0ZpnPz73T5teX?usp=sharing"> annotations </a>
 Multi-task conversation | <a href="https://drive.google.com/file/d/11HHqB2c29hbSk-WLxdta-nG8UCUrcCN1/view?usp=sharing"> annotations </a>
 Filtered unnatural instruction | <a href="https://drive.google.com/file/d/1lXNnBcb5WU-sc8Fe2T2N8J0NRw4sBLev/view?usp=sharing"> annotations </a>
+LLaVA | <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/complex_reasoning_77k.json"> Complex reasoning </a> &nbsp;&nbsp;<a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/detail_23k.json"> Detailed description </a> &nbsp;&nbsp; <a href="https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/conversation_58k.json"> Conversation </a>
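For anyone reproducing the layout by hand, a minimal sketch of pulling a few of the directly linked annotation files above into a shared dataset root. The `MINIGPTV2_DATASET` environment variable and the lowercase subfolder names are assumptions based on the directory trees shown later in this README, not something the commit defines:

```python
import os
import urllib.request

# Assumed dataset root; stands in for the ${MINIGPTv2_DATASET} placeholder below.
root = os.environ.get("MINIGPTV2_DATASET", "./minigptv2_dataset")

# (subfolder, filename, url) triples taken from the download table above.
files = [
    ("coco_captions", "coco_karpathy_train.json",
     "https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json"),
    ("vqav2", "vqa_train.json",
     "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json"),
    ("okvqa", "okvqa_train.json",
     "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/okvqa/okvqa_train.json"),
]

for subdir, name, url in files:
    target_dir = os.path.join(root, subdir)
    os.makedirs(target_dir, exist_ok=True)
    target = os.path.join(target_dir, name)
    if not os.path.exists(target):  # skip files fetched on a previous run
        print(f"downloading {url} -> {target}")
        urllib.request.urlretrieve(url, target)
```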
@@ -76,7 +77,7 @@ Download the TextCaps images and annotation files
 ```
 ├── ${MINIGPTv2_DATASET}
-│   ├── TextCaps
+│   ├── textcaps
 │       ├── train_images
 │       ├── TextCaps_0.1_train.json
 ```
@@ -118,25 +119,6 @@ Similarly, set **ann_path** in all the following configs to the above folder (Lo
 - [minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml](../minigpt4/configs/datasets/coco_bbox/invrefcocop.yaml)
-### LLaVA
-```
-Location_you_like
-├── ${MINIGPTv2_DATASET}
-│   ├── llava
-│       ├── conversation_58k.json
-│       ├── detail_23k.json
-│       ├── complex_reasoning_77k.json
-```
-Set **image_path** to the COCO 2014 image folder.
-Similarly, set **ann_path** to the location of the previous downloaded conversation_58k.json,
-detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yaml, and reason.yaml, respectively.
-- [minigpt4/configs/datasets/llava/conversation.yaml](../minigpt4/configs/datasets/llava/conversation.yaml)
-- [minigpt4/configs/datasets/llava/detail.yaml](../minigpt4/configs/datasets/llava/detail.yaml)
-- [minigpt4/configs/datasets/llava/reason.yaml](../minigpt4/configs/datasets/llava/reason.yaml)
 ### OKVQA
@@ -145,7 +127,7 @@ detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yam
 ```
 Location_you_like
 ├── ${MINIGPTv2_DATASET}
-│   ├── OKVQA
+│   ├── okvqa
 │       ├── okvqa_train.json
 ```
@@ -172,7 +154,7 @@ curl -fsSL https://prior-datasets.s3.us-east-2.amazonaws.com/aokvqa/aokvqa_v1p0.
 ```
 Location_you_like
 ├── ${MINIGPTv2_DATASET}
-│   ├── AOKVQA
+│   ├── aokvqa
 │       ├── aokvqa_v1p0_train.json
 ```
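The hunk context above references a `curl ... | tar` pipeline for the AOK-VQA annotations. A rough Python equivalent, assuming the archive unpacks its JSON split files directly into the target folder:

```python
import io
import os
import tarfile
import urllib.request

URL = "https://prior-datasets.s3.us-east-2.amazonaws.com/aokvqa/aokvqa_v1p0.tar.gz"
dest = os.path.join(os.environ.get("MINIGPTV2_DATASET", "./minigptv2_dataset"), "aokvqa")
os.makedirs(dest, exist_ok=True)

# Fetch the tarball and unpack it in one pass, mirroring `curl -fsSL ... | tar -x`.
with urllib.request.urlopen(URL) as resp:
    with tarfile.open(fileobj=io.BytesIO(resp.read()), mode="r:gz") as tar:
        tar.extractall(dest)  # expected to yield aokvqa_v1p0_train.json, among other splits
```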
@@ -185,11 +167,24 @@ Similarly, set **ann_path** to the location of the AOKVQA dataset
 ### OCR-VQA
 Download the OCR-VQA annotation files
+Download the images with the loadDataset.py script
 ```
 Location_you_like
 ├── ${MINIGPTv2_DATASET}
-│   ├── OCR-VQA
+│   ├── ocrvqa
+│       ├── images
+│       ├── dataset.json
+```
+
+### GQA
+Download the GQA annotation files
+Download the images with the loadDataset.py script
+```
+Location_you_like
+├── ${MINIGPTv2_DATASET}
+│   ├── gqa
 │       ├── images
 │       ├── dataset.json
 ```
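The OCR-VQA release distributes the loadDataset.py script mentioned above for fetching images. The sketch below is a simplified re-implementation, not the script from the release; it assumes the usual OCR-VQA dataset.json shape in which each record carries an `imageURL` field:

```python
import json
import os
import urllib.request

root = os.path.join(os.environ.get("MINIGPTV2_DATASET", "./minigptv2_dataset"), "ocrvqa")
image_dir = os.path.join(root, "images")
os.makedirs(image_dir, exist_ok=True)

with open(os.path.join(root, "dataset.json")) as f:
    dataset = json.load(f)  # assumed shape: {image_id: {"imageURL": "...", ...}, ...}

for image_id, record in dataset.items():
    url = record["imageURL"]
    ext = os.path.splitext(url)[1] or ".jpg"  # keep the source file extension
    target = os.path.join(image_dir, image_id + ext)
    if os.path.exists(target):
        continue  # resume-friendly: skip images fetched on a previous run
    try:
        urllib.request.urlretrieve(url, target)
    except OSError as err:  # some of the original image links have rotted
        print(f"skipping {image_id}: {err}")
```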
@@ -243,11 +238,31 @@ Download the filtered unnatural instruction annotation files (we remove the very
 ```
 Location_you_like
 ├── ${MINIGPTv2_DATASET}
-│   ├── unnatural-instructions
+│   ├── unnatural_instructions
 │       ├── filtered_unnatural_instruction.json
 ```
 There is no image path.
 Similarly, set **ann_path** to the filtered_unnatural_instruction.json file path
 - [minigpt4/configs/datasets/nlp/unnatural_instruction.yaml](../minigpt4/configs/datasets/nlp/unnatural_instruction.yaml)
+
+### LLaVA
+```
+Location_you_like
+├── ${MINIGPTv2_DATASET}
+│   ├── llava
+│       ├── conversation_58k.json
+│       ├── detail_23k.json
+│       ├── complex_reasoning_77k.json
+```
+Set **image_path** to the COCO 2014 image folder.
+Similarly, set **ann_path** to the location of the previously downloaded conversation_58k.json,
+detail_23k.json, and complex_reasoning_77k.json in conversation.yaml, detail.yaml, and reason.yaml, respectively.
+- [minigpt4/configs/datasets/llava/conversation.yaml](../minigpt4/configs/datasets/llava/conversation.yaml)
+- [minigpt4/configs/datasets/llava/detail.yaml](../minigpt4/configs/datasets/llava/detail.yaml)
+- [minigpt4/configs/datasets/llava/reason.yaml](../minigpt4/configs/datasets/llava/reason.yaml)
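Because the same ann_path edit recurs across many configs (the three LLaVA files above, plus the coco_bbox and flickr files changed below), a small helper can rewrite the key in bulk. A sketch, assuming a plain `ann_path:` line as in the YAML diffs that follow; the local paths are placeholders:

```python
import pathlib
import re

# Placeholder locations; point these at your own checkout and dataset root.
CONFIG_ROOT = pathlib.Path("minigpt4/configs/datasets")
ANN_ROOT = pathlib.Path("minigptv2_dataset/llava")

configs = {
    "llava/conversation.yaml": ANN_ROOT / "conversation_58k.json",
    "llava/detail.yaml": ANN_ROOT / "detail_23k.json",
    "llava/reason.yaml": ANN_ROOT / "complex_reasoning_77k.json",
}

for rel, ann in configs.items():
    cfg = CONFIG_ROOT / rel
    text = cfg.read_text()
    # Rewrite whatever follows "ann_path:" while keeping the line's indentation.
    text = re.sub(r"(?m)^(\s*ann_path:).*$", rf"\1 {ann}", text)
    cfg.write_text(text)
```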

View File

@@ -24,6 +24,6 @@ datasets:
   url:
     - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/aokvqa/aokvqa_v1p0_train.json
   storage:
-    - /ibex/project/c2133/minigpt4_v2_dataset/aokvqa/annotations/aokvqa_v1p0_train.json
+    - /ibex/project/c2090/minigptv2_dataset/aokvqa/aokvqa_v1p0_train.json
 images:
   storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/

View File

@@ -25,7 +25,7 @@ datasets:
 train:
   url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json
   md5: aa31ac474cf6250ebb81d18348a07ed8
-  storage: /ibex/project/c2133/minigpt4_v2_dataset/coco_caption/annotations/coco_karpathy_train.json
+  storage: /ibex/project/c2090/minigptv2_dataset/coco_captions/coco_karpathy_train.json
 # val:
 #   url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_val.json
 #   md5: b273847456ef5580e33713b1f7de52a0

View File

@@ -27,7 +27,7 @@ datasets:
   - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json
   - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json
 storage:
-  - /ibex/project/c2133/minigpt4_v2_dataset/vqav2/annotations/vqa_train.json
-  - /ibex/project/c2133/minigpt4_v2_dataset/vqav2/coco/annotations/vqa_val.json
+  - /ibex/project/c2090/minigptv2_dataset/vqav2/vqa_train.json
+  - /ibex/project/c2090/minigptv2_dataset/vqav2/vqa_val.json
 images:
   storage: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg

View File

@@ -3,6 +3,6 @@ datasets:
 data_type: images
 build_info:
   image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
-  ann_path: /ibex/project/c2133/object_detection_datasets/
+  ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
   dataset: invrefcoco
   splitBy: unc

View File

@@ -3,6 +3,6 @@ datasets:
 data_type: images
 build_info:
   image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
-  ann_path: /ibex/project/c2133/object_detection_datasets/
+  ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
   dataset: invrefcocog
   splitBy: umd

View File

@@ -3,6 +3,6 @@ datasets:
 data_type: images
 build_info:
   image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
-  ann_path: /ibex/project/c2133/object_detection_datasets/
+  ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
   dataset: invrefcoco+
   splitBy: unc

View File

@@ -3,6 +3,6 @@ datasets:
 data_type: images
 build_info:
   image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
-  ann_path: /ibex/project/c2133/object_detection_datasets/
+  ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
   dataset: refcoco
   splitBy: unc

View File

@@ -3,6 +3,6 @@ datasets:
 data_type: images
 build_info:
   image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
-  ann_path: /ibex/project/c2133/object_detection_datasets/
+  ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
   dataset: refcocog
   splitBy: umd

View File

@@ -3,6 +3,6 @@ datasets:
 data_type: images
 build_info:
   image_path: /ibex/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
-  ann_path: /ibex/project/c2133/object_detection_datasets/
+  ann_path: /ibex/project/c2090/minigptv2_dataset/refcoco_annotations
   dataset: refcoco+
   splitBy: unc

View File

@@ -3,4 +3,4 @@ datasets:
 data_type: images
 build_info:
   image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images
-  ann_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/train_grounding_phrase5_v2_last.json
+  ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/captiontobbox.json

View File

@@ -3,4 +3,4 @@ datasets:
 data_type: images
 build_info:
   image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images
-  ann_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/train_grounding_phrase5_last.json
+  ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/groundedcaption.json

View File

@@ -3,4 +3,4 @@ datasets:
 data_type: images
 build_info:
   image_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/images
-  ann_path: /ibex/project/c2133/minigpt4_v2_dataset/flickr/train_phrase2bbox_resample_last.json
+  ann_path: /ibex/project/c2090/minigptv2_dataset/filtered_flickr/phrasetobbox.json