diff --git a/environment.yml b/environment.yml
index 8f94afe..51561c7 100644
--- a/environment.yml
+++ b/environment.yml
@@ -31,3 +31,5 @@ dependencies:
     - accelerate==0.20.3
     - bitsandbytes==0.37.0
     - wandb
+    - visual_genome
+    - scikit-image
diff --git a/eval_configs/minigptv2_eval.yaml b/eval_configs/minigptv2_eval.yaml
index 0479f2a..00f3604 100644
--- a/eval_configs/minigptv2_eval.yaml
+++ b/eval_configs/minigptv2_eval.yaml
@@ -5,7 +5,7 @@ model:
   end_sym: ""
   low_resource: True
   prompt_template: '[INST] {} [/INST]'
-  ckpt: 'please set this value to the path of pretrained checkpoint'
+  ckpt: '/mnt/pfs-guan-ssai/nlu/wanghanzi/models/minigptv2/minigptv2_checkpoint.pth'
   lora_r: 64
   lora_alpha: 16
 
diff --git a/minigpt4/configs/datasets/coco/caption.yaml b/minigpt4/configs/datasets/coco/caption.yaml
index ac072a4..d392064 100644
--- a/minigpt4/configs/datasets/coco/caption.yaml
+++ b/minigpt4/configs/datasets/coco/caption.yaml
@@ -15,7 +15,7 @@ datasets:
         train:
           url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json
           md5: aa31ac474cf6250ebb81d18348a07ed8
-          storage: /path/to/coco_caption/coco_karpathy_train.json
+          storage: /mnt/pfs-guan-ssai/nlu/wanghanzi/data/COCO_Cap/coco_karpathy_train.json
       images:
-        storage: /path/to/coco/images
+        storage: /mnt/pfs-guan-ssai/nlu/dingyifeng/data/COCO
 
diff --git a/minigpt4/configs/datasets/coco/defaults_vqa.yaml b/minigpt4/configs/datasets/coco/defaults_vqa.yaml
index 457e0a3..4255cbe 100755
--- a/minigpt4/configs/datasets/coco/defaults_vqa.yaml
+++ b/minigpt4/configs/datasets/coco/defaults_vqa.yaml
@@ -16,9 +16,9 @@ datasets:
 
             - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json
             - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json
           storage:
-            - /path/to/vqav2/vqa_train.json
-            - /path/to/vqav2/vqa_val.json
+            - /mnt/pfs-guan-ssai/nlu/wanghanzi/data/VQAv2/vqa_train.json
+            - /mnt/pfs-guan-ssai/nlu/wanghanzi/data/VQAv2/vqa_val.json
 
       images:
-        storage: /path/to/coco/images
+        storage: /mnt/pfs-guan-ssai/nlu/dingyifeng/data/COCO
\ No newline at end of file
diff --git a/minigpt4/configs/models/minigpt_v2.yaml b/minigpt4/configs/models/minigpt_v2.yaml
index 1d85d20..f67fd0e 100755
--- a/minigpt4/configs/models/minigpt_v2.yaml
+++ b/minigpt4/configs/models/minigpt_v2.yaml
@@ -11,7 +11,8 @@ model:
   # generation configs
   prompt: ""
 
-  llama_model: "please set this value to the path of llama2-chat-7b"
+  # llama_model: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/llama_2_7b_chat"
+  llama_model: "/mnt/pfs-guan-ssai/nlu/data/luhengtong/llama2_model/meta-llama:Llama-2-7b-chat-hf"
 
   lora_r: 64
   lora_alpha: 16
diff --git a/test.txt b/test.txt
new file mode 100644
index 0000000..fe2b17a
--- /dev/null
+++ b/test.txt
@@ -0,0 +1,92 @@
+datasets:
+  multitask_conversation:
+    batch_size: 2
+    sample_ratio: 50
+
+  llava_conversation:
+    batch_size: 2
+    sample_ratio: 30
+
+  unnatural_instruction:
+    batch_size: 1
+    sample_ratio: 10
+
+  refvg:
+    batch_size: 6
+    sample_ratio: 40
+
+  llava_detail:
+    batch_size: 4
+    sample_ratio: 20
+
+  llava_reason:
+    batch_size: 4
+    sample_ratio: 80
+
+
+  flickr_grounded_caption:
+    batch_size: 2
+    sample_ratio: 80
+
+  flickr_CaptionToPhrase:
+    batch_size: 2
+    sample_ratio: 80
+
+  flickr_ObjectToPhrase:
+    batch_size: 2
+    sample_ratio: 80
+
+  coco_caption:
+    batch_size: 6
+    sample_ratio: 10
+
+
+  textcaps_caption:
+    batch_size: 6
+    sample_ratio: 30
+
+  refcoco:
+    batch_size: 6
+    sample_ratio: 25
+
+
+  refcocop:
+    batch_size: 6
+    sample_ratio: 25
+
+  refcocog:
+    batch_size: 6
+    sample_ratio: 25
+
+  invrefcoco:
+    batch_size: 6
+    sample_ratio: 10
+
+  invrefcocop:
+    batch_size: 6
+    sample_ratio: 10
+
+  invrefcocog:
+    batch_size: 6
+    sample_ratio: 10
+
+
+  coco_vqa:
+    batch_size: 6
+    sample_ratio: 15
+
+  ok_vqa:
+    batch_size: 6
+    sample_ratio: 8
+
+  aok_vqa:
+    batch_size: 6
+    sample_ratio: 12
+
+  gqa:
+    batch_size: 6
+    sample_ratio: 50
+
+  ocrvqa:
+    batch_size: 6
+    sample_ratio: 30
\ No newline at end of file
diff --git a/train_configs/minigptv2_finetune.yaml b/train_configs/minigptv2_finetune.yaml
index 114d7e9..3c6de7c 100644
--- a/train_configs/minigptv2_finetune.yaml
+++ b/train_configs/minigptv2_finetune.yaml
@@ -4,8 +4,8 @@ model:
   max_txt_len: 1024
   image_size: 448
   end_sym: ""
-  llama_model: "/path/to/llama_checkpoint"
-  ckpt: "/path/to/pretrained_checkpoint"
+  llama_model: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/llama_2_7b_chat"
+  ckpt: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/minigptv2/checkpoint_stage2.pth"
   use_grad_checkpoint: True
   chat_template: True
   lora_r: 64