diff --git a/eval_configs/minigpt4_llama2_eval.yaml b/eval_configs/minigpt4_llama2_eval.yaml
index 19e9c2f..03c6c77 100644
--- a/eval_configs/minigpt4_llama2_eval.yaml
+++ b/eval_configs/minigpt4_llama2_eval.yaml
@@ -13,7 +13,7 @@ datasets:
     vis_processor:
       train:
         name: "blip2_image_eval"
-        image_size: 512
+        image_size: 224
     text_processor:
       train:
         name: "blip_caption"
diff --git a/minigpt4/configs/models/minigpt4_llama2.yaml b/minigpt4/configs/models/minigpt4_llama2.yaml
index 42334d6..1162b0e 100644
--- a/minigpt4/configs/models/minigpt4_llama2.yaml
+++ b/minigpt4/configs/models/minigpt4_llama2.yaml
@@ -2,7 +2,7 @@ model:
   arch: mini_gpt4

   # vit encoder
-  image_size: 512
+  image_size: 224
   drop_path_rate: 0
   use_grad_checkpoint: False
   vit_precision: "fp16"
@@ -18,10 +18,10 @@ preprocess:
   vis_processor:
     train:
       name: "blip2_image_train"
-      image_size: 512
+      image_size: 224
     eval:
       name: "blip2_image_eval"
-      image_size: 512
+      image_size: 224
   text_processor:
     train:
       name: "blip_caption"
diff --git a/train_configs/minigpt4_llama2_stage2_finetune.yaml b/train_configs/minigpt4_llama2_stage2_finetune.yaml
index 052c1ae..1dd707b 100644
--- a/train_configs/minigpt4_llama2_stage2_finetune.yaml
+++ b/train_configs/minigpt4_llama2_stage2_finetune.yaml
@@ -14,7 +14,7 @@ datasets:
     vis_processor:
       train:
         name: "blip2_image_train"
-        image_size: 512
+        image_size: 224
     text_processor:
       train:
         name: "blip_caption"
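
Note: image_size appears both under vis_processor (which resizes and normalizes the input image) and under the model's ViT encoder settings, so all occurrences should agree; the diff above moves every one of them from 512 to 224. The sketch below is only an illustration of what a "blip2_image_eval"-style preprocessor typically does once image_size is 224; the function name and the torchvision-based transform pipeline are assumptions for demonstration, not the repo's exact implementation (which lives under minigpt4/processors).

# Illustrative sketch only -- not the repository's exact processor code.
from PIL import Image
from torchvision import transforms
from torchvision.transforms.functional import InterpolationMode

# CLIP-style normalization constants commonly used with BLIP-2 image processors
CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073)
CLIP_STD = (0.26862954, 0.26130258, 0.27577711)

def build_eval_transform(image_size: int = 224) -> transforms.Compose:
    """Resize + normalize pipeline driven by the `image_size` config key."""
    return transforms.Compose([
        transforms.Resize((image_size, image_size),
                          interpolation=InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(CLIP_MEAN, CLIP_STD),
    ])

if __name__ == "__main__":
    tf = build_eval_transform(224)
    img = Image.new("RGB", (640, 480))   # stand-in for a real input image
    pixel_values = tf(img)               # tensor of shape [3, 224, 224]
    print(pixel_values.shape)

Keeping the model-side image_size in sync matters because a ViT encoder's positional embeddings are built for a fixed patch grid (for a patch size of 14, 224 / 14 = 16 patches per side), so feeding images resized to a different resolution than the encoder expects would break or degrade the visual features.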