raw minigptv2

2025-04-09 12:30:45 +00:00 · 2023-10-27 16:39:56 +08:00 · 2023-10-27 16:39:56 +08:00 · 5d4e8a3d43
commit 5d4e8a3d43
parent 56a2fd6796
7 changed files with 104 additions and 9 deletions
--- a/environment.yml
+++ b/environment.yml
@@ -31,3 +31,5 @@ dependencies:
    - accelerate==0.20.3
    - bitsandbytes==0.37.0
    - wandb
+    - visual_genome
+    - scikit-image
--- a/eval_configs/minigptv2_eval.yaml
+++ b/eval_configs/minigptv2_eval.yaml
@@ -5,7 +5,7 @@ model:
  end_sym: "</s>"
  low_resource: True
  prompt_template: '[INST] {} [/INST]'
-  ckpt: 'please set this value to the path of pretrained checkpoint'
+  ckpt: '/mnt/pfs-guan-ssai/nlu/wanghanzi/models/minigptv2/minigptv2_checkpoint.pth'
  lora_r: 64
  lora_alpha: 16

--- a/minigpt4/configs/datasets/coco/caption.yaml
+++ b/minigpt4/configs/datasets/coco/caption.yaml
@@ -15,7 +15,7 @@ datasets:
        train:
          url: https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json
          md5: aa31ac474cf6250ebb81d18348a07ed8
-          storage: /path/to/coco_caption/coco_karpathy_train.json
+          storage: /mnt/pfs-guan-ssai/nlu/wanghanzi/data/COCO_Cap/coco_karpathy_train.json
      images:
-        storage: /path/to/coco/images
+        storage: /mnt/pfs-guan-ssai/nlu/dingyifeng/data/COCO
        
--- a/minigpt4/configs/datasets/coco/defaults_vqa.yaml
+++ b/minigpt4/configs/datasets/coco/defaults_vqa.yaml
@@ -16,9 +16,9 @@ datasets:
              - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_train.json
              - https://storage.googleapis.com/sfr-vision-language-research/LAVIS/datasets/vqav2/vqa_val.json
          storage:
-              - /path/to/vqav2/vqa_train.json
-              - /path/to/vqav2/vqa_val.json
+              - /mnt/pfs-guan-ssai/nlu/wanghanzi/data/VQAv2/vqa_train.json
+              - /mnt/pfs-guan-ssai/nlu/wanghanzi/data/VQAv2/vqa_val.json
      images:
-          storage: /path/to/coco/images
+          storage: /mnt/pfs-guan-ssai/nlu/dingyifeng/data/COCO

  
--- a/minigpt4/configs/models/minigpt_v2.yaml
+++ b/minigpt4/configs/models/minigpt_v2.yaml
@@ -11,7 +11,8 @@ model:
  # generation configs
  prompt: ""

-  llama_model: "please set this value to the path of llama2-chat-7b"
+  # llama_model: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/llama_2_7b_chat"
+  llama_model: "/mnt/pfs-guan-ssai/nlu/data/luhengtong/llama2_model/meta-llama:Llama-2-7b-chat-hf"
  lora_r: 64
  lora_alpha: 16

--- a/test.txt
+++ b/test.txt
@@ -0,0 +1,92 @@
+datasets:
+  multitask_conversation:
+    batch_size: 2
+    sample_ratio: 50
+
+  llava_conversation: 
+    batch_size: 2
+    sample_ratio: 30
+
+  unnatural_instruction:
+    batch_size: 1
+    sample_ratio: 10
+
+  refvg:
+    batch_size: 6
+    sample_ratio: 40
+
+  llava_detail:
+    batch_size: 4
+    sample_ratio: 20
+
+  llava_reason: 
+    batch_size: 4
+    sample_ratio: 80
+    
+
+  flickr_grounded_caption:
+    batch_size: 2
+    sample_ratio: 80
+
+  flickr_CaptionToPhrase:
+    batch_size: 2
+    sample_ratio: 80
+
+  flickr_ObjectToPhrase:
+    batch_size: 2
+    sample_ratio: 80
+
+  coco_caption:
+    batch_size: 6
+    sample_ratio: 10  
+
+    
+  textcaps_caption:  
+    batch_size: 6
+    sample_ratio: 30
+
+  refcoco: 
+    batch_size: 6
+    sample_ratio: 25
+
+
+  refcocop:
+    batch_size: 6
+    sample_ratio: 25
+
+  refcocog:
+    batch_size: 6
+    sample_ratio: 25
+
+  invrefcoco:
+    batch_size: 6
+    sample_ratio: 10
+
+  invrefcocop:
+    batch_size: 6
+    sample_ratio: 10
+
+  invrefcocog:
+    batch_size: 6
+    sample_ratio: 10
+
+
+  coco_vqa:    
+    batch_size: 6
+    sample_ratio: 15
+
+  ok_vqa:   
+    batch_size: 6
+    sample_ratio: 8
+
+  aok_vqa: 
+    batch_size: 6
+    sample_ratio: 12
+
+  gqa:  
+    batch_size: 6
+    sample_ratio: 50
+
+  ocrvqa: 
+    batch_size: 6
+    sample_ratio: 30
--- a/train_configs/minigptv2_finetune.yaml
+++ b/train_configs/minigptv2_finetune.yaml
@@ -4,8 +4,8 @@ model:
  max_txt_len: 1024
  image_size: 448
  end_sym: "</s>"
-  llama_model: "/path/to/llama_checkpoint"
-  ckpt: "/path/to/pretrained_checkpoint"
+  llama_model: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/llama_2_7b_chat"
+  ckpt: "/mnt/pfs-guan-ssai/nlu/wanghanzi/models/minigptv2/checkpoint_stage2.pth"
  use_grad_checkpoint: True
  chat_template: True
  lora_r: 64