From 76cad1ebfd0f2bb68d81cc38955823de8d716536 Mon Sep 17 00:00:00 2001
From: junchen14
Date: Wed, 1 Nov 2023 11:05:51 +0300
Subject: [PATCH] update evaluation readme

---
 .../minigptv2_benchmark_evaluation.yaml | 66 +++++++++++++++++--
 eval_scripts/EVAL_README.md             |  2 +-
 eval_scripts/eval_ref.py                | 15 +++--
 eval_scripts/eval_vqa.py                | 52 +++++++++++++--
 minigpt4/common/config.py               | 29 +++++++-
 minigpt4/models/base_model.py           |  2 +-
 6 files changed, 144 insertions(+), 22 deletions(-)

diff --git a/eval_configs/minigptv2_benchmark_evaluation.yaml b/eval_configs/minigptv2_benchmark_evaluation.yaml
index 0977f82..c7c738b 100644
--- a/eval_configs/minigptv2_benchmark_evaluation.yaml
+++ b/eval_configs/minigptv2_benchmark_evaluation.yaml
@@ -5,8 +5,8 @@ model:
   end_sym: "</s>"
   low_resource: False
   prompt_template: '[INST] {} [/INST]'
-  llama_model: ""
-  ckpt: ""
+  llama_model: "/ibex/project/c2133/llama_v2/llama-2-7b-chat-pytorch_update"
+  ckpt: "/ibex/project/c2133/minigpt_checkpoints/checkpoint_stage3_correct/checkpoint_10.pth"
   lora_r: 64
   lora_alpha: 16
 
@@ -21,14 +21,66 @@ datasets:
       train:
         name: "blip_caption"
 
+evaluation_datasets:
+  refcoco:
+    eval_file_path: /ibex/project/c2133/minigpt4_v2_dataset
+    img_path: /ibex/ai/reference/CV/COCO/cocoapi/data/2014/images/jpeg/train
+    save_path: results/refcoco.json
+    max_new_tokens: 20
+    batch_size: 10
+  refcocog:
+    eval_file_path: /path/to/eval/annotation/path
+    img_path: /path/to/eval/image/path
+    save_path: /path/to/save/path
+    max_new_tokens: 20
+    batch_size: 10
+  refcoco+:
+    eval_file_path: /path/to/eval/annotation/path
+    img_path: /path/to/eval/image/path
+    save_path: /path/to/save/path
+    max_new_tokens: 20
+    batch_size: 10
+  gqa:
+    eval_file_path: /path/to/eval/annotation/path
+    img_path: /path/to/eval/image/path
+    save_path: /path/to/save/path
+    max_new_tokens: 20
+    batch_size: 10
+  okvqa:
+    eval_file_path: /path/to/eval/annotation/path
+    img_path: /path/to/eval/image/path
+    save_path: /path/to/save/path
+    max_new_tokens: 20
+    batch_size: 10
+  vizwiz:
+    eval_file_path: /path/to/eval/annotation/path
+    img_path: /path/to/eval/image/path
+    save_path: /path/to/save/path
+    max_new_tokens: 20
+    batch_size: 10
+  iconvqa:
+    eval_file_path: /path/to/eval/annotation/path
+    img_path: /path/to/eval/image/path
+    save_path: /path/to/save/path
+    max_new_tokens: 20
+    batch_size: 10
+  vsr:
+    eval_file_path: cambridgeltl/vsr_zeroshot
+    img_path: /path/to/eval/image/path
+    save_path: /path/to/save/path
+    max_new_tokens: 20
+    batch_size: 10
+  hm:
+    eval_file_path: /path/to/eval/annotation/path
+    img_path: /path/to/eval/image/path
+    save_path: /path/to/save/path
+    max_new_tokens: 20
+    batch_size: 10
+
 run:
   task: image_text_pretrain
-  max_new_tokens: 20
   name: minigptv2_evaluation
-  batch_size: 10
-  eval_file_path: /path/to/eval/annotation/path # annotation file
-  img_path: /path/to/eval/image/path # image file path
-  save_path: /path/to/save/path # saved result
+
 
diff --git a/eval_scripts/EVAL_README.md b/eval_scripts/EVAL_README.md
index 7885c9a..7db1997 100644
--- a/eval_scripts/EVAL_README.md
+++ b/eval_scripts/EVAL_README.md
@@ -80,7 +80,7 @@ dataset names:
 
 ```
 torchrun --master-port ${port} --nproc_per_node 1 eval_ref.py \
- --cfg-path ${cfg_path} --dataset dataset_name
+ --cfg-path ${cfg_path} --dataset dataset_name --resample
 ```
 
 
diff --git a/eval_scripts/eval_ref.py b/eval_scripts/eval_ref.py
index a29856f..b538d9c 100644
--- a/eval_scripts/eval_ref.py
+++ b/eval_scripts/eval_ref.py
@@ -30,6 +30,7 @@
 eval_dict = {'refcoco': ['val','testA','testB'], 'refcoco+': ['val','testA','testB'], 'refcocog': ['val','test']}
 
+
 model, vis_processor = init_model(args)
 model.eval()
 CONV_VISION = CONV_VISION_minigptv2
@@ -39,13 +40,17 @@
 conv_temp.system = ""
 
 # model.eval()
-eval_file_path = cfg.run_cfg.eval_file_path
-img_path = cfg.run_cfg.img_path
-batch_size = cfg.run_cfg.batch_size
-max_new_tokens = cfg.run_cfg.max_new_tokens
+
 
 for dataset in args.dataset:
     for split in eval_dict[dataset]:
+
+        eval_file_path = cfg.evaluation_datasets_cfg[dataset]["eval_file_path"]
+        img_path = cfg.evaluation_datasets_cfg[dataset]["img_path"]
+        batch_size = cfg.evaluation_datasets_cfg[dataset]["batch_size"]
+        max_new_tokens = cfg.evaluation_datasets_cfg[dataset]["max_new_tokens"]
+        save_path = cfg.evaluation_datasets_cfg[dataset]["save_path"]
+
         with open(os.path.join(eval_file_path,f"{dataset}/{dataset}_{split}.json"), 'r') as f:
             refcoco = json.load(f)
@@ -83,7 +88,7 @@ for dataset in args.dataset:
 
             if len(resamples) == 0:
                 break
 
-        with open(save_path,'w') as f:
+        with open(os.path.join(save_path,f"{dataset}_{split}.json"),'w') as f:
             json.dump(minigpt4_predict, f)
 
         count=0
diff --git a/eval_scripts/eval_vqa.py b/eval_scripts/eval_vqa.py
index 14c2e1f..68ca951 100644
--- a/eval_scripts/eval_vqa.py
+++ b/eval_scripts/eval_vqa.py
@@ -36,15 +36,18 @@
 conv_temp = CONV_VISION_minigptv2.copy()
 conv_temp.system = ""
 model.eval()
-eval_file_path = cfg.run_cfg.eval_file_path
-img_path=cfg.run_cfg.img_path
-save_path = cfg.run_cfg.save_path
-batch_size = cfg.run_cfg.batch_size
-max_new_tokens = cfg.run_cfg.max_new_tokens
+
 
 if 'okvqa' in args.dataset:
-    evaluation_annntation_path = os.path.join(eval_file_path, "okvqa_test_split.json")
-    with open(evaluation_annntation_path) as f:
+
+    eval_file_path = cfg.evaluation_datasets_cfg["okvqa"]["eval_file_path"]
+    img_path = cfg.evaluation_datasets_cfg["okvqa"]["img_path"]
+    batch_size = cfg.evaluation_datasets_cfg["okvqa"]["batch_size"]
+    max_new_tokens = cfg.evaluation_datasets_cfg["okvqa"]["max_new_tokens"]
+    save_path = cfg.evaluation_datasets_cfg["okvqa"]["save_path"]
+
+    # evaluation_annntation_path = os.path.join(eval_file_path, "okvqa_test_split.json")
+    with open(eval_file_path) as f:
         ok_vqa_test_split = json.load(f)
 
     data = OKVQAEvalData(ok_vqa_test_split, vis_processor, img_path)
@@ -76,6 +79,13 @@ if 'okvqa' in args.dataset:
     print ("Overall OKVQA Accuracy is: %.02f\n" %(vqaEval.accuracy['overall']), flush=True)
 
 if 'vizwiz' in args.dataset:
+
+    eval_file_path = cfg.evaluation_datasets_cfg["vizwiz"]["eval_file_path"]
+    img_path = cfg.evaluation_datasets_cfg["vizwiz"]["img_path"]
+    batch_size = cfg.evaluation_datasets_cfg["vizwiz"]["batch_size"]
+    max_new_tokens = cfg.evaluation_datasets_cfg["vizwiz"]["max_new_tokens"]
+    save_path = cfg.evaluation_datasets_cfg["vizwiz"]["save_path"]
+
     vizwiz = json.load(open(eval_file_path, 'r'))
 
     data = VizWizEvalData(vizwiz, vis_processor, img_path)
@@ -105,6 +115,14 @@ if 'vizwiz' in args.dataset:
 
 
 if 'iconvqa' in args.dataset:
+
+    eval_file_path = cfg.evaluation_datasets_cfg["iconvqa"]["eval_file_path"]
+    img_path = cfg.evaluation_datasets_cfg["iconvqa"]["img_path"]
+    batch_size = cfg.evaluation_datasets_cfg["iconvqa"]["batch_size"]
+    max_new_tokens = cfg.evaluation_datasets_cfg["iconvqa"]["max_new_tokens"]
+    save_path = cfg.evaluation_datasets_cfg["iconvqa"]["save_path"]
+
+
     iconqa_text_val = json.load(open(eval_file_path,"r"))
 
     data = IconQAEvalData(iconqa_text_val, vis_processor, img_path)
@@ -127,6 +145,13 @@ if 'iconvqa' in args.dataset:
 
 
 if 'gqa' in args.dataset:
+
+    eval_file_path = cfg.evaluation_datasets_cfg["gqa"]["eval_file_path"]
+    img_path = cfg.evaluation_datasets_cfg["gqa"]["img_path"]
+    batch_size = cfg.evaluation_datasets_cfg["gqa"]["batch_size"]
+    max_new_tokens = cfg.evaluation_datasets_cfg["gqa"]["max_new_tokens"]
+    save_path = cfg.evaluation_datasets_cfg["gqa"]["save_path"]
+
     gqa = json.load(open(eval_file_path))
     data = GQAEvalData(gqa, vis_processor, img_path)
     eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
@@ -151,6 +176,12 @@ if 'gqa' in args.dataset:
         json.dump(minigpt4_predict, f)
 
 if 'vsr' in args.dataset:
+
+    img_path = cfg.evaluation_datasets_cfg["vsr"]["img_path"]
+    batch_size = cfg.evaluation_datasets_cfg["vsr"]["batch_size"]
+    max_new_tokens = cfg.evaluation_datasets_cfg["vsr"]["max_new_tokens"]
+    save_path = cfg.evaluation_datasets_cfg["vsr"]["save_path"]
+
     annotation = load_dataset("cambridgeltl/vsr_zeroshot", split='test')
     data = VSREvalData(annotation, vis_processor, img_path)
     eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
@@ -176,6 +207,13 @@ if 'vsr' in args.dataset:
         json.dump(minigpt4_predict, f)
 
 if 'hm' in args.dataset:
+
+    eval_file_path = cfg.evaluation_datasets_cfg["hm"]["eval_file_path"]
+    img_path = cfg.evaluation_datasets_cfg["hm"]["img_path"]
+    batch_size = cfg.evaluation_datasets_cfg["hm"]["batch_size"]
+    max_new_tokens = cfg.evaluation_datasets_cfg["hm"]["max_new_tokens"]
+    save_path = cfg.evaluation_datasets_cfg["hm"]["save_path"]
+
     annotation = []
     with open(eval_file_path, 'r') as jsonl_file:
         for line in jsonl_file:
diff --git a/minigpt4/common/config.py b/minigpt4/common/config.py
index e184b1f..5fe7239 100644
--- a/minigpt4/common/config.py
+++ b/minigpt4/common/config.py
@@ -29,6 +29,7 @@ class Config:
         runner_config = self.build_runner_config(config)
         model_config = self.build_model_config(config, **user_config)
         dataset_config = self.build_dataset_config(config)
+        evaluation_dataset_config = self.build_evaluation_dataset_config(config)
 
         # Validate the user-provided runner configuration
         # model and dataset configuration are supposed to be validated by the respective classes
@@ -37,7 +38,7 @@
 
         # Override the default configuration with user options.
         self.config = OmegaConf.merge(
-            runner_config, model_config, dataset_config, user_config
+            runner_config, model_config, dataset_config, evaluation_dataset_config, user_config
         )
 
     def _validate_runner_config(self, runner_config):
@@ -111,6 +112,28 @@ class Config:
 
         return dataset_config
 
+
+    @staticmethod
+    def build_evaluation_dataset_config(config):
+        datasets = config.get("evaluation_datasets", None)
+        if datasets is None:
+            raise KeyError(
+                "Expecting 'evaluation_datasets' as the root key for evaluation dataset configuration."
+            )
+
+        dataset_config = OmegaConf.create()
+
+        for dataset_name in datasets:
+            builder_cls = registry.get_builder_class(dataset_name)
+
+            # hierarchy override, customized config > default config
+            dataset_config = OmegaConf.merge(
+                dataset_config,
+                {"evaluation_datasets": {dataset_name: config["evaluation_datasets"][dataset_name]}},
+            )
+
+        return dataset_config
+
     def _convert_to_dot_list(self, opts):
         if opts is None:
             opts = []
@@ -136,6 +159,10 @@ class Config:
     def datasets_cfg(self):
         return self.config.datasets
 
+    @property
+    def evaluation_datasets_cfg(self):
+        return self.config.evaluation_datasets
+
     @property
     def model_cfg(self):
         return self.config.model
diff --git a/minigpt4/models/base_model.py b/minigpt4/models/base_model.py
index b01273e..d70ca18 100644
--- a/minigpt4/models/base_model.py
+++ b/minigpt4/models/base_model.py
@@ -184,7 +184,7 @@ class BaseModel(nn.Module):
         else:
             llama_model = LlamaForCausalLM.from_pretrained(
                 llama_model_path,
-                torch_dtype=torch.float32,
+                torch_dtype=torch.float16,
             )
 
         if lora_r > 0:
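
Below is a minimal sketch (not part of the patch) of how the new per-dataset `evaluation_datasets` block is consumed once this change is applied. It assumes the existing `eval_parser` helper from `minigpt4.common.eval_utils` and the `Config` class above keep their current behavior, and that `--cfg-path` points at `eval_configs/minigptv2_benchmark_evaluation.yaml`; the dataset names and keys mirror that file.

```python
# Sketch only: mirrors the lookup pattern the patched eval_ref.py / eval_vqa.py use.
from minigpt4.common.config import Config
from minigpt4.common.eval_utils import eval_parser

parser = eval_parser()
parser.add_argument("--dataset", type=str, nargs="+", help="datasets to evaluate")
args = parser.parse_args()
cfg = Config(args)

for dataset in args.dataset:
    ds_cfg = cfg.evaluation_datasets_cfg[dataset]   # new per-dataset node (was cfg.run_cfg)
    eval_file_path = ds_cfg["eval_file_path"]       # annotation file, or HF dataset id for vsr
    img_path = ds_cfg["img_path"]                   # image root directory
    batch_size = ds_cfg["batch_size"]
    max_new_tokens = ds_cfg["max_new_tokens"]
    save_path = ds_cfg["save_path"]                 # where predictions are written
    print(dataset, eval_file_path, img_path, batch_size, max_new_tokens, save_path)
```

Each evaluation branch in `eval_ref.py` and `eval_vqa.py` now performs exactly this lookup instead of reading shared values from `cfg.run_cfg`.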
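For reference, here is a standalone illustration (hypothetical, not taken from the repo) of the `OmegaConf.merge` pattern that `build_evaluation_dataset_config` applies: each dataset block from the YAML is folded under a single `evaluation_datasets` node. The two dataset entries below are placeholders.

```python
from omegaconf import OmegaConf

# Stand-in for the loaded YAML config.
raw = OmegaConf.create({
    "evaluation_datasets": {
        "refcoco": {"max_new_tokens": 20, "batch_size": 10},
        "gqa": {"max_new_tokens": 20, "batch_size": 10},
    }
})

# Fold each dataset block into one node, as the new method does.
merged = OmegaConf.create()
for name in raw.evaluation_datasets:
    merged = OmegaConf.merge(
        merged,
        {"evaluation_datasets": {name: raw["evaluation_datasets"][name]}},
    )

assert merged.evaluation_datasets.gqa.batch_size == 10
print(OmegaConf.to_yaml(merged))
```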