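# MiniGPT-4/eval_scripts/eval_vqa.py
#
# Evaluation script: runs the model on the datasets selected with the
# --dataset command-line option ("okvqa", "vizwiz", "iconvqa", "gqa", "vsr",
# "hm") and prints an accuracy figure for each selected dataset.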
import os
import re
import json
import argparse
from collections import defaultdict
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from datasets import load_dataset
2023-10-29 07:41:16 +00:00
from minigpt4.datasets.datasets.vqa_datasets import OKVQAEvalData,VizWizEvalData,IconQAEvalData,GQAEvalData,VSREvalData,HMEvalData
2023-10-25 18:54:46 +00:00
from minigpt4.common.vqa_tools.VQA.PythonHelperTools.vqaTools.vqa import VQA
from minigpt4.common.vqa_tools.VQA.PythonEvaluationTools.vqaEvaluation.vqaEval import VQAEval
from minigpt4.common.eval_utils import prepare_texts, init_model, eval_parser
2023-10-29 07:41:16 +00:00
from minigpt4.conversation.conversation import CONV_VISION_minigptv2
2023-11-01 06:33:48 +00:00
from minigpt4.common.config import Config
2023-10-25 18:54:46 +00:00
def list_of_str(arg):
    """Parse a comma-separated command-line value into a list of strings.

    Whitespace around each entry is stripped and empty entries are dropped,
    so ``"okvqa, gqa,"`` yields ``["okvqa", "gqa"]``. Without the stripping an
    entry such as ``" gqa"`` would silently fail the exact-match membership
    tests that select which dataset to evaluate.

    Args:
        arg: The raw option string, e.g. ``"okvqa,vizwiz"``.

    Returns:
        The non-empty, whitespace-trimmed entries in their original order.
    """
    entries = (item.strip() for item in arg.split(','))
    return [item for item in entries if item]
# Command-line interface: the shared evaluation parser plus a dataset selector.
parser = eval_parser()
# NOTE(review): the default 'refcoco' matches none of the dataset branches
# below, so running without --dataset evaluates nothing -- confirm intended.
parser.add_argument("--dataset", type=list_of_str, default='refcoco', help="dataset to evaluate")
args = parser.parse_args()
cfg = Config(args)

model, vis_processor = init_model(args)

# Conversation template used to wrap every prompt; its system prompt is blanked.
conv_temp = CONV_VISION_minigptv2.copy()
conv_temp.system = ""
model.eval()

# Directory that receives the per-dataset prediction dumps. Create it up front
# so a missing directory fails fast instead of after inference has finished.
save_path = cfg.run_cfg.save_path
if save_path:
    os.makedirs(save_path, exist_ok=True)
2023-11-01 08:05:51 +00:00
2023-10-25 18:54:46 +00:00
if 'okvqa' in args.dataset:
    # OKVQA: generate an answer per question, dump the predictions to JSON,
    # then score that dump with the VQA evaluation tools.
    dataset_cfg = cfg.evaluation_datasets_cfg["okvqa"]
    eval_file_path = dataset_cfg["eval_file_path"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]

    evaluation_annotation_path = os.path.join(eval_file_path, "okvqa_test_split.json")
    with open(evaluation_annotation_path) as f:
        ok_vqa_test_split = json.load(f)

    data = OKVQAEvalData(ok_vqa_test_split, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)

    minigpt4_predict = []
    for images, questions, question_ids, img_ids in eval_dataloader:
        texts = prepare_texts(questions, conv_temp)  # wrap the texts with conversation template
        # Inference only: no gradients are needed.
        with torch.no_grad():
            answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)

        for answer, question_id in zip(answers, question_ids):
            minigpt4_predict.append({
                'answer': answer.lower().replace('<unk>', '').strip(),
                'question_id': int(question_id),
            })

    # The scorer below reads the predictions back from this file.
    file_save_path = os.path.join(save_path, "okvqa.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    annFile = os.path.join(eval_file_path, "mscoco_val2014_annotations_clean.json")
    quesFile = os.path.join(eval_file_path, "OpenEnded_mscoco_val2014_questions_clean.json")

    vqa = VQA(annFile, quesFile)
    vqaRes = vqa.loadRes(file_save_path, quesFile)

    vqaEval = VQAEval(vqa, vqaRes, n=2)
    vqaEval.evaluate()
    print("Overall OKVQA Accuracy is: %.02f\n" % (vqaEval.accuracy['overall']), flush=True)
if 'vizwiz' in args.dataset:
    # VizWiz: generate an answer per question and score it against the
    # '_'-joined ground-truth answers delivered by the dataset.
    dataset_cfg = cfg.evaluation_datasets_cfg["vizwiz"]
    eval_file_path = dataset_cfg["eval_file_path"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]

    # Context manager so the annotation file handle is closed promptly.
    with open(eval_file_path, 'r') as f:
        vizwiz = json.load(f)

    data = VizWizEvalData(vizwiz, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    minigpt4_predict = []
    total_acc = []
    for images, texts, gt_answers in tqdm(eval_dataloader):
        texts = prepare_texts(texts, conv_temp)  # wrap the texts with conversation template
        with torch.no_grad():
            answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False, repetition_penalty=1.0)

        for answer, gt_answer in zip(answers, gt_answers):
            prediction = answer.replace('<unk>', '').strip()
            minigpt4_predict.append({'answer': prediction})

            # Score the same cleaned text that is saved, so the reported
            # accuracy agrees with the dumped predictions.
            normalized = prediction.lower()
            count = sum(1 for gt in gt_answer.split('_') if gt.lower() == normalized)
            # Per-sample accuracy: matching ground truths / 3, capped at 1.
            total_acc.append(min(count / 3.0, 1.0))

    file_save_path = os.path.join(save_path, "vizwiz.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    if total_acc:
        print('vizwiz Acc: ', np.average(total_acc) * 100.0, flush=True)
    else:
        # Averaging an empty list would only yield NaN plus a warning.
        print('vizwiz Acc: ', 'no samples evaluated', flush=True)
2023-10-31 01:56:49 +00:00
if 'iconvqa' in args.dataset:
    # IconQA: rank the candidate answers for each question with
    # model.multi_select and count how often the top-ranked one is correct.
    dataset_cfg = cfg.evaluation_datasets_cfg["iconvqa"]
    eval_file_path = dataset_cfg["eval_file_path"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    # Read for parity with the other branches; not used by multi_select here.
    max_new_tokens = dataset_cfg["max_new_tokens"]

    # Context manager so the annotation file handle is closed promptly.
    with open(eval_file_path, "r") as f:
        iconqa_text_val = json.load(f)

    data = IconQAEvalData(iconqa_text_val, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)

    count = 0
    for images, texts, candidates, answers in tqdm(eval_dataloader):
        # Each sample's candidates arrive as one '_'-joined string.
        candidates = [candidate.split('_') for candidate in candidates]
        num_cand = [len(candidate) for candidate in candidates]
        # Pad every candidate list with 'none' up to the longest in the batch.
        max_cand = max(num_cand)
        for candidate in candidates:
            candidate.extend(['none'] * (max_cand - len(candidate)))
        # Transpose: from per-sample lists to per-candidate-slot lists.
        candidates = [list(x) for x in zip(*candidates)]
        instructions = ["<s>[INST] <Img><ImageHere></Img> {} [/INST]".format(text) for text in texts]
        # Inference only: no gradients are needed.
        with torch.no_grad():
            answer_ranks = model.multi_select(images, instructions, candidates, num_cand=num_cand)
        for idx, answer in enumerate(answers):
            if answer_ranks[idx][0] == answer:
                count += 1

    num_samples = len(iconqa_text_val)
    if num_samples:
        print('iconqa Acc: ', count / num_samples * 100.0, flush=True)
    else:
        # Avoid a ZeroDivisionError when the annotation file is empty.
        print('iconqa Acc: ', 'no samples evaluated', flush=True)
if 'gqa' in args.dataset:
    # GQA: generate an answer per question and count exact matches with the
    # label.
    dataset_cfg = cfg.evaluation_datasets_cfg["gqa"]
    eval_file_path = dataset_cfg["eval_file_path"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]

    # Context manager so the annotation file handle is closed promptly.
    with open(eval_file_path) as f:
        gqa = json.load(f)

    data = GQAEvalData(gqa, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    count = 0
    total = 0
    minigpt4_predict = []
    for images, texts, labels in tqdm(eval_dataloader):
        texts = prepare_texts(texts, conv_temp)  # wrap the texts with conversation template
        # Inference only: no gradients are needed.
        with torch.no_grad():
            answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)

        for answer, label in zip(answers, labels):
            pred = answer.lower().replace('<unk>', '').strip()
            minigpt4_predict.append({'pred': pred, 'gt': label})
            # Score the cleaned prediction that is saved, so the reported
            # accuracy agrees with the dumped 'pred' values.
            if pred == label:
                count += 1
            total += 1

    # Save first so the predictions survive any failure while reporting.
    file_save_path = os.path.join(save_path, "gqa.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    if total:
        print('gqa val:', count / total * 100, flush=True)
    else:
        # Avoid a ZeroDivisionError when the dataset yields no samples.
        print('gqa val:', 'no samples evaluated', flush=True)
if 'vsr' in args.dataset:
    # VSR: annotations come from the "cambridgeltl/vsr_zeroshot" test split;
    # generate an answer per sample and count case-insensitive label matches.
    dataset_cfg = cfg.evaluation_datasets_cfg["vsr"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]

    annotation = load_dataset("cambridgeltl/vsr_zeroshot", split='test')
    data = VSREvalData(annotation, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    count = 0
    total = 0
    minigpt4_predict = []
    for images, texts, labels in tqdm(eval_dataloader):
        texts = prepare_texts(texts, conv_temp)  # wrap the texts with conversation template
        # Inference only: no gradients are needed.
        with torch.no_grad():
            answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)

        for answer, label in zip(answers, labels):
            pred = answer.replace('<unk>', '').strip()
            minigpt4_predict.append({'pred': pred, 'gt': label})
            # Score the cleaned prediction that is saved, so the reported
            # accuracy agrees with the dumped 'pred' values.
            if pred.lower() == label.lower():
                count += 1
            total += 1

    # Save first so the predictions survive any failure while reporting.
    file_save_path = os.path.join(save_path, "vsr.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    if total:
        print('vsr test:', count / total * 100, flush=True)
    else:
        # Avoid a ZeroDivisionError when the dataset yields no samples.
        print('vsr test:', 'no samples evaluated', flush=True)
if 'hm' in args.dataset:
    # HM: annotations are read from a JSON-lines file; the model's yes/no
    # answer is mapped to 1/0 and compared with the integer label.
    dataset_cfg = cfg.evaluation_datasets_cfg["hm"]
    eval_file_path = dataset_cfg["eval_file_path"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]

    # One JSON object per line.
    with open(eval_file_path, 'r') as jsonl_file:
        annotation = [json.loads(line) for line in jsonl_file]

    data = HMEvalData(annotation, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    count = 0
    total = 0
    minigpt4_predict = []
    for images, texts, labels in tqdm(eval_dataloader):
        texts = prepare_texts(texts, conv_temp)  # wrap the texts with conversation template
        # Inference only: no gradients are needed.
        with torch.no_grad():
            answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)

        for answer, label in zip(answers, labels):
            normalized = answer.lower().strip()
            if normalized == "yes":
                pred = 1
            elif normalized == "no":
                pred = 0
            else:
                # Keep the raw text so the dump shows what the model produced.
                print("non-matching answer", answer)
                pred = answer

            gt = int(label)
            minigpt4_predict.append({'pred': pred, 'gt': gt})
            # Compare against the same integer that is saved as 'gt', so the
            # check does not depend on the collated label's type.
            if pred == gt:
                count += 1
            total += 1

    # Save first so the predictions survive any failure while reporting.
    file_save_path = os.path.join(save_path, "hm.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    if total:
        print('hm val:', count / total * 100, flush=True)
    else:
        # Avoid a ZeroDivisionError when the dataset yields no samples.
        print('hm val:', 'no samples evaluated', flush=True)