diff --git a/eval_configs/minigptv2_eval.yaml b/eval_configs/minigptv2_eval.yaml
index dedcde0..00c21ed 100644
--- a/eval_configs/minigptv2_eval.yaml
+++ b/eval_configs/minigptv2_eval.yaml
@@ -3,10 +3,10 @@ model:
   model_type: pretrain
   max_txt_len: 500
   end_sym: "</s>"
-  low_resource: True
+  low_resource: False
   prompt_template: '[INST] {} [/INST]'
   llama_model: "/ibex/project/c2133/llama_v2/llama-2-7b-chat-pytorch_update"
-  ckpt: "/ibex/ai/project/c2090/minigpt4_ckpt/448_conversation_correct_best_v7_ablation1_v5_v6/20231007035/checkpoint_35.pth"
+  ckpt: ""
   lora_r: 64
   lora_alpha: 16
diff --git a/eval_scripts/EVAL_README.md b/eval_scripts/EVAL_README.md
new file mode 100644
index 0000000..9285f31
--- /dev/null
+++ b/eval_scripts/EVAL_README.md
@@ -0,0 +1,53 @@
+## Evaluation Instructions for MiniGPT-v2
+
+### Data preparation
+
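+Download the evaluation annotations and images for the benchmarks you want to run, then point the scripts at them with `--eval_file_path` and `--img_path`. The layout below is only a sketch (the directory names are placeholders, not enforced); what matters is the file names the scripts read, which are visible in `eval_ref.py` and `eval_vqa.py`:
+
+```
+annotations/
+  refcoco/refcoco_val.json                       # eval_ref.py reads {dataset}/{dataset}_{split}.json
+  refcoco+/refcoco+_val.json
+  refcocog/refcocog_val.json
+  test_split.json                                # okvqa questions
+  mscoco_val2014_annotations_clean.json          # okvqa references
+  OpenEnded_mscoco_val2014_questions_clean.json
+```
+
+For vizwiz, iconvqa, gqa, and hm, `--eval_file_path` is the annotation file itself (e.g. the hateful-memes `dev.jsonl`); for vsr it is the dataset name or path handed to `load_dataset`.
+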
+### environment setup
+
+```
+export PYTHONPATH=$PYTHONPATH:/path/to/directory/of/MiniGPT-4
+```
+
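+The evaluation config also needs local model weights. A minimal sketch of the fields to edit in `eval_configs/minigptv2_eval.yaml` (both paths are placeholders for your local copies):
+
+```
+llama_model: "/path/to/llama-2-7b-chat"
+ckpt: "/path/to/minigptv2_checkpoint.pth"
+```
+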
+### start evaluating RefCOCO, RefCOCO+, RefCOCOg
+
+```
+port=port_number
+cfg_path=/path/to/eval_configs/minigptv2_eval.yaml
+img_path=/path/to/eval/image/path
+eval_file_path=/path/to/eval/annotation/path
+save_path=/path/to/save/path
+ckpt=/path/to/evaluation/checkpoint
+split=/evaluation/data/split/type  # e.g. val, testA, testB, test
+dataset=/data/type  # refcoco, refcoco+, refcocog
+```
+
+```
+torchrun --master-port ${port} --nproc_per_node 1 eval_ref.py \
+ --cfg-path ${cfg_path} --img_path ${img_path} --eval_file_path ${eval_file_path} --save_path ${save_path} \
+ --ckpt ${ckpt} --split ${split} --dataset ${dataset} --lora_r 64 --lora_alpha 16 \
+ --batch_size 10 --max_new_tokens 20 --resample
+```
+
+### start evaluating visual question answering
+
+```
+port=port_number
+cfg_path=/path/to/eval_configs/minigptv2_eval.yaml
+img_path=/path/to/eval/image/path
+eval_file_path=/path/to/eval/annotation/path
+save_path=/path/to/save/path
+ckpt=/path/to/evaluation/checkpoint
+split=/evaluation/data/split/type  # e.g. val, test
+dataset=/data/type  # vqa data types: okvqa, vizwiz, iconvqa, gqa, vsr, hm
+```
+
+```
+torchrun --master-port ${port} --nproc_per_node 1 eval_vqa.py \
+ --cfg-path ${cfg_path} --img_path ${img_path} --eval_file_path ${eval_file_path} --save_path ${save_path} \
+ --ckpt ${ckpt} --split ${split} --dataset ${dataset} --lora_r 64 --lora_alpha 16 \
+ --batch_size 10 --max_new_tokens 20 --resample
+```
+
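+### expected outputs
+
+Both scripts dump the raw predictions as JSON to `${save_path}` and print the metric to stdout: `eval_ref.py` reports IoU-based grounding accuracy for each RefCOCO split, and `eval_vqa.py` reports per-dataset accuracy (e.g. "Overall OKVQA Accuracy", "vizwiz Acc", "gqa val", "vsr test", "hm val").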
diff --git a/eval_scripts/eval_ref.py b/eval_scripts/eval_ref.py
index 9ef700e..5f8e60f 100644
--- a/eval_scripts/eval_ref.py
+++ b/eval_scripts/eval_ref.py
@@ -11,7 +11,7 @@ import torch
 from torch.utils.data import DataLoader
 
 from minigpt4.common.eval_utils import prepare_texts, init_model, eval_parser, computeIoU
-from minigpt4.conversation.conversation import CONV_VISION_LLama2
+from minigpt4.conversation.conversation import CONV_VISION_minigptv2
 from minigpt4.datasets.datasets.coco_caption import RefCOCOEvalData
 
@@ -25,8 +25,10 @@ parser.add_argument("--res", type=float, default=100.0, help="resolution used in refcoco")
 parser.add_argument("--resample", action='store_true', help="resolution used in refcoco")
 parser.add_argument("--img_path", type=str)
 parser.add_argument("--eval_file_path", type=str)
+parser.add_argument("--save_path", type=str)
 args = parser.parse_args()
+
 
 print(args.ckpt)
 print(args.name)
@@ -36,23 +38,20 @@ eval_dict = {'refcoco': args.split,
 model, vis_processor = init_model(args)
 model.eval()
-CONV_VISION = CONV_VISION_LLama2
+CONV_VISION = CONV_VISION_minigptv2
 conv_temp = CONV_VISION.copy()
 conv_temp.system = ""
-
+# model.eval()
 
-img_path=f'{args.img_path}/COCO/cocoapi/data/2017/images/jpeg/train'
-
+
 for dataset in args.dataset:
     for split in eval_dict[dataset]:
-        with open(f'{args.eval_file_path}/{dataset}/{dataset}_{split}.json', 'r') as f:
+        with open(os.path.join(args.eval_file_path, f"{dataset}/{dataset}_{split}.json"), 'r') as f:
             refcoco = json.load(f)
 
-        data = RefCOCOEvalData(refcoco, vis_processor, img_path)
+        data = RefCOCOEvalData(refcoco, vis_processor, args.img_path)
         eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
-
         minigpt4_predict = defaultdict(list)
-
         resamples = []
 
         for images, questions, img_ids in tqdm(eval_dataloader):
@@ -64,11 +63,10 @@ for dataset in args.dataset:
             if re.match(pattern, answer):
                 minigpt4_predict[img_id].append(answer)
             else:
-                resamples.append({'img_id': img_id, 'sents': [question.replace('[refer] where is','').replace('?','').strip()]})
-
+                resamples.append({'img_id': img_id, 'sents': [question.replace('[refer] give me the location of','').strip()]})
         if args.resample:
             for i in range(20):
-                data = RefCOCOEvalData(resamples, vis_processor, img_path)
+                data = RefCOCOEvalData(resamples, vis_processor, args.img_path)
                 resamples = []
                 eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
                 for images, questions, img_ids in tqdm(eval_dataloader):
@@ -80,12 +78,12 @@ for dataset in args.dataset:
                     if re.match(pattern, answer) or i == 4:
                         minigpt4_predict[img_id].append(answer)
                     else:
-                        resamples.append({'img_id': img_id, 'sents': [question.replace('[refer] where is','').replace('?','').strip()]})
+                        resamples.append({'img_id': img_id, 'sents': [question.replace('[refer] give me the location of','').strip()]})
 
                 if len(resamples) == 0:
                     break
 
-        with open(f'results/{args.name}_{dataset}_{split}.json','w') as f:
+        with open(args.save_path, 'w') as f:
             json.dump(minigpt4_predict, f)
 
         count=0
diff --git a/eval_vqa.py b/eval_scripts/eval_vqa.py
similarity index 57%
rename from eval_vqa.py
rename to eval_scripts/eval_vqa.py
index b3dd07a..3d5cdfc 100644
--- a/eval_vqa.py
+++ b/eval_scripts/eval_vqa.py
@@ -13,44 +13,45 @@ from datasets import load_dataset
 
 from minigpt4.datasets.datasets.vqa_datasets import OKVQAEvalData,VizWizEvalData,IconQAEvalData,GQAEvalData,VSREvalData,HMEvalData
-
 from minigpt4.common.vqa_tools.VQA.PythonHelperTools.vqaTools.vqa import VQA
 from minigpt4.common.vqa_tools.VQA.PythonEvaluationTools.vqaEvaluation.vqaEval import VQAEval
 
 from minigpt4.common.eval_utils import prepare_texts, init_model, eval_parser
 from minigpt4.conversation.conversation import CONV_VISION_minigptv2
-import random
 
 def list_of_str(arg):
     return list(map(str, arg.split(',')))
 
-
 parser = eval_parser()
 parser.add_argument("--dataset", type=list_of_str, default='refcoco', help="dataset to evaluate")
 parser.add_argument("--split", type=list_of_str, default='testB', help="dataset split to evaluate")
 parser.add_argument("--resample", action='store_true', help="resolution used in refcoco")
 parser.add_argument("--img_path", type=str)
 parser.add_argument("--eval_file_path", type=str)
+parser.add_argument("--save_path", type=str)
 args = parser.parse_args()
 
+
 print(args.ckpt)
 print(args.name)
 
 model, vis_processor = init_model(args)
-conv_temp = CONV_VISION_LLama2.copy()
+conv_temp = CONV_VISION_minigptv2.copy()
 conv_temp.system = ""
+
+
 model.eval()
 
 os.makedirs('results', exist_ok=True)
 
 if 'okvqa' in args.dataset:
-    img_path=os.path.join(args.img_path,"train")
-    with open(os.path.join(args.eval_file_path,"ok_vqa/test_split.json")) as f:
+    evaluation_annotation_path = os.path.join(args.eval_file_path, "test_split.json")
+    with open(evaluation_annotation_path) as f:
         ok_vqa_test_split = json.load(f)
 
-    data = OKVQAEvalData(ok_vqa_test_split, vis_processor, img_path)
+    data = OKVQAEvalData(ok_vqa_test_split, vis_processor, args.img_path)
    eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
    minigpt4_predict = []
@@ -62,56 +63,27 @@ if 'okvqa' in args.dataset:
 
         for answer, question_id, question, img_id in zip(answers, question_ids, questions, img_ids):
             result = dict()
-            if "</s>" in answer.lower():
-                print("answer: ", answer)
             answer = answer.lower().replace('</s>','').strip()
             result['answer'] = answer
             result['question_id'] = int(question_id)
-            if answer == "":
-                resamples.append({'image_id': img_id, 'question_id':question_id, 'question': [question.replace('[vqa] Based on the image, respond to this question with a short answer:','').strip()]})
-            else:
-                minigpt4_predict.append(result)
+            minigpt4_predict.append(result)
 
-    if args.resample:
-        for i in range(20):
-            data = OKVQAEvalData(resamples, vis_processor, img_path)
-            resamples = []
-            eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
-            for images, questions, question_ids, img_ids in eval_dataloader:
-                texts = prepare_texts(questions, conv_temp)  # wrap the texts with conversation template
-                answers = model.generate(images, texts, max_new_tokens=args.max_new_tokens, do_sample=False)
-                for answer, question_id, question in zip(answers, question_ids, questions):
-                    result = dict()
-                    answer = answer.lower().replace('</s>','').strip()
-                    result['answer'] = answer
-                    result['question_id'] = int(question_id)
-                    minigpt4_predict.append(result)
-                    if answer == "":
-                        resamples.append({'image_id': img_id, 'question_id':question_id, 'question': [question.replace('[vqa] Based on the image, respond to this question with a short answer:','').strip()]})
-                    else:
-                        minigpt4_predict.append(result)
-            if len(resamples) == 0:
-                break
-
-    save_path=f'results/{args.name}_okvqa.json'
-    with open(save_path,'w') as f:
+    with open(args.save_path,'w') as f:
         json.dump(minigpt4_predict, f)
 
-    annFile =f'{args.eval_file_path}/ok_vqa/mscoco_val2014_annotations_clean.json'
-    quesFile =f'{args.eval_file_path}/ok_vqa/OpenEnded_mscoco_val2014_questions_clean.json'
+    annFile = os.path.join(args.eval_file_path, "mscoco_val2014_annotations_clean.json")
+    quesFile = os.path.join(args.eval_file_path, "OpenEnded_mscoco_val2014_questions_clean.json")
 
     vqa = VQA(annFile, quesFile)
-    vqaRes = vqa.loadRes(save_path, quesFile)
+    vqaRes = vqa.loadRes(args.save_path, quesFile)
 
     vqaEval = VQAEval(vqa, vqaRes, n=2)
-
     vqaEval.evaluate()
-
     print ("Overall OKVQA Accuracy is: %.02f\n" %(vqaEval.accuracy['overall']), flush=True)
 if 'vizwiz' in args.dataset:
-    img_path=f'{args.img_path}/vizwiz/val'
-    vizwiz = json.load(open(f'{args.eval_file_path}/vizwiz/val.json', 'r'))
+    img_path = args.img_path
+    vizwiz = json.load(open(args.eval_file_path, 'r'))
 
     data = VizWizEvalData(vizwiz, vis_processor, img_path)
     eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
@@ -120,7 +92,7 @@ if 'vizwiz' in args.dataset:
     for images, texts, gt_answers in tqdm(eval_dataloader):
         texts = prepare_texts(texts, conv_temp)  # wrap the texts with conversation template
         with torch.no_grad():
-            answers = model.generate(images, texts, max_new_tokens=args.max_new_tokens, do_sample=False)
+            answers = model.generate(images, texts, max_new_tokens=args.max_new_tokens, do_sample=False, repetition_penalty=1.0)
 
     for answer, gt_answer in zip(answers, gt_answers):
         result = dict()
@@ -134,52 +106,16 @@ if 'vizwiz' in args.dataset:
         acc = min(count/3.0, 1.0)
         total_acc.append(acc)
 
-    save_path=f'results/{args.name}_vizwiz.json'
+    save_path=args.save_path
     with open(save_path,'w') as f:
         json.dump(minigpt4_predict, f)
-
     print('vizwiz Acc: ', np.average(total_acc)* 100.0, flush=True)
 
-if 'aokvqa' in args.dataset:
-    img_path=f'{args.img_path}/aokvqa/images'
-    for split in args.split:
-        with open(f'{args.eval_file_path}/aokvqa/annotations/aokvqa_v1p0_{split}.json','r') as f:
-            aokvqa_v1p0 = json.load(f)
-
-        data = AOKVQADAEvalData(aokvqa_v1p0, vis_processor, img_path)
-        eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
-
-        minigpt4_predict = defaultdict(dict)
-
-        for images, texts, question_ids in tqdm(eval_dataloader):
-            texts = prepare_texts(texts, conv_temp)  # wrap the texts with conversation template
-            answers = model.generate(images, texts, max_new_tokens=args.max_new_tokens, do_sample=False)
-
-            for answer, question_id in zip(answers, question_ids):
-                minigpt4_predict[question_id]['direct_answer'] = answer.lower().replace('</s>','').strip()
-
-        data = AOKVQAMCEvalData(aokvqa_v1p0, vis_processor, img_path)
-        eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
-
-        for images, texts, question_ids, answers in tqdm(eval_dataloader):
-            instructions = ["[INST] <Img><ImageHere></Img> {} [/INST]".format(text) for text in texts]
-            answer_ranks = model.multi_select(images, instructions, answers)
-            candidates = [list(x) for x in zip(*answers)]
-            for idx, question_id in enumerate(question_ids):
-                minigpt4_predict[question_id]['multiple_choice'] = candidates[idx][answer_ranks[idx][0]]
-
-        save_path=f'results/{args.name}_a_okvqa_{split}.json'
-        with open(save_path,'w') as f:
-            json.dump(minigpt4_predict, f)
-
-        os.chdir('minigpt4/common/vqa_tools/aokvqa')
-        print(os.system(f'python evaluation/eval_predictions.py --aokvqa-dir {args.eval_file_path}/aokvqa/annotations --split {split} --preds ../../../../{save_path}'), flush=True)
-        os.chdir('../../../../')
-
-if 'iconqa' in args.dataset:
-    iconqa_text_val = json.load(open(f'{eval_file_path}/iconqa/choose_text_val.json','r'))
-    img_path = f'{args.img_path}/iconqa/val/choose_txt'
+if 'iconvqa' in args.dataset:
+    iconqa_text_val = json.load(open(args.eval_file_path,"r"))
+    img_path = args.img_path
+
     data = IconQAEvalData(iconqa_text_val, vis_processor, img_path)
     eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
@@ -200,8 +136,8 @@
 
 if 'gqa' in args.dataset:
-    img_path = f'{args.img_path}/gqa/images/val'
-    gqa = json.load(open(f'{args.eval_file_path}/gqa/annotations/testdev_balanced_questions.json', 'r'))
+    img_path = args.img_path
+    gqa = json.load(open(args.eval_file_path))
     data = GQAEvalData(gqa, vis_processor, img_path)
     eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
     count=0
@@ -221,13 +157,13 @@ if 'gqa' in args.dataset:
             total+=1
     print('gqa val:', count / total * 100, flush=True)
 
-    save_path=f'results/{args.name}_gqa.json'
+    save_path=args.save_path
     with open(save_path,'w') as f:
         json.dump(minigpt4_predict, f)
 
 if 'vsr' in args.dataset:
-    annotation = load_dataset("cambridgeltl/vsr_zeroshot", split='test')
-    img_path = f'{args.img_path}/vsr/images'
+    annotation = load_dataset(args.eval_file_path, split='test')
+    img_path = args.img_path
     data = VSREvalData(annotation, vis_processor, img_path)
     eval_dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=False)
     count=0
@@ -237,11 +173,9 @@ if 'vsr' in args.dataset:
 
     for images, texts, labels in tqdm(eval_dataloader):
         texts = prepare_texts(texts, conv_temp)  # wrap the texts with conversation template
-        # print("texts",texts)
         answers = model.generate(images, texts, max_new_tokens=args.max_new_tokens, do_sample=False)
 
         for answer, label in zip(answers, labels):
-            print(answer)
             result = dict()
             result['pred'] = answer.replace('</s>','').strip()
             result['gt'] = label
@@ -250,14 +184,13 @@ if 'vsr' in args.dataset:
                 count+=1
             total+=1
     print('vsr test:', count / total * 100, flush=True)
-    save_path=f'results/{args.name}_vsr.json'
-    with open(save_path,'w') as f:
+    with open(args.save_path,'w') as f:
         json.dump(minigpt4_predict, f)
 
 if 'hm' in args.dataset:
-    img_path = f'{args.img_path}/hateful_meme'
+    img_path = args.img_path
     annotation = []
-    with open(f'{args.eval_file_path}/hateful_meme/dev.jsonl', 'r') as jsonl_file:
+    with open(args.eval_file_path, 'r') as jsonl_file:
         for line in jsonl_file:
             json_obj = json.loads(line)
             annotation.append(json_obj)
@@ -271,19 +204,27 @@ if 'hm' in args.dataset:
 
     for images, texts, labels in tqdm(eval_dataloader):
         texts = prepare_texts(texts, conv_temp)  # wrap the texts with conversation template
+
         answers = model.generate(images, texts, max_new_tokens=args.max_new_tokens, do_sample=False)
 
         for answer, label in zip(answers, labels):
             result = dict()
-            answer = 1 if answer.lower().__contains__('yes') else 0
-            result['pred'] = int(str(answer).replace('</s>','').strip())
+            if answer.lower().strip() == "yes":
+                answer = 1
+            elif answer.lower().strip() == "no":
+                answer = 0
+            else:
+                print("answer", answer)
+
+            result['pred'] = answer
+
             result['gt'] = int(label)
             minigpt4_predict.append(result)
             if answer == label:
                 count+=1
             total+=1
+
     print('hm val:', count / total * 100, flush=True)
 
-    save_path=f'results/{args.name}_hm.json'
-    with open(save_path,'w') as f:
+    with open(args.save_path,'w') as f:
         json.dump(minigpt4_predict, f)
diff --git a/minigpt4/datasets/datasets/coco_caption.py b/minigpt4/datasets/datasets/coco_caption.py
index 76f86e4..2cafd93 100755
--- a/minigpt4/datasets/datasets/coco_caption.py
+++ b/minigpt4/datasets/datasets/coco_caption.py
@@ -91,7 +91,7 @@ class RefCOCOEvalData(torch.utils.data.Dataset):
         image_path = os.path.join(self.root_path, f'{img_id[:27]}.jpg')
         image = Image.open(image_path).convert('RGB')
         image = self.vis_processor(image)
-        question = f"[refer] tell me the location of {sent}?"
+        question = f"[refer] give me the location of {sent}"
         return image, question, img_id
 
 class EvalCaptionData(torch.utils.data.Dataset):
diff --git a/minigpt4/datasets/datasets/vqa_datasets.py b/minigpt4/datasets/datasets/vqa_datasets.py
index 486b5f2..a8df3cc 100755
--- a/minigpt4/datasets/datasets/vqa_datasets.py
+++ b/minigpt4/datasets/datasets/vqa_datasets.py
@@ -16,30 +16,6 @@ class VQADataset(BaseDataset):
     def __init__(self, vis_processor, text_processor, vis_root, ann_paths):
         super().__init__(vis_processor, text_processor, vis_root, ann_paths)
 
-    # def collater(self, samples):
-    #     image_list, question_list, answer_list, weight_list = [], [], [], []
-
-    #     num_answers = []
-
-    #     for sample in samples:
-    #         image_list.append(sample["image"])
-    #         question_list.append(sample["question"])
-
-    #         weight_list.extend(sample["weights"])
-
-    #         answers = sample["answer"]
-
-    #         answer_list.extend(answers)
-    #         num_answers.append(len(answers))
-
-    #     return {
-    #         "image": torch.stack(image_list, dim=0),
-    #         "text_input": question_list,
-    #         "answer": answer_list,
-    #         "weight": torch.Tensor(weight_list),
-    #         "n_answers": torch.LongTensor(num_answers),
-    #     }
-
 
 class VQAEvalDataset(BaseDataset):
     def __init__(self, vis_processor, text_processor, vis_root, ann_paths):
@@ -85,7 +61,7 @@ class VizWizEvalData(torch.utils.data.Dataset):
         image_path = os.path.join(self.root_path, img_id)
         image = Image.open(image_path).convert('RGB')
         image = self.vis_processor(image)
-        question = f"[vqa] The question is'{question}' Based on the image, answer the question with a single word or phrase. and reply 'unanswerable' when the provided information is insufficient"  # 52.0
+        question = f"[vqa] The question is '{question}' Based on the image, answer the question with a single word or phrase. and reply 'unanswerable' when the provided information is insufficient"
         return image, question, answers
 
 class IconQAEvalData(torch.utils.data.Dataset):
diff --git a/minigpt4/models/base_model.py b/minigpt4/models/base_model.py
index d70ca18..b01273e 100644
--- a/minigpt4/models/base_model.py
+++ b/minigpt4/models/base_model.py
@@ -184,7 +184,7 @@ class BaseModel(nn.Module):
         else:
             llama_model = LlamaForCausalLM.from_pretrained(
                 llama_model_path,
-                torch_dtype=torch.float16,
+                torch_dtype=torch.float32,
             )
 
         if lora_r > 0:
diff --git a/minigpt4/models/minigpt_base.py b/minigpt4/models/minigpt_base.py
index 22c4251..fa82187 100644
--- a/minigpt4/models/minigpt_base.py
+++ b/minigpt4/models/minigpt_base.py
@@ -367,9 +367,18 @@ class MiniGPTBase(BaseModel):
                 min_length=min_length,
                 top_p=top_p,
                 repetition_penalty=repetition_penalty,
-                stopping_criteria=stopping_criteria,
+                # stopping_criteria=stopping_criteria,
             )
 
+        # with self.maybe_autocast():
+        #     outputs = self.llama_model.generate(
+        #         inputs_embeds=embs,
+        #         attention_mask=attn_mask,
+        #         max_new_tokens=max_new_tokens,
+        #         num_beams=num_beams,
+        #         do_sample=do_sample,
+        #         # stopping_criteria=stopping_criteria,
+        #     )
         answers = []
         for output_token in outputs:
             if output_token[0] == 0: