From c70775af99a105eaeac66049b4b129ff819507cb Mon Sep 17 00:00:00 2001
From: XiaoqianShen <64844805+xiaoqian-shen@users.noreply.github.com>
Date: Fri, 27 Oct 2023 13:49:07 +0300
Subject: [PATCH] Add files via upload

---
 eval_scripts/eval_sciencevqa.py  | 178 +++++++++++++++++++++++++++++
 eval_scripts/eval_textvqa.py     | 185 +++++++++++++++++++++++++++++++
 eval_scripts/scienceqa_eval.yaml |  48 ++++++++
 eval_scripts/textvqa_eval.yaml   |  48 ++++++++
 4 files changed, 459 insertions(+)
 create mode 100644 eval_scripts/eval_sciencevqa.py
 create mode 100644 eval_scripts/eval_textvqa.py
 create mode 100644 eval_scripts/scienceqa_eval.yaml
 create mode 100644 eval_scripts/textvqa_eval.yaml

diff --git a/eval_scripts/eval_sciencevqa.py b/eval_scripts/eval_sciencevqa.py
new file mode 100644
index 0000000..915f67e
--- /dev/null
+++ b/eval_scripts/eval_sciencevqa.py
@@ -0,0 +1,178 @@
+import argparse
+import os
+import random
+import requests
+from io import BytesIO
+
+import numpy as np
+from PIL import Image
+import torch
+import torch.backends.cudnn as cudnn
+import gradio as gr
+
+import minigpt4.tasks as tasks
+from minigpt4.common.config import Config
+from minigpt4.common.dist_utils import get_rank, init_distributed_mode
+from minigpt4.common.logger import setup_logger
+from minigpt4.common.optims import (
+    LinearWarmupCosineLRScheduler,
+    LinearWarmupStepLRScheduler,
+)
+from minigpt4.common.registry import registry
+from minigpt4.common.utils import now
+from minigpt4.conversation.conversation import Conversation, SeparatorStyle, StoppingCriteriaList, StoppingCriteriaSub
+
+# imports modules for registration
+from minigpt4.datasets.builders import *
+from minigpt4.models import *
+from minigpt4.processors import *
+from minigpt4.runners import *
+from minigpt4.tasks import *
+
+
+parser = argparse.ArgumentParser(description="Demo")
+parser.add_argument("--cfg-path", required=False, default='scienceqa_eval.yaml', help="path to configuration file.")
+parser.add_argument("--ckpt_path", required=False, help="path to the checkpoint to evaluate.")
+parser.add_argument("--lora_r", type=int, default=64, help="LoRA rank.")
+parser.add_argument("--lora_alpha", type=int, default=16, help="LoRA alpha.")
+parser.add_argument("--name", type=str)
+
+parser.add_argument(
+    "--options",
+    nargs="+",
+    help="override some settings in the used config; the key-value pair "
+    "in xxx=yyy format will be merged into the config file (deprecated, "
+    "use --cfg-options instead).",
+)
+
+print('Initializing Chat')
+args = parser.parse_args()
+cfg = Config(args)
+
+
+ckpt_list = [args.ckpt_path]
+
+print('evaluating config:', args.cfg_path)
+print('evaluating checkpoint:', args.ckpt_path)
+
+
+for ckpt in ckpt_list:
+    cfg.model_cfg.ckpt = ckpt
+    cfg.model_cfg.lora_r = args.lora_r
+    cfg.model_cfg.lora_alpha = args.lora_alpha
+
+    model_config = cfg.model_cfg
+    model_cls = registry.get_model_class(model_config.arch)
+    model = model_cls.from_config(model_config).to('cuda:0')
+    model.eval()
+    print('Initialization Finished')
+
+    vis_processor_cfg = cfg.datasets_cfg.coco_vqa.vis_processor.eval
+    vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg)
+
+    text_processor_cfg = cfg.datasets_cfg.coco_vqa.text_processor.eval
+    text_processor = registry.get_processor_class(text_processor_cfg.name).from_config(text_processor_cfg)
+
+
+    task = tasks.setup_task(cfg)
+
+
+    from minigpt4.datasets.datasets.vqa_datasets import VQADataset, VQAEvalDataset
+
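+    # The class below sidesteps VQADataset's file-based annotation loading:
+    # annotations come straight from the HuggingFace ScienceQA release (hence
+    # the commented-out parent __init__), only samples that ship with an image
+    # are kept, and each question's choices map onto the letter options A-E.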
+    class textVQAEvalDataset(VQADataset):
+        def __init__(self, vis_processor, text_processor, vis_root=None, ann_paths=None):
+            # super().__init__(vis_processor, text_processor, vis_root, ann_paths)
+
+            from datasets import load_dataset
+            self.annotation = load_dataset("derek-thomas/ScienceQA", split='test')
+
+            # keep only annotations that come with an image
+            self.annotation = [ann for ann in self.annotation if ann['image']]
+
+            self.vis_processor = vis_processor
+            self.text_processor = text_processor
+            self.instruction_pool = [
+                '[vqa] Question: {} Answer:',
+            ]
+
+            self.alphabet_options = ["A", "B", "C", "D", "E"]
+            self.max_num_choices = len(self.alphabet_options)
+
+        def __getitem__(self, index):
+            ann = self.annotation[index]
+            image = ann['image'].convert("RGB")
+
+            image = self.vis_processor(image)
+            question = self.text_processor(ann["question"])
+            lecture = self.text_processor(ann['lecture'])
+            hint = self.text_processor(ann['hint'])
+            options = ann['choices']
+
+            num_choices = len(ann['choices'])
+            if len(ann['choices'])

[... patch truncated in the source: the remainder of eval_sciencevqa.py and
all 185 lines of eval_textvqa.py are missing ...]

diff --git a/eval_scripts/scienceqa_eval.yaml b/eval_scripts/scienceqa_eval.yaml
new file mode 100644
--- /dev/null
+++ b/eval_scripts/scienceqa_eval.yaml
@@ -0,0 +1,48 @@
+model:
+  arch: minigpt4
+  model_type: pretrain_llama2
+  max_txt_len: 160
+  end_sym: "</s>"
+  low_resource: True
+  prompt_template: '[INST] {} [/INST] '
+  ckpt: 'please set this value to the path of pretrained checkpoint'
+
+
+datasets:
+  coco_vqa: # not used
+    type: eval
+    vis_processor:
+      eval:
+        name: "blip2_image_eval"
+        image_size: 336
+    text_processor:
+      eval:
+        name: "blip_caption"
+
+run:
+  task: scienceqa
+  # optimization-specific
+  batch_size_train: 16
+  batch_size_eval: 32
+  num_workers: 8
+
+  # inference-specific
+  num_ans_candidates: 5
+  max_len: 10
+  min_len: 1
+  num_beams: 5
+  inference_method: "generate"
+  prompt: "Question: {} Short answer:"
+
+  seed: 42
+  output_dir: "results"
+
+  evaluate: True
+  test_splits: ["val"]
+
+  # distribution-specific
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
+
\ No newline at end of file
diff --git a/eval_scripts/textvqa_eval.yaml b/eval_scripts/textvqa_eval.yaml
new file mode 100644
index 0000000..701a819
--- /dev/null
+++ b/eval_scripts/textvqa_eval.yaml
@@ -0,0 +1,48 @@
+model:
+  arch: minigpt4
+  model_type: pretrain_llama2
+  max_txt_len: 160
+  end_sym: "</s>"
+  low_resource: True
+  prompt_template: '[INST] {} [/INST] '
+  ckpt: 'please set this value to the path of pretrained checkpoint'
+
+datasets:
+  coco_vqa: # not used
+    type: eval
+    vis_processor:
+      eval:
+        name: "blip2_image_eval"
+        image_size: 336
+    text_processor:
+      eval:
+        name: "blip_caption"
+
+run:
+  task: vqa
+  # optimization-specific
+  batch_size_train: 32
+  batch_size_eval: 128
+  # without OCR tokens: batch_size=32, Evaluation Total time: 0:23:47 (9.0954 s / it)
+  use_ocr: True
+  # with OCR tokens: batch_size=16, Evaluation Total time: 0:36:34 (7.0116 s / it)
+  num_workers: 8
+
+  # inference-specific
+  max_len: 10
+  min_len: 1
+  num_beams: 1
+  inference_method: "generate"
+  prompt: "Question: {} Short answer:"
+
+  seed: 42
+  output_dir: "results"
+
+  evaluate: True
+  test_splits: ["val"]
+
+  # distribution-specific
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
\ No newline at end of file
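
A minimal sketch of how the surviving pieces fit together, assuming the
truncated part of eval_sciencevqa.py composes prompts roughly this way: the
dataset's '[vqa] Question: {} Answer:' instruction is filled with the question
plus its lettered choices, then wrapped in the LLaMA-2 prompt_template from
the yaml configs. The build_prompt helper and the "(A) choice" layout are
illustrative assumptions, not code from the patch.

    instruction_pool = ['[vqa] Question: {} Answer:']  # from the dataset class above
    prompt_template = '[INST] {} [/INST] '             # from the yaml configs above
    alphabet_options = ["A", "B", "C", "D", "E"]       # from the dataset class above

    def build_prompt(question, choices):
        # label each choice with a letter; the real formatting lives in the
        # truncated region of eval_sciencevqa.py, so this layout is a guess
        lettered = ["({}) {}".format(alphabet_options[i], c)
                    for i, c in enumerate(choices[:len(alphabet_options)])]
        body = instruction_pool[0].format(question + " " + " ".join(lettered))
        return prompt_template.format(body)

    print(build_prompt("Which property do these two objects share?", ["hard", "soft"]))
    # [INST] [vqa] Question: Which property do these two objects share? (A) hard (B) soft Answer: [/INST]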