From 59eafed6ca0a3f990bdab0e584a6add69159d264 Mon Sep 17 00:00:00 2001
From: ThuanNaN
Date: Sun, 12 Jan 2025 09:27:32 +0700
Subject: [PATCH] add mvtec dataset

---
 minigpt4/configs/datasets/mvtec/default.yaml |  6 ++
 .../builders/image_text_pair_builder.py      | 25 ++++++++-
 minigpt4/datasets/datasets/mvtec_dataset.py  | 51 +++++++++++++++++
 train_configs/minigptv2_finetune_mvtec.yaml  | 55 +++++++++++++++++++
 4 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100755 minigpt4/configs/datasets/mvtec/default.yaml
 create mode 100644 minigpt4/datasets/datasets/mvtec_dataset.py
 create mode 100644 train_configs/minigptv2_finetune_mvtec.yaml

diff --git a/minigpt4/configs/datasets/mvtec/default.yaml b/minigpt4/configs/datasets/mvtec/default.yaml
new file mode 100755
index 0000000..de8878a
--- /dev/null
+++ b/minigpt4/configs/datasets/mvtec/default.yaml
@@ -0,0 +1,6 @@
+datasets:
+  mvtec_ad:
+    data_type: images
+    build_info:
+      image_path: /mnt/Repository/MiniGPT-4/MVTEC_det/images
+      ann_path: /mnt/Repository/MiniGPT-4/MVTEC_det/mvtech_ad_data_for_regression.json
diff --git a/minigpt4/datasets/builders/image_text_pair_builder.py b/minigpt4/datasets/builders/image_text_pair_builder.py
index fb344f1..403de6e 100644
--- a/minigpt4/datasets/builders/image_text_pair_builder.py
+++ b/minigpt4/datasets/builders/image_text_pair_builder.py
@@ -18,7 +18,7 @@ from minigpt4.datasets.datasets.aok_vqa_datasets import AOKVQADataset
 from minigpt4.datasets.datasets.coco_vqa_datasets import COCOVQADataset
 from minigpt4.datasets.datasets.ocrvqa_dataset import OCRVQADataset
 from minigpt4.datasets.datasets.coco_caption import COCOCapDataset
-
+from minigpt4.datasets.datasets.mvtec_dataset import MVTecDataset
 
 @registry.register_builder("multitask_conversation")
 class MultitaskConversationBuilder(BaseDatasetBuilder):
@@ -394,6 +394,29 @@ class CaptionToPhraseBuilder(BaseDatasetBuilder):
 
         return datasets
 
+
+@registry.register_builder("mvtec_ad")
+class MVTECADBuilder(BaseDatasetBuilder):
+    train_dataset_cls = MVTecDataset
+    DATASET_CONFIG_DICT = {
+        "default": "configs/datasets/mvtec/default.yaml",
+    }
+
+    def build_datasets(self):
+        logging.info("Building datasets...")
+        self.build_processors()
+        build_info = self.config.build_info
+        datasets = dict()
+
+        # create datasets
+        dataset_cls = self.train_dataset_cls
+        datasets['train'] = dataset_cls(
+            vis_processor=self.vis_processors["train"],
+            text_processor=self.text_processors["train"],
+            ann_path=build_info.ann_path,
+            vis_root=build_info.image_path,
+        )
+
+        return datasets
+
 
 class DocumentVQABuilder(BaseDatasetBuilder):
diff --git a/minigpt4/datasets/datasets/mvtec_dataset.py b/minigpt4/datasets/datasets/mvtec_dataset.py
new file mode 100644
index 0000000..2586232
--- /dev/null
+++ b/minigpt4/datasets/datasets/mvtec_dataset.py
@@ -0,0 +1,51 @@
+import os
+import json
+import random
+
+from PIL import Image
+from torch.utils.data import Dataset
+
+
+class MVTecDataset(Dataset):
+    def __init__(self, vis_processor, text_processor, vis_root, ann_path):
+        """
+        vis_root (string): Root directory of images (e.g. coco/images/)
+        ann_path (string): Path to the annotation file
+        """
+        self.vis_root = vis_root
+
+        self.vis_processor = vis_processor
+        self.text_processor = text_processor
+
+        self.instruction_pool = [
+            '[detection] {}',
+        ]
+
+        with open(ann_path, 'r') as f:
+            self.ann = json.load(f)
+
+    def __len__(self):
+        return len(self.ann)
+
+    def __getitem__(self, index):
+        info = self.ann[index]
+        gt_bbox = info["bbox"]
+
+        image_path = os.path.join(self.vis_root, info['image_path'])
+        image = Image.open(image_path).convert("RGB")
+        image = self.vis_processor(image)
+
+        task_input = "detect defect or non-defect and return the bounding box"
+
+        ans_cls = "defect" if info["is_broken"] else "non-defect"
+        answer = f"{ans_cls}<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>"
+
+        instruction = random.choice(self.instruction_pool).format(task_input)
+        instruction = "<Img><ImageHere></Img> {} ".format(instruction)
+
+        return {
+            "image": image,
+            "instruction_input": instruction,
+            "answer": answer,
+            "image_id": info['image_path'],
+        }
\ No newline at end of file
diff --git a/train_configs/minigptv2_finetune_mvtec.yaml b/train_configs/minigptv2_finetune_mvtec.yaml
new file mode 100644
index 0000000..6ae8ba0
--- /dev/null
+++ b/train_configs/minigptv2_finetune_mvtec.yaml
@@ -0,0 +1,55 @@
+model:
+  arch: minigpt_v2
+  model_type: pretrain
+  max_txt_len: 1024
+  image_size: 448
+  end_sym: "</s>"
+  llama_model: "meta-llama/Llama-2-7b-chat-hf"
+  ckpt: "./ckpt/checkpoint_stage3.pth"
+  use_grad_checkpoint: True
+  chat_template: True
+  lora_r: 64
+  lora_alpha: 16
+
+datasets:
+  mvtec_ad:
+    batch_size: 2
+    vis_processor:
+      train:
+        name: "blip2_image_train"
+        image_size: 448
+    text_processor:
+      train:
+        name: "blip_caption"
+    sample_ratio: 100
+
+run:
+  task: image_text_pretrain
+  # optimizer
+  lr_sched: "linear_warmup_cosine_lr"
+  init_lr: 1e-5
+  min_lr: 1e-6
+  warmup_lr: 1e-6
+
+  weight_decay: 0.05
+  max_epoch: 50
+  num_workers: 6
+  warmup_steps: 1000
+  iters_per_epoch: 1000
+
+  seed: 42
+  output_dir: "mvtec_outputs"
+
+  amp: True
+  resume_ckpt_path: null
+
+  evaluate: False
+  train_splits: ["train"]
+
+  device: "cuda"
+  world_size: 1
+  dist_url: "env://"
+  distributed: True
+
+  wandb_log: True
+  job_name: minigptv2_finetune
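
Notes, below the patch:

MVTecDataset.__getitem__ reads three fields from each annotation record:
"image_path" (joined onto image_path from default.yaml), "bbox" (four
coordinates serialized into the answer string), and "is_broken" (a boolean
mapped to "defect" / "non-defect"). The sketch below writes a two-record
file in that assumed shape; the file names and coordinates are illustrative
and are not taken from mvtech_ad_data_for_regression.json.

    import json

    # Hypothetical records matching the fields MVTecDataset reads.
    sample_ann = [
        {
            "image_path": "bottle/test/broken_large/000.png",  # joined with vis_root
            "bbox": [21, 34, 67, 80],   # serialized as <x1><y1><x2><y2>
            "is_broken": True,          # answer class "defect"
        },
        {
            "image_path": "bottle/test/good/001.png",
            "bbox": [0, 0, 0, 0],
            "is_broken": False,         # answer class "non-defect"
        },
    ]

    with open("mvtec_ann_sample.json", "w") as f:
        json.dump(sample_ann, f, indent=2)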
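
To sanity-check the dataset class before a full run, it can be instantiated
directly, as in the sketch below. The torchvision transform and the identity
function are stand-ins for the registered blip2_image_train and blip_caption
processors that the builder wires in during training; they only exercise the
data path.

    from torchvision import transforms

    from minigpt4.datasets.datasets.mvtec_dataset import MVTecDataset


    def text_processor(text):
        # Identity stand-in; training uses the registered blip_caption processor.
        return text


    # Stand-in for the blip2_image_train processor (448 px, per the config).
    vis_processor = transforms.Compose([
        transforms.Resize((448, 448)),
        transforms.ToTensor(),
    ])

    dataset = MVTecDataset(
        vis_processor=vis_processor,
        text_processor=text_processor,
        vis_root="/mnt/Repository/MiniGPT-4/MVTEC_det/images",
        ann_path="/mnt/Repository/MiniGPT-4/MVTEC_det/mvtech_ad_data_for_regression.json",
    )

    sample = dataset[0]
    print(sample["instruction_input"])  # <Img><ImageHere></Img> [detection] detect defect ...
    print(sample["answer"])             # e.g. defect<21><34><67><80>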
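
With the builder registered under "mvtec_ad", training should launch the same
way as the repo's other MiniGPT-v2 fine-tuning configs, e.g.
torchrun --nproc-per-node 1 train.py --cfg-path train_configs/minigptv2_finetune_mvtec.yaml
(single process here, matching world_size: 1). Note that sample_ratio: 100
only weights this dataset relative to others when several datasets are mixed;
with mvtec_ad as the sole training dataset it should have no effect.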