Merge pull request #2 from ThuanNaN/create_dataset

Add MVTEC dataset
Nguyen Thuan Duong 2025-01-13 05:06:46 +07:00 committed by GitHub
commit f90fc4aeb8
8 changed files with 527 additions and 3 deletions

.gitignore (vendored): 5 changes

@@ -163,7 +163,6 @@ cython_debug/
wandb/
jobs/logs/
*.out
-*ipynb
.history/
*.json
*.sh

@@ -182,3 +181,7 @@ sbatch_generate*
eval_data/
dataset/Evaluation.md
jupyter_notebook.slurm
+MVTEC/
+MVTEC_det/
+*.pth
+log.*

create_dataset.ipynb (new file): 361 additions

File diff suppressed because one or more lines are too long
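The notebook diff is suppressed in this view, so its contents are not shown; what it must produce can be inferred from the fields that MVTecDataset (added later in this commit) reads: a JSON list at ./MVTEC_det/mvtech_ad_data_for_regression.json with image_path, bbox, and is_broken per sample. The sketch below is only an assumption about such a conversion step, assuming the standard MVTec AD layout (category/test/<defect_type>/NNN.png, masks under category/ground_truth/); it is not code from the notebook.

import os
import json
import numpy as np
from PIL import Image

def mask_to_bbox(mask_path):
    # Tight [x1, y1, x2, y2] box (in pixels) around the defect mask.
    mask = np.array(Image.open(mask_path).convert("L")) > 0
    ys, xs = np.nonzero(mask)
    return [int(xs.min()), int(ys.min()), int(xs.max()), int(ys.max())]

mvtec_root = "./MVTEC"      # assumed location of the raw MVTec AD download
out_root = "./MVTEC_det"    # output layout used by the configs in this PR
os.makedirs(out_root, exist_ok=True)

records = []
for category in sorted(os.listdir(mvtec_root)):
    test_dir = os.path.join(mvtec_root, category, "test")
    gt_dir = os.path.join(mvtec_root, category, "ground_truth")
    if not os.path.isdir(test_dir):
        continue
    for defect_type in sorted(os.listdir(test_dir)):
        for name in sorted(os.listdir(os.path.join(test_dir, defect_type))):
            img_path = os.path.join(test_dir, defect_type, name)
            if defect_type == "good":
                w, h = Image.open(img_path).size
                bbox, broken = [0, 0, w - 1, h - 1], False   # no defect: whole image
            else:
                mask_path = os.path.join(gt_dir, defect_type, name.replace(".png", "_mask.png"))
                bbox, broken = mask_to_bbox(mask_path), True
            # image_path is stored relative to the images root the dataset config points at;
            # the real notebook presumably also copies or links images into ./MVTEC_det/images.
            records.append({"image_path": os.path.join(category, defect_type, name),
                            "bbox": bbox, "is_broken": broken})

with open(os.path.join(out_root, "mvtech_ad_data_for_regression.json"), "w") as f:
    json.dump(records, f)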

New file (MiniGPT-v2 evaluation config; path not preserved in this view)

@@ -0,0 +1,25 @@
model:
  arch: minigpt_v2
  model_type: pretrain
  max_txt_len: 500
  end_sym: "</s>"
  low_resource: True
  prompt_template: '[INST] {} [/INST]'
  # ckpt: "./minigpt4/mvtec_outputs/20250112105/checkpoint_9.pth"
  ckpt: "./ckpt/checkpoint_stage3.pth"
  lora_r: 64
  lora_alpha: 16

datasets:
  cc_sbu_align:
    vis_processor:
      train:
        name: "blip2_image_eval"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"

run:
  task: image_text_pretrain
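For orientation: prompt_template wraps the whole instruction (image placeholder included) before it reaches the LLM. Combined with the detection instruction built by MVTecDataset later in this commit, the prompt text would look roughly as follows; this is an illustrative composition, not code from this PR.

# Illustrative only: how prompt_template and the dataset's instruction compose.
prompt_template = '[INST] {} [/INST]'
instruction = "<Img><ImageHere></Img> [detection] detect defect or non-defect and return the bounding box"
print(prompt_template.format(instruction))
# -> [INST] <Img><ImageHere></Img> [detection] detect defect or non-defect and return the bounding box [/INST]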

New file (MVTec dataset config; registered below as configs/datasets/mvtec/default.yaml)

@@ -0,0 +1,6 @@
datasets:
  mvtec_ad:
    data_type: images
    build_info:
      image_path: ./MVTEC_det/images
      ann_path: ./MVTEC_det/mvtech_ad_data_for_regression.json
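ann_path must point at a JSON list whose entries carry the three fields MVTecDataset reads, with image_path taken relative to image_path above. A hypothetical entry, only to make the expected schema concrete (the values are made up):

# Hypothetical annotation entry; keys match what MVTecDataset reads.
entry = {
    "image_path": "bottle/broken_large/000.png",  # joined onto ./MVTEC_det/images
    "bbox": [120, 88, 310, 265],                  # [x1, y1, x2, y2], assumed to be pixels
    "is_broken": True,
}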

Changed file (evaluation config; path not preserved in this view)

@@ -11,7 +11,7 @@ model:
  # generation configs
  prompt: ""
-  llama_model: "please set this value to the path of llama2-chat-7b"
+  llama_model: "meta-llama/Llama-2-7b-chat-hf"
  lora_r: 64
  lora_alpha: 16

Changed file (dataset builders module; path not preserved in this view)

@@ -18,7 +18,7 @@ from minigpt4.datasets.datasets.aok_vqa_datasets import AOKVQADataset
from minigpt4.datasets.datasets.coco_vqa_datasets import COCOVQADataset
from minigpt4.datasets.datasets.ocrvqa_dataset import OCRVQADataset
from minigpt4.datasets.datasets.coco_caption import COCOCapDataset
+from minigpt4.datasets.datasets.mvtec_dataset import MVTecDataset

@registry.register_builder("multitask_conversation")
class MultitaskConversationBuilder(BaseDatasetBuilder):

@@ -394,6 +394,29 @@ class CaptionToPhraseBuilder(BaseDatasetBuilder):
        return datasets

+@registry.register_builder("mvtec_ad")
+class MVTECADBuilder(BaseDatasetBuilder):
+    train_dataset_cls = MVTecDataset
+    DATASET_CONFIG_DICT = {
+        "default": "configs/datasets/mvtec/default.yaml",
+    }
+
+    def build_datasets(self):
+        logging.info("Building datasets...")
+        self.build_processors()
+
+        build_info = self.config.build_info
+        datasets = dict()
+
+        # create datasets
+        dataset_cls = self.train_dataset_cls
+        datasets['train'] = dataset_cls(
+            vis_processor=self.vis_processors["train"],
+            text_processor=self.text_processors["train"],
+            ann_path=build_info.ann_path,
+            vis_root=build_info.image_path,
+        )
+
+        return datasets
+
class DocumentVQABuilder(BaseDatasetBuilder):
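At run time the mvtec_ad key in a training config is resolved to this builder through the registry. A rough sketch of that lookup, assuming the registry and the no-argument BaseDatasetBuilder constructor behave for this builder as they do for the existing ones:

from minigpt4.common.registry import registry

# Rough sketch of the lookup path; assumes get_builder_class and the default-config
# fallback via DATASET_CONFIG_DICT["default"] work as for the other builders.
builder_cls = registry.get_builder_class("mvtec_ad")   # -> MVTECADBuilder
builder = builder_cls()
datasets = builder.build_datasets()                    # {"train": MVTecDataset(...)}
print(len(datasets["train"]))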

New file: minigpt4/datasets/datasets/mvtec_dataset.py (per the import added above)

@@ -0,0 +1,51 @@
import os
import json
import random

from PIL import Image
from torch.utils.data import Dataset


class MVTecDataset(Dataset):
    def __init__(self, vis_processor, text_processor, vis_root, ann_path):
        """
        vis_root (string): Root directory of images (e.g. coco/images/)
        ann_path (string): Path to the annotation JSON file
        """
        self.vis_root = vis_root
        self.vis_processor = vis_processor
        self.text_processor = text_processor

        self.instruction_pool = [
            '[detection] {}',
        ]

        with open(ann_path, 'r') as f:
            self.ann = json.load(f)

    def __len__(self):
        return len(self.ann)

    def __getitem__(self, index):
        info = self.ann[index]
        gt_bbox = info["bbox"]

        image_path = os.path.join(self.vis_root, info['image_path'])
        image = Image.open(image_path).convert("RGB")
        image = self.vis_processor(image)

        input = "detect defect or non-defect and return the bounding box"
        ans_cls = "defect" if info["is_broken"] else "non-defect"
        answer = f"{ans_cls}<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>"

        instruction = random.choice(self.instruction_pool).format(input)
        instruction = "<Img><ImageHere></Img> {} ".format(instruction)

        return {
            "image": image,
            "instruction_input": instruction,
            "answer": answer,
            "image_id": info['image_path'],
        }
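A quick, illustrative way to inspect the sample format is to instantiate the dataset with identity functions standing in for the real BLIP-2 processors (paths follow the dataset config above):

from minigpt4.datasets.datasets.mvtec_dataset import MVTecDataset

# Identity callables stand in for blip2_image_train / blip_caption, just to inspect a sample.
ds = MVTecDataset(
    vis_processor=lambda img: img,
    text_processor=lambda txt: txt,
    vis_root="./MVTEC_det/images",
    ann_path="./MVTEC_det/mvtech_ad_data_for_regression.json",
)
sample = ds[0]
print(sample["instruction_input"])  # "<Img><ImageHere></Img> [detection] detect defect or non-defect and return the bounding box "
print(sample["answer"])             # e.g. "defect<120><88><310><265>" or "non-defect<...>"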

New file (MVTec fine-tuning config; path not preserved in this view)

@@ -0,0 +1,55 @@
model:
  arch: minigpt_v2
  model_type: pretrain
  max_txt_len: 1024
  image_size: 448
  end_sym: "</s>"
  llama_model: "meta-llama/Llama-2-7b-chat-hf"
  ckpt: "./ckpt/checkpoint_stage3.pth"
  use_grad_checkpoint: True
  chat_template: True
  lora_r: 64
  lora_alpha: 16

datasets:
  mvtec_ad:
    batch_size: 2
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 448
    text_processor:
      train:
        name: "blip_caption"
    sample_ratio: 100

run:
  task: image_text_pretrain
  # optimizer
  lr_sched: "linear_warmup_cosine_lr"
  init_lr: 1e-5
  min_lr: 1e-6
  warmup_lr: 1e-6

  weight_decay: 0.05
  max_epoch: 10
  num_workers: 6
  warmup_steps: 1000
  iters_per_epoch: 1000

  seed: 42
  output_dir: "mvtec_outputs"

  amp: True
  resume_ckpt_path: null

  evaluate: False
  train_splits: ["train"]

  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: True

  wandb_log: True
  job_name: minigptv2_finetune
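For scale: iters_per_epoch: 1000 with max_epoch: 10 gives 10,000 optimizer steps in total, so the 1,000 warmup_steps cover the first epoch before the cosine decay from init_lr toward min_lr, and at batch_size: 2 with world_size: 1 each epoch draws 2,000 samples. Such a run is normally launched through the repository's train.py with this file passed via --cfg-path. The arithmetic, spelled out:

# Schedule arithmetic for the values above (illustrative, no framework code involved).
iters_per_epoch, max_epoch = 1000, 10
warmup_steps, batch_size, world_size = 1000, 2, 1

total_steps = iters_per_epoch * max_epoch                      # 10,000 optimizer steps
samples_per_epoch = batch_size * world_size * iters_per_epoch  # 2,000 samples per epoch
print(total_steps, warmup_steps / total_steps, samples_per_epoch)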