From e2158361f1238af0ccf8123dd5cba99afd896d0e Mon Sep 17 00:00:00 2001 From: ThuanNaN Date: Mon, 13 Jan 2025 05:44:21 +0700 Subject: [PATCH 1/4] update finetune cfg --- minigpt4/configs/datasets/mvtec/default.yaml | 2 +- minigpt4/datasets/datasets/mvtec_dataset.py | 8 +++++--- train_configs/minigptv2_finetune_mvtec.yaml | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/minigpt4/configs/datasets/mvtec/default.yaml b/minigpt4/configs/datasets/mvtec/default.yaml index 2fbb16b..68c63a5 100755 --- a/minigpt4/configs/datasets/mvtec/default.yaml +++ b/minigpt4/configs/datasets/mvtec/default.yaml @@ -3,4 +3,4 @@ datasets: data_type: images build_info: image_path: ./MVTEC_det/images - ann_path: ./MVTEC_det/mvtech_ad_data_for_regression.json + ann_path: ./MVTEC_det/train_data.json diff --git a/minigpt4/datasets/datasets/mvtec_dataset.py b/minigpt4/datasets/datasets/mvtec_dataset.py index 2586232..287f09b 100644 --- a/minigpt4/datasets/datasets/mvtec_dataset.py +++ b/minigpt4/datasets/datasets/mvtec_dataset.py @@ -29,15 +29,17 @@ class MVTecDataset(Dataset): def __getitem__(self, index): info = self.ann[index] gt_bbox = info["bbox"] + ans_cls = info["class"] image_path = os.path.join(self.vis_root, info['image_path']) image = Image.open(image_path).convert("RGB") image = self.vis_processor(image) - input = "detect defect or non-defect and return the bounding box" + input = "detect a defect or not-defect object and return the bounding boxes and its label. If not, bound around the object." - ans_cls = "defect" if info["is_broken"] == True else "non-defect" - answer = f"{ans_cls}<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>" + ans_defect = "defect" if info["is_broken"] == True else "non-defect" + ans_para = f"

{ans_cls}-{ans_defect}

" + answer = f"{ans_para}<{gt_bbox[0]}> <{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>" instruction = random.choice(self.instruction_pool).format(input) instruction = " {} ".format(instruction) diff --git a/train_configs/minigptv2_finetune_mvtec.yaml b/train_configs/minigptv2_finetune_mvtec.yaml index 83cab0c..2a1c09d 100644 --- a/train_configs/minigptv2_finetune_mvtec.yaml +++ b/train_configs/minigptv2_finetune_mvtec.yaml @@ -32,10 +32,10 @@ run: warmup_lr: 1e-6 weight_decay: 0.05 - max_epoch: 10 + max_epoch: 5 num_workers: 6 warmup_steps: 1000 - iters_per_epoch: 1000 + iters_per_epoch: 2000 seed: 42 output_dir: "mvtec_outputs" @@ -51,5 +51,5 @@ run: dist_url: "env://" distributed: True - wandb_log: True + wandb_log: False job_name: minigptv2_finetune From 3718f9ff6981176481f53fa172fe3f680cfbef4f Mon Sep 17 00:00:00 2001 From: ThuanNaN Date: Mon, 13 Jan 2025 06:48:23 +0700 Subject: [PATCH 2/4] update anwser ins --- minigpt4/datasets/datasets/mvtec_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/minigpt4/datasets/datasets/mvtec_dataset.py b/minigpt4/datasets/datasets/mvtec_dataset.py index 287f09b..893414a 100644 --- a/minigpt4/datasets/datasets/mvtec_dataset.py +++ b/minigpt4/datasets/datasets/mvtec_dataset.py @@ -39,7 +39,7 @@ class MVTecDataset(Dataset): ans_defect = "defect" if info["is_broken"] == True else "non-defect" ans_para = f"

{ans_cls}-{ans_defect}

" - answer = f"{ans_para}<{gt_bbox[0]}> <{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>" + answer = f"{ans_para} {{<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>}}" instruction = random.choice(self.instruction_pool).format(input) instruction = " {} ".format(instruction) From 8415c675a5fb1a1a2dc98e6bf1cf9c56066a9507 Mon Sep 17 00:00:00 2001 From: ThuanNaN Date: Mon, 13 Jan 2025 11:07:02 +0700 Subject: [PATCH 3/4] add normalize bbox --- create_dataset.ipynb | 51 ++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/create_dataset.ipynb b/create_dataset.ipynb index 6c6c54e..ca909ee 100644 --- a/create_dataset.ipynb +++ b/create_dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 48, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -30,7 +30,25 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def normalize_bbox(bbox, width, height):\n", + " \"\"\"\n", + " Normalize bounding box to the range [0, 100].\n", + " \"\"\"\n", + " return [\n", + " int((bbox[0] / width) * 100), # x_min\n", + " int((bbox[1] / height) * 100), # y_min\n", + " int((bbox[2] / width) * 100), # x_max\n", + " int((bbox[3] / height) * 100), # y_max\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -144,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -153,7 +171,7 @@ "5354" ] }, - "execution_count": 51, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -164,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -178,17 +196,22 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "for data in mvtech_ad_data_for_regression:\n", - " data[\"image_path\"] = data[\"image_path\"].replace(\"MVTEC_det\", \".\")" + " data[\"image_path\"] = data[\"image_path\"].replace(\"MVTEC_det\", \".\")\n", + " data[\"bbox\"] = normalize_bbox(\n", + " bbox=data[\"bbox\"],\n", + " width=data[\"width\"],\n", + " height=data[\"height\"]\n", + " )" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -198,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -213,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -240,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -279,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -288,7 +311,7 @@ "Text(0.5, 0, 'Good/Defect')" ] }, - "execution_count": 58, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, From 82016e32fc13423342cfa380931e4296a517ea4b Mon Sep 17 00:00:00 2001 From: ThuanNaN Date: Mon, 13 Jan 2025 11:12:55 +0700 Subject: [PATCH 4/4] update prompt ins --- minigpt4/datasets/datasets/mvtec_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/minigpt4/datasets/datasets/mvtec_dataset.py b/minigpt4/datasets/datasets/mvtec_dataset.py index 893414a..705e774 100644 --- a/minigpt4/datasets/datasets/mvtec_dataset.py +++ b/minigpt4/datasets/datasets/mvtec_dataset.py @@ -35,11 +35,11 @@ class MVTecDataset(Dataset): image = Image.open(image_path).convert("RGB") image = self.vis_processor(image) - input = "detect a defect or not-defect object and return the bounding boxes and its label. If not, bound around the object." + input = "a defect or not-defect object and return the bounding boxes and its label. If not, bound around the object." ans_defect = "defect" if info["is_broken"] == True else "non-defect" ans_para = f"

{ans_cls}-{ans_defect}

" - answer = f"{ans_para} {{<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>}}" + answer = f"{ans_para}{{<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>}}" instruction = random.choice(self.instruction_pool).format(input) instruction = " {} ".format(instruction)