diff --git a/create_dataset.ipynb b/create_dataset.ipynb index 6c6c54e..ca909ee 100644 --- a/create_dataset.ipynb +++ b/create_dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 48, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -30,7 +30,25 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def normalize_bbox(bbox, width, height):\n", + " \"\"\"\n", + " Normalize bounding box to the range [0, 100].\n", + " \"\"\"\n", + " return [\n", + " int((bbox[0] / width) * 100), # x_min\n", + " int((bbox[1] / height) * 100), # y_min\n", + " int((bbox[2] / width) * 100), # x_max\n", + " int((bbox[3] / height) * 100), # y_max\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -144,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -153,7 +171,7 @@ "5354" ] }, - "execution_count": 51, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -164,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -178,17 +196,22 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "for data in mvtech_ad_data_for_regression:\n", - " data[\"image_path\"] = data[\"image_path\"].replace(\"MVTEC_det\", \".\")" + " data[\"image_path\"] = data[\"image_path\"].replace(\"MVTEC_det\", \".\")\n", + " data[\"bbox\"] = normalize_bbox(\n", + " bbox=data[\"bbox\"],\n", + " width=data[\"width\"],\n", + " height=data[\"height\"]\n", + " )" ] }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -198,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -213,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -240,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -279,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -288,7 +311,7 @@ "Text(0.5, 0, 'Good/Defect')" ] }, - "execution_count": 58, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, diff --git a/minigpt4/configs/datasets/mvtec/default.yaml b/minigpt4/configs/datasets/mvtec/default.yaml index 2fbb16b..68c63a5 100755 --- a/minigpt4/configs/datasets/mvtec/default.yaml +++ b/minigpt4/configs/datasets/mvtec/default.yaml @@ -3,4 +3,4 @@ datasets: data_type: images build_info: image_path: ./MVTEC_det/images - ann_path: ./MVTEC_det/mvtech_ad_data_for_regression.json + ann_path: ./MVTEC_det/train_data.json diff --git a/minigpt4/datasets/datasets/mvtec_dataset.py b/minigpt4/datasets/datasets/mvtec_dataset.py index 2586232..705e774 100644 --- a/minigpt4/datasets/datasets/mvtec_dataset.py +++ b/minigpt4/datasets/datasets/mvtec_dataset.py @@ -29,15 +29,17 @@ class MVTecDataset(Dataset): def __getitem__(self, index): info = self.ann[index] gt_bbox = info["bbox"] + ans_cls = info["class"] image_path = os.path.join(self.vis_root, info['image_path']) image = Image.open(image_path).convert("RGB") image = self.vis_processor(image) - input = "detect defect or non-defect and return the bounding box" + input = "a defect or not-defect object and return the bounding boxes and its label. If not, bound around the object." - ans_cls = "defect" if info["is_broken"] == True else "non-defect" - answer = f"{ans_cls}<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>" + ans_defect = "defect" if info["is_broken"] == True else "non-defect" + ans_para = f"
{ans_cls}-{ans_defect}
" + answer = f"{ans_para}{{<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>}}" instruction = random.choice(self.instruction_pool).format(input) instruction = "