Merge pull request #4 from ThuanNaN/create_dataset

normalize bbox and update train cfg
This commit is contained in:
Nguyen Thuan Duong 2025-01-13 19:41:52 +07:00 committed by GitHub
commit 340177badc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 46 additions and 21 deletions

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 48, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -15,7 +15,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 49, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -30,7 +30,25 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 50, "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def normalize_bbox(bbox, width, height):\n",
" \"\"\"\n",
" Normalize bounding box to the range [0, 100].\n",
" \"\"\"\n",
" return [\n",
" int((bbox[0] / width) * 100), # x_min\n",
" int((bbox[1] / height) * 100), # y_min\n",
" int((bbox[2] / width) * 100), # x_max\n",
" int((bbox[3] / height) * 100), # y_max\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -144,7 +162,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 51, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -153,7 +171,7 @@
"5354" "5354"
] ]
}, },
"execution_count": 51, "execution_count": 10,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -164,7 +182,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 52, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -178,17 +196,22 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 53, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"for data in mvtech_ad_data_for_regression:\n", "for data in mvtech_ad_data_for_regression:\n",
" data[\"image_path\"] = data[\"image_path\"].replace(\"MVTEC_det\", \".\")" " data[\"image_path\"] = data[\"image_path\"].replace(\"MVTEC_det\", \".\")\n",
" data[\"bbox\"] = normalize_bbox(\n",
" bbox=data[\"bbox\"],\n",
" width=data[\"width\"],\n",
" height=data[\"height\"]\n",
" )"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 54, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -198,7 +221,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 55, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -213,7 +236,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 56, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -240,7 +263,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 57, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -279,7 +302,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 58, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -288,7 +311,7 @@
"Text(0.5, 0, 'Good/Defect')" "Text(0.5, 0, 'Good/Defect')"
] ]
}, },
"execution_count": 58, "execution_count": 17,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
}, },

View File

@ -3,4 +3,4 @@ datasets:
data_type: images data_type: images
build_info: build_info:
image_path: ./MVTEC_det/images image_path: ./MVTEC_det/images
ann_path: ./MVTEC_det/mvtech_ad_data_for_regression.json ann_path: ./MVTEC_det/train_data.json

View File

@ -29,15 +29,17 @@ class MVTecDataset(Dataset):
def __getitem__(self, index): def __getitem__(self, index):
info = self.ann[index] info = self.ann[index]
gt_bbox = info["bbox"] gt_bbox = info["bbox"]
ans_cls = info["class"]
image_path = os.path.join(self.vis_root, info['image_path']) image_path = os.path.join(self.vis_root, info['image_path'])
image = Image.open(image_path).convert("RGB") image = Image.open(image_path).convert("RGB")
image = self.vis_processor(image) image = self.vis_processor(image)
input = "detect defect or non-defect and return the bounding box" input = "a defect or not-defect object and return the bounding boxes and its label. If not, bound around the object."
ans_cls = "defect" if info["is_broken"] == True else "non-defect" ans_defect = "defect" if info["is_broken"] == True else "non-defect"
answer = f"{ans_cls}<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>" ans_para = f"<p>{ans_cls}-{ans_defect}</p>"
answer = f"{ans_para}{{<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>}}"
instruction = random.choice(self.instruction_pool).format(input) instruction = random.choice(self.instruction_pool).format(input)
instruction = "<Img><ImageHere></Img> {} ".format(instruction) instruction = "<Img><ImageHere></Img> {} ".format(instruction)

View File

@ -32,10 +32,10 @@ run:
warmup_lr: 1e-6 warmup_lr: 1e-6
weight_decay: 0.05 weight_decay: 0.05
max_epoch: 10 max_epoch: 5
num_workers: 6 num_workers: 6
warmup_steps: 1000 warmup_steps: 1000
iters_per_epoch: 1000 iters_per_epoch: 2000
seed: 42 seed: 42
output_dir: "mvtec_outputs" output_dir: "mvtec_outputs"
@ -51,5 +51,5 @@ run:
dist_url: "env://" dist_url: "env://"
distributed: True distributed: True
wandb_log: True wandb_log: False
job_name: minigptv2_finetune job_name: minigptv2_finetune