mirror of
https://github.com/Vision-CAIR/MiniGPT-4.git
synced 2025-04-05 10:30:45 +00:00
Merge pull request #4 from ThuanNaN/create_dataset
normalize bbox and update train cfg
This commit is contained in:
commit
340177badc
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -15,7 +15,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -30,7 +30,25 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def normalize_bbox(bbox, width, height):\n",
|
||||
" \"\"\"\n",
|
||||
" Normalize bounding box to the range [0, 100].\n",
|
||||
" \"\"\"\n",
|
||||
" return [\n",
|
||||
" int((bbox[0] / width) * 100), # x_min\n",
|
||||
" int((bbox[1] / height) * 100), # y_min\n",
|
||||
" int((bbox[2] / width) * 100), # x_max\n",
|
||||
" int((bbox[3] / height) * 100), # y_max\n",
|
||||
" ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -144,7 +162,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -153,7 +171,7 @@
|
||||
"5354"
|
||||
]
|
||||
},
|
||||
"execution_count": 51,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -164,7 +182,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -178,17 +196,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for data in mvtech_ad_data_for_regression:\n",
|
||||
" data[\"image_path\"] = data[\"image_path\"].replace(\"MVTEC_det\", \".\")"
|
||||
" data[\"image_path\"] = data[\"image_path\"].replace(\"MVTEC_det\", \".\")\n",
|
||||
" data[\"bbox\"] = normalize_bbox(\n",
|
||||
" bbox=data[\"bbox\"],\n",
|
||||
" width=data[\"width\"],\n",
|
||||
" height=data[\"height\"]\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -198,7 +221,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -213,7 +236,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -240,7 +263,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -279,7 +302,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -288,7 +311,7 @@
|
||||
"Text(0.5, 0, 'Good/Defect')"
|
||||
]
|
||||
},
|
||||
"execution_count": 58,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
|
@ -3,4 +3,4 @@ datasets:
|
||||
data_type: images
|
||||
build_info:
|
||||
image_path: ./MVTEC_det/images
|
||||
ann_path: ./MVTEC_det/mvtech_ad_data_for_regression.json
|
||||
ann_path: ./MVTEC_det/train_data.json
|
||||
|
@ -29,15 +29,17 @@ class MVTecDataset(Dataset):
|
||||
def __getitem__(self, index):
|
||||
info = self.ann[index]
|
||||
gt_bbox = info["bbox"]
|
||||
ans_cls = info["class"]
|
||||
|
||||
image_path = os.path.join(self.vis_root, info['image_path'])
|
||||
image = Image.open(image_path).convert("RGB")
|
||||
image = self.vis_processor(image)
|
||||
|
||||
input = "detect defect or non-defect and return the bounding box"
|
||||
input = "a defect or not-defect object and return the bounding boxes and its label. If not, bound around the object."
|
||||
|
||||
ans_cls = "defect" if info["is_broken"] == True else "non-defect"
|
||||
answer = f"{ans_cls}<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>"
|
||||
ans_defect = "defect" if info["is_broken"] == True else "non-defect"
|
||||
ans_para = f"<p>{ans_cls}-{ans_defect}</p>"
|
||||
answer = f"{ans_para}{{<{gt_bbox[0]}><{gt_bbox[1]}><{gt_bbox[2]}><{gt_bbox[3]}>}}"
|
||||
|
||||
instruction = random.choice(self.instruction_pool).format(input)
|
||||
instruction = "<Img><ImageHere></Img> {} ".format(instruction)
|
||||
|
@ -32,10 +32,10 @@ run:
|
||||
warmup_lr: 1e-6
|
||||
|
||||
weight_decay: 0.05
|
||||
max_epoch: 10
|
||||
max_epoch: 5
|
||||
num_workers: 6
|
||||
warmup_steps: 1000
|
||||
iters_per_epoch: 1000
|
||||
iters_per_epoch: 2000
|
||||
|
||||
seed: 42
|
||||
output_dir: "mvtec_outputs"
|
||||
@ -51,5 +51,5 @@ run:
|
||||
dist_url: "env://"
|
||||
distributed: True
|
||||
|
||||
wandb_log: True
|
||||
wandb_log: False
|
||||
job_name: minigptv2_finetune
|
||||
|
Loading…
Reference in New Issue
Block a user