From 006a7b863b744c4d2b0dcc18a0b2bb467da94805 Mon Sep 17 00:00:00 2001 From: ThuanNaN Date: Mon, 20 Jan 2025 07:23:11 +0700 Subject: [PATCH] fix: missing "answer" key --- data/create_textvqa_dataset.ipynb | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/data/create_textvqa_dataset.ipynb b/data/create_textvqa_dataset.ipynb index 335e8f6..5120eaf 100644 --- a/data/create_textvqa_dataset.ipynb +++ b/data/create_textvqa_dataset.ipynb @@ -81,7 +81,7 @@ { "data": { "text/plain": [ - "(34120, 4922)" + "(34109, 4922)" ] }, "execution_count": 5, @@ -96,21 +96,18 @@ " train_set = json.load(f)[\"data\"]\n", " train_set = [{k: v for k, v in d.items() if k in target_fields} for d in train_set]\n", " for d in train_set:\n", - " filtered_answer = filter_answers(d[\"answers\"])\n", - " if filtered_answer is None:\n", - " train_set.remove(d)\n", - " else:\n", - " d[\"answer\"] = filter_answers(d[\"answers\"])\n", + " d[\"answer\"] = filter_answers(d[\"answers\"])\n", + " # drop unanswerable questions\n", + " train_set = [d for d in train_set if d[\"answer\"] is not None]\n", + " \n", "\n", "with open(BASE_DIR / \"TextVQA_0.5.1_val.json\", \"r\") as f:\n", " val_set = json.load(f)[\"data\"]\n", " val_set = [{k: v for k, v in d.items() if k in target_fields} for d in val_set]\n", " for d in val_set:\n", - " filtered_answer = filter_answers(d[\"answers\"])\n", - " if filtered_answer is None:\n", - " val_set.remove(d)\n", - " else:\n", - " d[\"answer\"] = filter_answers(d[\"answers\"])\n", + " d[\"answer\"] = filter_answers(d[\"answers\"])\n", + " # drop unanswerable questions\n", + " val_set = [d for d in val_set if d[\"answer\"] is not None]\n", " \n", "\n", "len(train_set), len(val_set)"