fix: missing "answer" key

This commit is contained in:
ThuanNaN 2025-01-20 07:23:11 +07:00
parent c5796f091b
commit 006a7b863b

View File

@ -81,7 +81,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"(34120, 4922)" "(34109, 4922)"
] ]
}, },
"execution_count": 5, "execution_count": 5,
@ -96,21 +96,18 @@
" train_set = json.load(f)[\"data\"]\n", " train_set = json.load(f)[\"data\"]\n",
" train_set = [{k: v for k, v in d.items() if k in target_fields} for d in train_set]\n", " train_set = [{k: v for k, v in d.items() if k in target_fields} for d in train_set]\n",
" for d in train_set:\n", " for d in train_set:\n",
" filtered_answer = filter_answers(d[\"answers\"])\n",
" if filtered_answer is None:\n",
" train_set.remove(d)\n",
" else:\n",
" d[\"answer\"] = filter_answers(d[\"answers\"])\n", " d[\"answer\"] = filter_answers(d[\"answers\"])\n",
" # drop unanswerable questions\n",
" train_set = [d for d in train_set if d[\"answer\"] is not None]\n",
" \n",
"\n", "\n",
"with open(BASE_DIR / \"TextVQA_0.5.1_val.json\", \"r\") as f:\n", "with open(BASE_DIR / \"TextVQA_0.5.1_val.json\", \"r\") as f:\n",
" val_set = json.load(f)[\"data\"]\n", " val_set = json.load(f)[\"data\"]\n",
" val_set = [{k: v for k, v in d.items() if k in target_fields} for d in val_set]\n", " val_set = [{k: v for k, v in d.items() if k in target_fields} for d in val_set]\n",
" for d in val_set:\n", " for d in val_set:\n",
" filtered_answer = filter_answers(d[\"answers\"])\n",
" if filtered_answer is None:\n",
" val_set.remove(d)\n",
" else:\n",
" d[\"answer\"] = filter_answers(d[\"answers\"])\n", " d[\"answer\"] = filter_answers(d[\"answers\"])\n",
" # drop unanswerable questions\n",
" val_set = [d for d in val_set if d[\"answer\"] is not None]\n",
" \n", " \n",
"\n", "\n",
"len(train_set), len(val_set)" "len(train_set), len(val_set)"